Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
author    Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 1 Mar 2016 16:57:34 +0000 (08:57 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 1 Mar 2016 16:57:34 +0000 (08:57 -0800)
Pull sparc fixes from David Miller:

 1) System call tracing doesn't handle register contents properly across
    the trace.  From Mike Frysinger.

 2) Hook up copy_file_range (a brief usage sketch follows this list).

 3) Build fix for 32-bit with newer tools.

 4) New sun4v watchdog driver, from Wim Coekaerts.

 5) Set context system call has to allow for serviceable faults when we
    flush the register windows to memory.

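For context on item 2: copy_file_range(2) copies data between two file
descriptors inside the kernel, avoiding a read/write bounce through a
userspace buffer; this merge only wires the syscall number into the sparc
syscall tables. Below is a minimal, hypothetical userspace sketch — not
part of this merge — that assumes kernel headers new enough to define
SYS_copy_file_range and shows the generic syscall semantics, nothing
sparc-specific:

/*
 * Hypothetical example, not part of this merge: a minimal userspace
 * caller of copy_file_range(2). Invokes the syscall directly so it
 * works even where a libc wrapper is missing; SYS_copy_file_range
 * must be provided by the installed kernel headers (an assumption).
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
        return EXIT_FAILURE;
    }

    int in = open(argv[1], O_RDONLY);
    int out = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (in < 0 || out < 0) {
        perror("open");
        return EXIT_FAILURE;
    }

    struct stat st;
    if (fstat(in, &st) < 0) {
        perror("fstat");
        return EXIT_FAILURE;
    }

    ssize_t left = st.st_size;
    while (left > 0) {
        /* NULL offsets: the kernel uses and advances each fd's file position. */
        ssize_t n = syscall(SYS_copy_file_range, in, NULL, out, NULL, left, 0);
        if (n < 0) {
            perror("copy_file_range");
            return EXIT_FAILURE;
        }
        if (n == 0)     /* unexpected end of input; stop rather than spin */
            break;
        left -= n;
    }

    close(in);
    close(out);
    return EXIT_SUCCESS;
}
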
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Fix sparc64_set_context stack handling.
  sparc32: Add -Wa,-Av8 to KBUILD_CFLAGS.
  Add sun4v_wdt watchdog driver
  sparc: Fix system call tracing register handling.
  sparc: Hook up copy_file_range syscall.

2078 files changed:
.mailmap
Documentation/ABI/testing/configfs-rdma_cm [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-infiniband [new file with mode: 0644]
Documentation/Intel-IOMMU.txt
Documentation/cgroup-v2.txt
Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt
Documentation/devicetree/bindings/input/gpio-keys.txt
Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt [new file with mode: 0644]
Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
Documentation/devicetree/bindings/net/mdio-mux-gpio.txt
Documentation/devicetree/bindings/net/mdio-mux.txt
Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/phy.txt
Documentation/devicetree/bindings/net/ralink,rt2880-net.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/renesas,ravb.txt
Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
Documentation/devicetree/bindings/pci/rcar-pci.txt
Documentation/devicetree/bindings/phy/phy-ath79-usb.txt [new file with mode: 0644]
Documentation/devicetree/bindings/regulator/tps65217.txt
Documentation/devicetree/bindings/rtc/s3c-rtc.txt
Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
Documentation/devicetree/bindings/thermal/rcar-thermal.txt
Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
Documentation/filesystems/efivarfs.txt
Documentation/filesystems/proc.txt
Documentation/infiniband/core_locking.txt
Documentation/kernel-parameters.txt
Documentation/kernel-per-CPU-kthreads.txt
Documentation/networking/ip-sysctl.txt
Documentation/sysctl/fs.txt
Documentation/timers/hpet.txt
Documentation/virtual/kvm/api.txt
MAINTAINERS
Makefile
arch/arc/Kconfig
arch/arc/Makefile
arch/arc/configs/axs101_defconfig
arch/arc/configs/axs103_defconfig
arch/arc/configs/axs103_smp_defconfig
arch/arc/configs/nsim_700_defconfig
arch/arc/configs/nsim_hs_defconfig
arch/arc/configs/nsim_hs_smp_defconfig
arch/arc/configs/nsimosci_defconfig
arch/arc/configs/nsimosci_hs_defconfig
arch/arc/configs/nsimosci_hs_smp_defconfig
arch/arc/configs/tb10x_defconfig
arch/arc/configs/vdk_hs38_smp_defconfig
arch/arc/include/asm/arcregs.h
arch/arc/include/asm/irq.h
arch/arc/include/asm/irqflags-arcv2.h
arch/arc/include/asm/mcip.h
arch/arc/include/asm/pgtable.h
arch/arc/kernel/entry-arcv2.S
arch/arc/kernel/intc-arcv2.c
arch/arc/kernel/intc-compact.c
arch/arc/kernel/mcip.c
arch/arc/kernel/setup.c
arch/arc/kernel/smp.c
arch/arc/kernel/time.c
arch/arm/Kconfig.debug
arch/arm/boot/compressed/Makefile
arch/arm/boot/dts/am335x-bone-common.dtsi
arch/arm/boot/dts/am335x-chilisom.dtsi
arch/arm/boot/dts/am335x-nano.dts
arch/arm/boot/dts/am335x-pepper.dts
arch/arm/boot/dts/am335x-shc.dts
arch/arm/boot/dts/am335x-sl50.dts
arch/arm/boot/dts/am33xx.dtsi
arch/arm/boot/dts/am4372.dtsi
arch/arm/boot/dts/am437x-gp-evm.dts
arch/arm/boot/dts/am43x-epos-evm.dts
arch/arm/boot/dts/am57xx-beagle-x15.dts
arch/arm/boot/dts/am57xx-cl-som-am57x.dts
arch/arm/boot/dts/am57xx-sbc-am57x.dts
arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
arch/arm/boot/dts/at91-sama5d2_xplained.dts
arch/arm/boot/dts/at91-sama5d4_xplained.dts
arch/arm/boot/dts/at91-sama5d4ek.dts
arch/arm/boot/dts/at91sam9n12ek.dts
arch/arm/boot/dts/imx6qdl.dtsi
arch/arm/boot/dts/kirkwood-ds112.dts
arch/arm/boot/dts/kirkwood-lswvl.dts
arch/arm/boot/dts/kirkwood-lswxl.dts
arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts
arch/arm/boot/dts/logicpd-torpedo-som.dtsi
arch/arm/boot/dts/omap5-board-common.dtsi
arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
arch/arm/boot/dts/r8a7740-armadillo800eva.dts
arch/arm/boot/dts/sama5d2-pinfunc.h
arch/arm/boot/dts/sama5d4.dtsi
arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
arch/arm/boot/dts/tps65217.dtsi [new file with mode: 0644]
arch/arm/common/icst.c
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/omap2plus_defconfig
arch/arm/crypto/aes-ce-glue.c
arch/arm/include/asm/arch_gicv3.h
arch/arm/include/asm/xen/page-coherent.h
arch/arm/include/uapi/asm/unistd.h
arch/arm/kernel/calls.S
arch/arm/kvm/mmio.c
arch/arm/mach-omap2/board-generic.c
arch/arm/mach-omap2/devices.c
arch/arm/mach-omap2/gpmc-onenand.c
arch/arm/mach-omap2/omap_device.c
arch/arm/mach-omap2/pdata-quirks.c
arch/arm/mach-omap2/sleep34xx.S
arch/arm/mach-omap2/sleep44xx.S
arch/arm/mach-realview/Kconfig
arch/arm/mach-realview/Makefile
arch/arm/mach-realview/platsmp-dt.c
arch/arm/mach-shmobile/common.h
arch/arm/mach-shmobile/headsmp-scu.S
arch/arm/mach-shmobile/headsmp.S
arch/arm/mach-shmobile/platsmp-apmu.c
arch/arm/mach-shmobile/platsmp-scu.c
arch/arm/mach-shmobile/smp-r8a7779.c
arch/arm/mach-tango/Kconfig
arch/arm/mach-tango/platsmp.c
arch/arm/mach-tegra/Kconfig
arch/arm/mach-tegra/sleep-tegra20.S
arch/arm/mach-tegra/sleep-tegra30.S
arch/arm/mm/dma-mapping.c
arch/arm/mm/mmap.c
arch/arm64/Kconfig.platforms
arch/arm64/Makefile
arch/arm64/boot/Makefile
arch/arm64/boot/dts/Makefile
arch/arm64/boot/dts/arm/juno-base.dtsi
arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
arch/arm64/boot/dts/nvidia/Makefile [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra132-norrin.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra132.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2571.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210.dtsi [new file with mode: 0644]
arch/arm64/boot/install.sh
arch/arm64/configs/defconfig
arch/arm64/crypto/aes-glue.c
arch/arm64/include/asm/arch_gicv3.h
arch/arm64/include/asm/futex.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/page.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/debug-monitors.c
arch/arm64/kernel/head.S
arch/arm64/kernel/image.h
arch/arm64/kernel/stacktrace.c
arch/arm64/kernel/traps.c
arch/arm64/kvm/hyp-init.S
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/hyp/vgic-v3-sr.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/sys_regs.c
arch/arm64/lib/strnlen.S
arch/arm64/mm/dma-mapping.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/mmap.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/proc-macros.S
arch/arm64/mm/proc.S
arch/ia64/include/asm/unistd.h
arch/ia64/include/uapi/asm/unistd.h
arch/ia64/kernel/entry.S
arch/m32r/Kconfig
arch/m68k/configs/amiga_defconfig
arch/m68k/configs/apollo_defconfig
arch/m68k/configs/atari_defconfig
arch/m68k/configs/bvme6000_defconfig
arch/m68k/configs/hp300_defconfig
arch/m68k/configs/mac_defconfig
arch/m68k/configs/multi_defconfig
arch/m68k/configs/mvme147_defconfig
arch/m68k/configs/mvme16x_defconfig
arch/m68k/configs/q40_defconfig
arch/m68k/configs/sun3_defconfig
arch/m68k/configs/sun3x_defconfig
arch/m68k/include/asm/unistd.h
arch/m68k/include/uapi/asm/unistd.h
arch/m68k/kernel/syscalltable.S
arch/mips/Kbuild.platforms
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/alchemy/common/gpiolib.c
arch/mips/ar7/gpio.c
arch/mips/ath79/common.h
arch/mips/ath79/irq.c
arch/mips/ath79/setup.c
arch/mips/bcm47xx/sprom.c
arch/mips/bcm63xx/nvram.c
arch/mips/bmips/setup.c
arch/mips/boot/compressed/Makefile
arch/mips/boot/compressed/uart-prom.c [new file with mode: 0644]
arch/mips/boot/dts/Makefile
arch/mips/boot/dts/brcm/bcm6328.dtsi
arch/mips/boot/dts/brcm/bcm6368.dtsi
arch/mips/boot/dts/brcm/bcm7125.dtsi
arch/mips/boot/dts/brcm/bcm7346.dtsi
arch/mips/boot/dts/brcm/bcm7358.dtsi
arch/mips/boot/dts/brcm/bcm7360.dtsi
arch/mips/boot/dts/brcm/bcm7362.dtsi
arch/mips/boot/dts/brcm/bcm7420.dtsi
arch/mips/boot/dts/brcm/bcm7425.dtsi
arch/mips/boot/dts/brcm/bcm7435.dtsi
arch/mips/boot/dts/ingenic/ci20.dts
arch/mips/boot/dts/ingenic/jz4780.dtsi
arch/mips/boot/dts/pic32/Makefile [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda.dtsi [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda_sk.dts [new file with mode: 0644]
arch/mips/boot/dts/qca/ar9132.dtsi
arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
arch/mips/configs/pic32mzda_defconfig [new file with mode: 0644]
arch/mips/include/asm/cacheops.h
arch/mips/include/asm/cpu-features.h
arch/mips/include/asm/cpu.h
arch/mips/include/asm/elf.h
arch/mips/include/asm/fpu.h
arch/mips/include/asm/fpu_emulator.h
arch/mips/include/asm/io.h
arch/mips/include/asm/irqflags.h
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/mach-ath79/ath79.h
arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h [deleted file]
arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/irq.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/pic32.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/spaces.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/irq.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/mt7621.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h [new file with mode: 0644]
arch/mips/include/asm/mips-cm.h
arch/mips/include/asm/mips-r2-to-r6-emul.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/asm/octeon/octeon-feature.h
arch/mips/include/asm/page.h
arch/mips/include/asm/pgtable.h
arch/mips/include/asm/processor.h
arch/mips/include/asm/stackframe.h
arch/mips/include/asm/syscall.h
arch/mips/include/uapi/asm/inst.h
arch/mips/include/uapi/asm/unistd.h
arch/mips/kernel/binfmt_elfn32.c
arch/mips/kernel/binfmt_elfo32.c
arch/mips/kernel/cpu-bugs64.c
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/elf.c
arch/mips/kernel/gpio_txx9.c
arch/mips/kernel/process.c
arch/mips/kernel/ptrace.c
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-64.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-o32.S
arch/mips/kernel/setup.c
arch/mips/kernel/smp-cps.c
arch/mips/kernel/sync-r4k.c
arch/mips/kernel/traps.c
arch/mips/kvm/callback.c
arch/mips/kvm/dyntrans.c
arch/mips/kvm/emulate.c
arch/mips/kvm/interrupt.c
arch/mips/kvm/locore.S
arch/mips/kvm/mips.c
arch/mips/kvm/opcode.h [deleted file]
arch/mips/kvm/tlb.c
arch/mips/kvm/trap_emul.c
arch/mips/lib/mips-atomic.c
arch/mips/loongson64/Platform
arch/mips/loongson64/loongson-3/hpet.c
arch/mips/loongson64/loongson-3/smp.c
arch/mips/math-emu/cp1emu.c
arch/mips/math-emu/dp_simple.c
arch/mips/math-emu/dp_tint.c
arch/mips/math-emu/dp_tlong.c
arch/mips/math-emu/dsemul.c
arch/mips/math-emu/ieee754.c
arch/mips/math-emu/ieee754.h
arch/mips/math-emu/ieee754dp.c
arch/mips/math-emu/ieee754int.h
arch/mips/math-emu/ieee754sp.c
arch/mips/math-emu/sp_fdp.c
arch/mips/math-emu/sp_simple.c
arch/mips/math-emu/sp_tint.c
arch/mips/math-emu/sp_tlong.c
arch/mips/mm/mmap.c
arch/mips/mm/sc-mips.c
arch/mips/mm/tlbex.c
arch/mips/mti-malta/malta-init.c
arch/mips/pci/Makefile
arch/mips/pci/pci-mt7620.c [new file with mode: 0644]
arch/mips/pic32/Kconfig [new file with mode: 0644]
arch/mips/pic32/Makefile [new file with mode: 0644]
arch/mips/pic32/Platform [new file with mode: 0644]
arch/mips/pic32/common/Makefile [new file with mode: 0644]
arch/mips/pic32/common/irq.c [new file with mode: 0644]
arch/mips/pic32/common/reset.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/Makefile [new file with mode: 0644]
arch/mips/pic32/pic32mzda/config.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_clk.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_console.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_pin.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_pin.h [new file with mode: 0644]
arch/mips/pic32/pic32mzda/init.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/pic32mzda.h [new file with mode: 0644]
arch/mips/pic32/pic32mzda/time.c [new file with mode: 0644]
arch/mips/ralink/Kconfig
arch/mips/ralink/Makefile
arch/mips/ralink/Platform
arch/mips/ralink/irq-gic.c [new file with mode: 0644]
arch/mips/ralink/mt7620.c
arch/mips/ralink/mt7621.c [new file with mode: 0644]
arch/mips/ralink/rt288x.c
arch/mips/ralink/rt305x.c
arch/mips/ralink/rt3883.c
arch/mips/ralink/timer-gic.c [new file with mode: 0644]
arch/mips/rb532/gpio.c
arch/mips/txx9/generic/setup.c
arch/powerpc/Kconfig
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/eeh.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/trace.h
arch/powerpc/include/asm/unistd.h
arch/powerpc/include/uapi/asm/unistd.h
arch/powerpc/kernel/eeh_driver.c
arch/powerpc/kernel/eeh_pe.c
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/module_64.c
arch/powerpc/kernel/process.c
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/hash64_64k.c
arch/powerpc/mm/hugepage-hash64.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/mmap.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/perf/power8-pmu.c
arch/powerpc/platforms/cell/spufs/file.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/powernv/eeh-powernv.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/powernv/pci.h
arch/s390/hypfs/inode.c
arch/s390/include/asm/fpu/internal.h
arch/s390/include/asm/irqflags.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/livepatch.h
arch/s390/include/asm/pci_io.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/ptrace.h
arch/s390/include/uapi/asm/unistd.h
arch/s390/kernel/compat_signal.c
arch/s390/kernel/compat_wrapper.c
arch/s390/kernel/crash_dump.c
arch/s390/kernel/debug.c
arch/s390/kernel/dumpstack.c
arch/s390/kernel/early.c
arch/s390/kernel/ftrace.c
arch/s390/kernel/ipl.c
arch/s390/kernel/kprobes.c
arch/s390/kernel/perf_event.c
arch/s390/kernel/process.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/signal.c
arch/s390/kernel/smp.c
arch/s390/kernel/stacktrace.c
arch/s390/kernel/syscalls.S
arch/s390/kernel/trace.c
arch/s390/kernel/traps.c
arch/s390/kvm/Kconfig
arch/s390/kvm/Makefile
arch/s390/kvm/guestdbg.c
arch/s390/kvm/kvm-s390.c
arch/s390/mm/fault.c
arch/s390/mm/init.c
arch/s390/mm/maccess.c
arch/s390/mm/mmap.c
arch/s390/mm/pgtable.c
arch/s390/numa/numa.c
arch/s390/oprofile/backtrace.c
arch/s390/pci/pci.c
arch/s390/pci/pci_event.c
arch/sh/include/asm/barrier.h
arch/sparc/kernel/sys_sparc_64.c
arch/um/include/asm/page.h
arch/x86/Kconfig
arch/x86/crypto/chacha20-ssse3-x86_64.S
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64_compat.S
arch/x86/include/asm/irq.h
arch/x86/include/asm/livepatch.h
arch/x86/include/asm/pci_x86.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/pmem.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/uaccess_32.h
arch/x86/include/asm/xen/pci.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/perf_event_amd_uncore.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_uncore.c
arch/x86/kernel/cpu/perf_event_intel_uncore.h
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
arch/x86/kernel/head64.c
arch/x86/kernel/irq.c
arch/x86/kvm/emulate.c
arch/x86/kvm/paging_tmpl.h
arch/x86/kvm/x86.c
arch/x86/lib/copy_user_64.S
arch/x86/mm/fault.c
arch/x86/mm/gup.c
arch/x86/mm/hugetlbpage.c
arch/x86/mm/mmap.c
arch/x86/mm/mpx.c
arch/x86/mm/numa.c
arch/x86/mm/pageattr.c
arch/x86/pci/common.c
arch/x86/pci/intel_mid_pci.c
arch/x86/pci/irq.c
arch/x86/pci/xen.c
arch/x86/platform/efi/quirks.c
arch/x86/platform/intel-mid/intel-mid.c
arch/x86/platform/intel-quark/imr.c
block/Kconfig
block/Makefile
block/bio-integrity.c
block/bio.c
block/blk-cgroup.c
block/blk-core.c
block/blk-iopoll.c [deleted file]
block/blk-merge.c
block/blk-mq.c
block/blk-settings.c
block/blk-sysfs.c
block/blk-timeout.c
block/blk.h
block/deadline-iosched.c
block/ioctl.c
block/partition-generic.c
crypto/Kconfig
crypto/af_alg.c
crypto/ahash.c
crypto/algif_hash.c
crypto/algif_skcipher.c
crypto/asymmetric_keys/pkcs7_parser.c
crypto/crc32c_generic.c
crypto/crypto_user.c
crypto/shash.c
crypto/skcipher.c
drivers/acpi/acpi_lpss.c
drivers/acpi/apei/erst.c
drivers/acpi/nfit.c
drivers/acpi/pci_irq.c
drivers/acpi/pci_link.c
drivers/acpi/video_detect.c
drivers/amba/Kconfig
drivers/android/binder.c
drivers/ata/ahci.c
drivers/ata/ahci.h
drivers/ata/ahci_brcmstb.c
drivers/ata/libahci.c
drivers/ata/libata-core.c
drivers/ata/libata-sff.c
drivers/base/component.c
drivers/base/devtmpfs.c
drivers/base/platform-msi.c
drivers/base/platform.c
drivers/base/power/common.c
drivers/base/power/domain.c
drivers/base/regmap/regmap-mmio.c
drivers/block/aoe/aoecmd.c
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_debugfs.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_proc.c
drivers/block/drbd/drbd_protocol.h
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_req.h
drivers/block/drbd/drbd_state.c
drivers/block/drbd/drbd_state.h
drivers/block/drbd/drbd_state_change.h [new file with mode: 0644]
drivers/block/drbd/drbd_worker.c
drivers/block/floppy.c
drivers/block/mtip32xx/mtip32xx.c
drivers/block/null_blk.c
drivers/block/rbd.c
drivers/block/sx8.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/bus/Kconfig
drivers/bus/vexpress-config.c
drivers/char/hpet.c
drivers/char/hw_random/Kconfig
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/mem.c
drivers/char/mspec.c
drivers/char/ps3flash.c
drivers/char/random.c
drivers/clk/Makefile
drivers/clk/clk-gpio.c
drivers/clk/clk-scpi.c
drivers/clk/mvebu/dove-divider.c
drivers/clk/qcom/gcc-apq8084.c
drivers/clk/qcom/gcc-ipq806x.c
drivers/clk/qcom/gcc-msm8660.c
drivers/clk/qcom/gcc-msm8916.c
drivers/clk/qcom/gcc-msm8960.c
drivers/clk/qcom/gcc-msm8974.c
drivers/clk/qcom/lcc-ipq806x.c
drivers/clk/qcom/lcc-msm8960.c
drivers/clk/qcom/mmcc-apq8084.c
drivers/clk/qcom/mmcc-msm8960.c
drivers/clk/qcom/mmcc-msm8974.c
drivers/clk/rockchip/clk-rk3036.c
drivers/clk/rockchip/clk-rk3368.c
drivers/clk/tegra/clk-emc.c
drivers/clk/tegra/clk-id.h
drivers/clk/tegra/clk-pll.c
drivers/clk/tegra/clk-tegra-periph.c
drivers/clk/tegra/clk-tegra-super-gen4.c
drivers/clk/tegra/clk-tegra210.c
drivers/clk/ti/dpll3xxx.c
drivers/clk/versatile/clk-icst.c
drivers/clocksource/Kconfig
drivers/clocksource/tcb_clksrc.c
drivers/cpufreq/cpufreq-dt.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/pxa2xx-cpufreq.c
drivers/cpuidle/coupled.c
drivers/cpuidle/cpuidle.c
drivers/crypto/Kconfig
drivers/crypto/atmel-aes.c
drivers/crypto/atmel-sha.c
drivers/crypto/caam/ctrl.c
drivers/crypto/marvell/cesa.c
drivers/crypto/qat/qat_common/qat_hal.c
drivers/devfreq/tegra-devfreq.c
drivers/dma/dw/core.c
drivers/dma/dw/pci.c
drivers/dma/edma.c
drivers/dma/ioat/dma.c
drivers/firmware/efi/efivars.c
drivers/firmware/efi/vars.c
drivers/gpio/gpio-altera.c
drivers/gpio/gpio-davinci.c
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/ci_dpm.c
drivers/gpu/drm/amd/amdgpu/cik.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/cz_dpm.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/iceland_smc.c
drivers/gpu/drm/amd/amdgpu/kv_dpm.c
drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/include/amd_shared.h
drivers/gpu/drm/amd/include/cgs_common.h
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/drm_dp_mst_topology.c
drivers/gpu/drm/drm_hashtab.c
drivers/gpu/drm/drm_irq.c
drivers/gpu/drm/etnaviv/common.xml.h
drivers/gpu/drm/etnaviv/etnaviv_drv.c
drivers/gpu/drm/etnaviv/etnaviv_drv.h
drivers/gpu/drm/etnaviv/etnaviv_dump.c
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/etnaviv/etnaviv_gem.h
drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.h
drivers/gpu/drm/etnaviv/state_hi.xml.h
drivers/gpu/drm/exynos/Kconfig
drivers/gpu/drm/exynos/exynos5433_drm_decon.c
drivers/gpu/drm/exynos/exynos_dp_core.c
drivers/gpu/drm/exynos/exynos_drm_dsi.c
drivers/gpu/drm/exynos/exynos_drm_fbdev.c
drivers/gpu/drm/exynos/exynos_drm_fimc.c
drivers/gpu/drm/exynos/exynos_drm_g2d.c
drivers/gpu/drm/exynos/exynos_drm_gem.c
drivers/gpu/drm/exynos/exynos_drm_gsc.c
drivers/gpu/drm/exynos/exynos_drm_ipp.c
drivers/gpu/drm/exynos/exynos_drm_mic.c
drivers/gpu/drm/exynos/exynos_drm_rotator.c
drivers/gpu/drm/exynos/exynos_drm_vidi.c
drivers/gpu/drm/exynos/exynos_mixer.c
drivers/gpu/drm/i2c/adv7511.c
drivers/gpu/drm/i2c/adv7511.h
drivers/gpu/drm/i915/Kconfig
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_userptr.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_suspend.c
drivers/gpu/drm/i915/intel_crt.c
drivers/gpu/drm/i915/intel_csr.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_dp_link_training.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_dsi.c
drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
drivers/gpu/drm/i915/intel_hdmi.c
drivers/gpu/drm/i915/intel_i2c.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_display.c
drivers/gpu/drm/nouveau/nouveau_platform.c
drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
drivers/gpu/drm/qxl/qxl_ioctl.c
drivers/gpu/drm/qxl/qxl_prime.c
drivers/gpu/drm/radeon/dce6_afmt.c
drivers/gpu/drm/radeon/evergreen_hdmi.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_audio.h
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/radeon/radeon_sa.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/radeon/vce_v1_0.c
drivers/gpu/drm/rockchip/Makefile
drivers/gpu/drm/rockchip/dw-mipi-dsi.c
drivers/gpu/drm/rockchip/rockchip_drm_drv.c
drivers/gpu/drm/rockchip/rockchip_drm_fb.c
drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
drivers/gpu/drm/rockchip/rockchip_drm_gem.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/gpu/drm/vc4/vc4_bo.c
drivers/gpu/drm/vc4/vc4_drv.h
drivers/gpu/drm/vc4/vc4_gem.c
drivers/gpu/drm/vc4/vc4_irq.c
drivers/gpu/drm/vc4/vc4_render_cl.c
drivers/gpu/drm/vc4/vc4_v3d.c
drivers/gpu/drm/vc4/vc4_validate.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/hwmon/ads1015.c
drivers/hwmon/dell-smm-hwmon.c
drivers/hwmon/fam15h_power.c
drivers/hwmon/gpio-fan.c
drivers/hwspinlock/hwspinlock_core.c
drivers/i2c/busses/i2c-designware-core.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-omap.c
drivers/i2c/busses/i2c-piix4.c
drivers/i2c/busses/i2c-uniphier-f.c
drivers/i2c/busses/i2c-uniphier.c
drivers/iio/accel/Kconfig
drivers/iio/adc/Kconfig
drivers/iio/adc/ti_am335x_adc.c
drivers/iio/dac/mcp4725.c
drivers/iio/humidity/dht11.c
drivers/iio/imu/adis_buffer.c
drivers/iio/imu/inv_mpu6050/Kconfig
drivers/iio/inkern.c
drivers/iio/light/acpi-als.c
drivers/iio/light/ltr501.c
drivers/iio/pressure/mpl115.c
drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
drivers/infiniband/Kconfig
drivers/infiniband/core/Makefile
drivers/infiniband/core/addr.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/cma_configfs.c [new file with mode: 0644]
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/cq.c [new file with mode: 0644]
drivers/infiniband/core/device.c
drivers/infiniband/core/fmr_pool.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/multicast.c
drivers/infiniband/core/roce_gid_mgmt.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ud_header.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_marshall.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/cxgb3/iwch_cq.c
drivers/infiniband/hw/cxgb3/iwch_mem.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_provider.h
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/cxgb4/user.h
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/srq.c
drivers/infiniband/hw/mlx5/ah.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mlx5/user.h
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_utils.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_stats.c
drivers/infiniband/hw/ocrdma/ocrdma_stats.h
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/hw/qib/qib_mr.c
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/hw/qib/qib_verbs_mcast.c
drivers/infiniband/hw/usnic/usnic_debugfs.c
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.h
drivers/infiniband/hw/usnic/usnic_vnic.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/isert/ib_isert.h
drivers/infiniband/ulp/isert/isert_proto.h [deleted file]
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/infiniband/ulp/srpt/ib_srpt.h
drivers/input/joystick/xpad.c
drivers/input/keyboard/adp5589-keys.c
drivers/input/keyboard/cap11xx.c
drivers/input/keyboard/gpio_keys.c
drivers/input/misc/Kconfig
drivers/input/misc/sirfsoc-onkey.c
drivers/input/mouse/vmmouse.c
drivers/input/serio/serio.c
drivers/input/touchscreen/atmel_mxt_ts.c
drivers/input/touchscreen/colibri-vf50-ts.c
drivers/input/touchscreen/edt-ft5x06.c
drivers/iommu/amd_iommu.c
drivers/iommu/dmar.c
drivers/iommu/intel-iommu.c
drivers/iommu/intel-svm.c
drivers/iommu/intel_irq_remapping.c
drivers/iommu/io-pgtable-arm.c
drivers/irqchip/Kconfig
drivers/irqchip/Makefile
drivers/irqchip/irq-atmel-aic-common.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irq-mxs.c
drivers/irqchip/irq-pic32-evic.c [new file with mode: 0644]
drivers/irqchip/irq-s3c24xx.c
drivers/irqchip/irq-sun4i.c
drivers/isdn/gigaset/ser-gigaset.c
drivers/isdn/hardware/mISDN/netjet.c
drivers/lightnvm/Makefile
drivers/lightnvm/core.c
drivers/lightnvm/gennvm.c
drivers/lightnvm/rrpc.c
drivers/lightnvm/rrpc.h
drivers/lightnvm/sysblk.c [new file with mode: 0644]
drivers/mailbox/Kconfig
drivers/mailbox/pcc.c
drivers/md/bcache/btree.c
drivers/md/bcache/super.c
drivers/md/bcache/writeback.c
drivers/md/bcache/writeback.h
drivers/md/bitmap.c
drivers/md/dm.c
drivers/md/faulty.c
drivers/md/md-cluster.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/media/dvb-frontends/tda1004x.c
drivers/media/i2c/ir-kbd-i2c.c
drivers/media/i2c/s5k6a3.c
drivers/media/pci/saa7134/saa7134-alsa.c
drivers/media/platform/Kconfig
drivers/media/platform/exynos4-is/Kconfig
drivers/media/platform/exynos4-is/fimc-is.c
drivers/media/platform/exynos4-is/fimc-isp-video.c
drivers/media/platform/exynos4-is/media-dev.c
drivers/media/platform/soc_camera/atmel-isi.c
drivers/media/platform/soc_camera/soc_camera.c
drivers/media/platform/vsp1/vsp1_drv.c
drivers/media/platform/vsp1/vsp1_video.c
drivers/media/v4l2-core/videobuf2-core.c
drivers/media/v4l2-core/videobuf2-v4l2.c
drivers/mfd/db8500-prcmu.c
drivers/misc/mei/main.c
drivers/mmc/card/block.c
drivers/mmc/core/debugfs.c
drivers/mmc/core/pwrseq_simple.c
drivers/mmc/core/sd.c
drivers/mmc/core/sdio.c
drivers/mmc/core/sdio_cis.c
drivers/mmc/host/mmc_spi.c
drivers/mmc/host/mmci.c
drivers/mmc/host/omap_hsmmc.c
drivers/mmc/host/pxamci.c
drivers/mmc/host/sdhci-acpi.c
drivers/mmc/host/sdhci-of-at91.c
drivers/mmc/host/sdhci-pci-core.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci.h
drivers/mmc/host/sh_mmcif.c
drivers/mmc/host/tmio_mmc_dma.c
drivers/mtd/bcm63xxpart.c
drivers/mtd/ubi/cdev.c
drivers/net/bonding/bond_main.c
drivers/net/can/usb/ems_usb.c
drivers/net/dsa/mv88e6352.c
drivers/net/dsa/mv88e6xxx.c
drivers/net/ethernet/8390/pcnet_cs.c
drivers/net/ethernet/agere/et131x.c
drivers/net/ethernet/amd/am79c961a.c
drivers/net/ethernet/amd/lance.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.h
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/aurora/nb8800.c
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.h
drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
drivers/net/ethernet/cisco/enic/enic.h
drivers/net/ethernet/cisco/enic/vnic_dev.c
drivers/net/ethernet/davicom/dm9000.c
drivers/net/ethernet/ezchip/Kconfig
drivers/net/ethernet/freescale/Makefile
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
drivers/net/ethernet/fujitsu/fmvj18x_cs.c
drivers/net/ethernet/hisilicon/hns/hnae.c
drivers/net/ethernet/hisilicon/hns/hnae.h
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
drivers/net/ethernet/hisilicon/hns/hns_enet.c
drivers/net/ethernet/hisilicon/hns/hns_enet.h
drivers/net/ethernet/hp/hp100.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mellanox/mlx4/catas.c
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/cq.c
drivers/net/ethernet/mellanox/mlx4/en_clock.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_resources.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx4/pd.c
drivers/net/ethernet/mellanox/mlx4/port.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/srq.c
drivers/net/ethernet/mellanox/mlx5/core/transobj.c
drivers/net/ethernet/mellanox/mlx5/core/transobj.h [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlxsw/port.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/moxa/moxart_ether.c
drivers/net/ethernet/moxa/moxart_ether.h
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/rocker/rocker.c
drivers/net/ethernet/smsc/smc91x.c
drivers/net/ethernet/sun/sunvnet.c
drivers/net/ethernet/synopsys/dwc_eth_qos.c
drivers/net/ethernet/ti/cpsw-phy-sel.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/ethernet/ti/netcp_core.c
drivers/net/fddi/defxx.c
drivers/net/geneve.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/irda/bfin_sir.h
drivers/net/macvlan.c
drivers/net/phy/Kconfig
drivers/net/phy/bcm7xxx.c
drivers/net/phy/dp83640.c
drivers/net/phy/marvell.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/smsc.c
drivers/net/ppp/pppoe.c
drivers/net/ppp/pptp.c
drivers/net/usb/Kconfig
drivers/net/usb/Makefile
drivers/net/usb/lan78xx.c
drivers/net/usb/qmi_wwan.c
drivers/net/vmxnet3/vmxnet3_defs.h
drivers/net/vmxnet3/vmxnet3_int.h
drivers/net/vxlan.c
drivers/net/wan/dscc4.c
drivers/net/wireless/ath/ath9k/eeprom.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
drivers/net/wireless/intel/iwlwifi/Kconfig
drivers/net/wireless/intel/iwlwifi/iwl-7000.c
drivers/net/wireless/intel/iwlwifi/iwl-8000.c
drivers/net/wireless/intel/iwlwifi/iwl-drv.c
drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
drivers/net/wireless/intel/iwlwifi/mvm/rs.c
drivers/net/wireless/intel/iwlwifi/mvm/scan.c
drivers/net/wireless/intel/iwlwifi/mvm/tx.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/intel/iwlwifi/pcie/internal.h
drivers/net/wireless/intel/iwlwifi/pcie/rx.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/ralink/rt2x00/rt2400pci.c
drivers/net/wireless/ralink/rt2x00/rt2500pci.c
drivers/net/wireless/ralink/rt2x00/rt2500usb.c
drivers/net/wireless/ralink/rt2x00/rt2800lib.c
drivers/net/wireless/ralink/rt2x00/rt2x00.h
drivers/net/wireless/ralink/rt2x00/rt2x00config.c
drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
drivers/net/wireless/ralink/rt2x00/rt61pci.c
drivers/net/wireless/ralink/rt2x00/rt73usb.c
drivers/net/wireless/realtek/rtlwifi/rc.c
drivers/net/wireless/realtek/rtlwifi/regd.c
drivers/net/wireless/ti/wlcore/io.c
drivers/net/wireless/ti/wlcore/io.h
drivers/net/xen-netfront.c
drivers/ntb/hw/Kconfig
drivers/ntb/hw/Makefile
drivers/ntb/hw/amd/Kconfig [new file with mode: 0644]
drivers/ntb/hw/amd/Makefile [new file with mode: 0644]
drivers/ntb/hw/amd/ntb_hw_amd.c [new file with mode: 0644]
drivers/ntb/hw/amd/ntb_hw_amd.h [new file with mode: 0644]
drivers/ntb/hw/intel/ntb_hw_intel.c
drivers/ntb/hw/intel/ntb_hw_intel.h
drivers/ntb/ntb_transport.c
drivers/ntb/test/Kconfig
drivers/ntb/test/Makefile
drivers/ntb/test/ntb_perf.c [new file with mode: 0644]
drivers/nvdimm/bus.c
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/pfn_devs.c
drivers/nvdimm/pmem.c
drivers/nvme/host/Kconfig
drivers/nvme/host/Makefile
drivers/nvme/host/core.c [new file with mode: 0644]
drivers/nvme/host/lightnvm.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/scsi.c
drivers/nvmem/core.c
drivers/nvmem/qfprom.c
drivers/of/irq.c
drivers/of/of_mdio.c
drivers/oprofile/oprofilefs.c
drivers/pci/host/Kconfig
drivers/pci/host/pcie-iproc.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/pci/pcie/aer/aerdrv.c
drivers/pci/pcie/aer/aerdrv.h
drivers/pci/pcie/aer/aerdrv_core.c
drivers/pci/xen-pcifront.c
drivers/phy/Kconfig
drivers/phy/phy-core.c
drivers/phy/phy-twl4030-usb.c
drivers/pinctrl/mediatek/pinctrl-mtk-common.c
drivers/pinctrl/mvebu/pinctrl-mvebu.c
drivers/pinctrl/nomadik/pinctrl-abx500.c
drivers/pinctrl/pxa/pinctrl-pxa2xx.c
drivers/pinctrl/samsung/pinctrl-samsung.c
drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/intel_scu_ipcutil.c
drivers/platform/x86/intel_telemetry_debugfs.c
drivers/pnp/quirks.c
drivers/power/bq27xxx_battery_i2c.c
drivers/ptp/ptp_ixp46x.c
drivers/s390/block/dasd.c
drivers/s390/block/dasd_alias.c
drivers/s390/cio/chp.c
drivers/s390/cio/chp.h
drivers/s390/cio/chsc.c
drivers/s390/crypto/zcrypt_error.h
drivers/s390/crypto/zcrypt_msgtype50.c
drivers/s390/crypto/zcrypt_msgtype6.c
drivers/scsi/3w-xxxx.c
drivers/scsi/Kconfig
drivers/scsi/NCR5380.c
drivers/scsi/NCR5380.h
drivers/scsi/arm/cumana_1.c
drivers/scsi/arm/oak.c
drivers/scsi/atari_NCR5380.c
drivers/scsi/atari_scsi.c
drivers/scsi/be2iscsi/Kconfig
drivers/scsi/be2iscsi/be.h
drivers/scsi/be2iscsi/be_iscsi.c
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
drivers/scsi/device_handler/scsi_dh_rdac.c
drivers/scsi/dmx3191d.c
drivers/scsi/dtc.c
drivers/scsi/dtc.h
drivers/scsi/g_NCR5380.c
drivers/scsi/g_NCR5380.h
drivers/scsi/hisi_sas/Kconfig
drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
drivers/scsi/imm.c
drivers/scsi/ipr.c
drivers/scsi/ipr.h
drivers/scsi/mac_scsi.c
drivers/scsi/megaraid/megaraid_mm.c
drivers/scsi/pas16.c
drivers/scsi/pas16.h
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_mid.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/qla2xxx/qla_target.h
drivers/scsi/qla2xxx/qla_tmpl.c
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/sd.c
drivers/scsi/sg.c
drivers/scsi/sr.c
drivers/scsi/storvsc_drv.c
drivers/scsi/sun3_scsi.c
drivers/scsi/t128.c
drivers/scsi/t128.h
drivers/sh/pm_runtime.c
drivers/soc/Kconfig
drivers/soc/qcom/spm.c
drivers/soc/tegra/Kconfig [new file with mode: 0644]
drivers/spi/spi-atmel.c
drivers/spi/spi-bcm2835aux.c
drivers/spi/spi-fsl-espi.c
drivers/spi/spi-imx.c
drivers/spi/spi-loopback-test.c
drivers/spi/spi-omap2-mcspi.c
drivers/ssb/main.c
drivers/staging/iio/adc/Kconfig
drivers/staging/iio/meter/ade7753.c
drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
drivers/staging/lustre/lustre/llite/dir.c
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/llite/llite_internal.h
drivers/staging/lustre/lustre/llite/llite_lib.c
drivers/staging/lustre/lustre/llite/llite_nfs.c
drivers/staging/lustre/lustre/llite/lloop.c
drivers/staging/lustre/lustre/llite/rw.c
drivers/staging/lustre/lustre/llite/rw26.c
drivers/staging/lustre/lustre/llite/vvp_io.c
drivers/staging/lustre/lustre/llite/vvp_page.c
drivers/staging/panel/panel.c
drivers/staging/rdma/Kconfig
drivers/staging/rdma/Makefile
drivers/staging/rdma/amso1100/Kbuild [deleted file]
drivers/staging/rdma/amso1100/Kconfig [deleted file]
drivers/staging/rdma/amso1100/TODO [deleted file]
drivers/staging/rdma/amso1100/c2.c [deleted file]
drivers/staging/rdma/amso1100/c2.h [deleted file]
drivers/staging/rdma/amso1100/c2_ae.c [deleted file]
drivers/staging/rdma/amso1100/c2_ae.h [deleted file]
drivers/staging/rdma/amso1100/c2_alloc.c [deleted file]
drivers/staging/rdma/amso1100/c2_cm.c [deleted file]
drivers/staging/rdma/amso1100/c2_cq.c [deleted file]
drivers/staging/rdma/amso1100/c2_intr.c [deleted file]
drivers/staging/rdma/amso1100/c2_mm.c [deleted file]
drivers/staging/rdma/amso1100/c2_mq.c [deleted file]
drivers/staging/rdma/amso1100/c2_mq.h [deleted file]
drivers/staging/rdma/amso1100/c2_pd.c [deleted file]
drivers/staging/rdma/amso1100/c2_provider.c [deleted file]
drivers/staging/rdma/amso1100/c2_provider.h [deleted file]
drivers/staging/rdma/amso1100/c2_qp.c [deleted file]
drivers/staging/rdma/amso1100/c2_rnic.c [deleted file]
drivers/staging/rdma/amso1100/c2_status.h [deleted file]
drivers/staging/rdma/amso1100/c2_user.h [deleted file]
drivers/staging/rdma/amso1100/c2_vq.c [deleted file]
drivers/staging/rdma/amso1100/c2_vq.h [deleted file]
drivers/staging/rdma/amso1100/c2_wr.h [deleted file]
drivers/staging/rdma/ehca/Kconfig [deleted file]
drivers/staging/rdma/ehca/Makefile [deleted file]
drivers/staging/rdma/ehca/TODO [deleted file]
drivers/staging/rdma/ehca/ehca_av.c [deleted file]
drivers/staging/rdma/ehca/ehca_classes.h [deleted file]
drivers/staging/rdma/ehca/ehca_classes_pSeries.h [deleted file]
drivers/staging/rdma/ehca/ehca_cq.c [deleted file]
drivers/staging/rdma/ehca/ehca_eq.c [deleted file]
drivers/staging/rdma/ehca/ehca_hca.c [deleted file]
drivers/staging/rdma/ehca/ehca_irq.c [deleted file]
drivers/staging/rdma/ehca/ehca_irq.h [deleted file]
drivers/staging/rdma/ehca/ehca_iverbs.h [deleted file]
drivers/staging/rdma/ehca/ehca_main.c [deleted file]
drivers/staging/rdma/ehca/ehca_mcast.c [deleted file]
drivers/staging/rdma/ehca/ehca_mrmw.c [deleted file]
drivers/staging/rdma/ehca/ehca_mrmw.h [deleted file]
drivers/staging/rdma/ehca/ehca_pd.c [deleted file]
drivers/staging/rdma/ehca/ehca_qes.h [deleted file]
drivers/staging/rdma/ehca/ehca_qp.c [deleted file]
drivers/staging/rdma/ehca/ehca_reqs.c [deleted file]
drivers/staging/rdma/ehca/ehca_sqp.c [deleted file]
drivers/staging/rdma/ehca/ehca_tools.h [deleted file]
drivers/staging/rdma/ehca/ehca_uverbs.c [deleted file]
drivers/staging/rdma/ehca/hcp_if.c [deleted file]
drivers/staging/rdma/ehca/hcp_if.h [deleted file]
drivers/staging/rdma/ehca/hcp_phyp.c [deleted file]
drivers/staging/rdma/ehca/hcp_phyp.h [deleted file]
drivers/staging/rdma/ehca/hipz_fns.h [deleted file]
drivers/staging/rdma/ehca/hipz_fns_core.h [deleted file]
drivers/staging/rdma/ehca/hipz_hw.h [deleted file]
drivers/staging/rdma/ehca/ipz_pt_fn.c [deleted file]
drivers/staging/rdma/ehca/ipz_pt_fn.h [deleted file]
drivers/staging/rdma/hfi1/mr.c
drivers/staging/rdma/hfi1/verbs.c
drivers/staging/rdma/hfi1/verbs.h
drivers/staging/rdma/ipath/Kconfig [deleted file]
drivers/staging/rdma/ipath/Makefile [deleted file]
drivers/staging/rdma/ipath/TODO [deleted file]
drivers/staging/rdma/ipath/ipath_common.h [deleted file]
drivers/staging/rdma/ipath/ipath_cq.c [deleted file]
drivers/staging/rdma/ipath/ipath_debug.h [deleted file]
drivers/staging/rdma/ipath/ipath_diag.c [deleted file]
drivers/staging/rdma/ipath/ipath_dma.c [deleted file]
drivers/staging/rdma/ipath/ipath_driver.c [deleted file]
drivers/staging/rdma/ipath/ipath_eeprom.c [deleted file]
drivers/staging/rdma/ipath/ipath_file_ops.c [deleted file]
drivers/staging/rdma/ipath/ipath_fs.c [deleted file]
drivers/staging/rdma/ipath/ipath_iba6110.c [deleted file]
drivers/staging/rdma/ipath/ipath_init_chip.c [deleted file]
drivers/staging/rdma/ipath/ipath_intr.c [deleted file]
drivers/staging/rdma/ipath/ipath_kernel.h [deleted file]
drivers/staging/rdma/ipath/ipath_keys.c [deleted file]
drivers/staging/rdma/ipath/ipath_mad.c [deleted file]
drivers/staging/rdma/ipath/ipath_mmap.c [deleted file]
drivers/staging/rdma/ipath/ipath_mr.c [deleted file]
drivers/staging/rdma/ipath/ipath_qp.c [deleted file]
drivers/staging/rdma/ipath/ipath_rc.c [deleted file]
drivers/staging/rdma/ipath/ipath_registers.h [deleted file]
drivers/staging/rdma/ipath/ipath_ruc.c [deleted file]
drivers/staging/rdma/ipath/ipath_sdma.c [deleted file]
drivers/staging/rdma/ipath/ipath_srq.c [deleted file]
drivers/staging/rdma/ipath/ipath_stats.c [deleted file]
drivers/staging/rdma/ipath/ipath_sysfs.c [deleted file]
drivers/staging/rdma/ipath/ipath_uc.c [deleted file]
drivers/staging/rdma/ipath/ipath_ud.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_pages.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_sdma.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_sdma.h [deleted file]
drivers/staging/rdma/ipath/ipath_verbs.c [deleted file]
drivers/staging/rdma/ipath/ipath_verbs.h [deleted file]
drivers/staging/rdma/ipath/ipath_verbs_mcast.c [deleted file]
drivers/staging/rdma/ipath/ipath_wc_ppc64.c [deleted file]
drivers/staging/rdma/ipath/ipath_wc_x86_64.c [deleted file]
drivers/staging/speakup/Kconfig
drivers/staging/speakup/main.c
drivers/staging/speakup/selection.c
drivers/staging/speakup/serialio.c
drivers/target/target_core_configfs.c
drivers/target/target_core_device.c
drivers/target/target_core_file.c
drivers/target/target_core_iblock.c
drivers/target/target_core_internal.h
drivers/target/target_core_tmr.c
drivers/target/target_core_transport.c
drivers/target/target_core_user.c
drivers/thermal/Kconfig
drivers/thermal/cpu_cooling.c
drivers/thermal/int340x_thermal/processor_thermal_device.c
drivers/thermal/intel_pch_thermal.c
drivers/thermal/of-thermal.c
drivers/thermal/rcar_thermal.c
drivers/thermal/rockchip_thermal.c
drivers/thermal/spear_thermal.c
drivers/thermal/step_wise.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_core.h
drivers/tty/n_tty.c
drivers/tty/pty.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/serial/omap-serial.c
drivers/tty/tty_io.c
drivers/tty/tty_mutex.c
drivers/tty/vt/vt.c
drivers/usb/chipidea/ci_hdrc_pci.c
drivers/usb/chipidea/debug.c
drivers/usb/class/cdc-acm.c
drivers/usb/class/cdc-acm.h
drivers/usb/dwc2/Kconfig
drivers/usb/dwc2/core.c
drivers/usb/dwc2/hcd_ddma.c
drivers/usb/dwc2/hcd_intr.c
drivers/usb/dwc2/platform.c
drivers/usb/dwc3/core.h
drivers/usb/dwc3/ep0.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/function/f_printer.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/udc/atmel_usba_udc.c
drivers/usb/gadget/udc/fsl_qe_udc.c
drivers/usb/gadget/udc/net2280.h
drivers/usb/gadget/udc/udc-core.c
drivers/usb/host/Kconfig
drivers/usb/host/xhci-ext-caps.h
drivers/usb/host/xhci-mtk-sch.c
drivers/usb/host/xhci-mtk.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-plat.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/musb/musb_host.c
drivers/usb/musb/ux500.c
drivers/usb/phy/phy-msm-usb.c
drivers/usb/phy/phy-mxs-usb.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/mxu11x0.c
drivers/usb/serial/option.c
drivers/usb/serial/visor.c
drivers/vfio/vfio.c
drivers/video/fbdev/core/fb_defio.c
drivers/video/fbdev/da8xx-fb.c
drivers/video/fbdev/exynos/s6e8ax0.c
drivers/video/fbdev/imxfb.c
drivers/video/fbdev/mmp/hw/mmp_ctrl.c
drivers/video/fbdev/ocfb.c
drivers/virtio/virtio_pci_common.c
drivers/watchdog/Kconfig
drivers/watchdog/max63xx_wdt.c
drivers/watchdog/pcwd_usb.c
drivers/watchdog/sp805_wdt.c
drivers/xen/tmem.c
drivers/xen/xen-pciback/pciback_ops.c
drivers/xen/xen-scsiback.c
drivers/xen/xenbus/xenbus_dev_frontend.c
fs/9p/vfs_file.c
fs/affs/file.c
fs/afs/flock.c
fs/afs/write.c
fs/attr.c
fs/binfmt_elf.c
fs/binfmt_misc.c
fs/block_dev.c
fs/btrfs/async-thread.c
fs/btrfs/backref.c
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file.c
fs/btrfs/free-space-tree.c
fs/btrfs/inode-map.c
fs/btrfs/inode-map.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/raid56.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/cachefiles/interface.c
fs/cachefiles/namei.c
fs/ceph/addr.c
fs/ceph/cache.c
fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/cifs/cifs_debug.c
fs/cifs/cifs_debug.h
fs/cifs/cifs_dfs_ref.c
fs/cifs/cifsencrypt.c
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/cifsproto.h
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/misc.c
fs/cifs/readdir.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smb2proto.h
fs/cifs/smb2transport.c
fs/cifs/transport.c
fs/coda/coda_linux.h
fs/coda/dir.c
fs/coda/file.c
fs/compat_ioctl.c
fs/configfs/dir.c
fs/configfs/file.c
fs/configfs/inode.c
fs/dax.c
fs/dcache.c
fs/debugfs/inode.c
fs/devpts/inode.c
fs/direct-io.c
fs/dlm/user.c
fs/ecryptfs/inode.c
fs/ecryptfs/mmap.c
fs/efivarfs/file.c
fs/efivarfs/inode.c
fs/efivarfs/internal.h
fs/efivarfs/super.c
fs/eventpoll.c
fs/exec.c
fs/exofs/file.c
fs/exportfs/expfs.c
fs/ext2/file.c
fs/ext2/inode.c
fs/ext2/ioctl.c
fs/ext4/balloc.c
fs/ext4/crypto.c
fs/ext4/crypto_key.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/ialloc.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/move_extent.c
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/truncate.h
fs/f2fs/data.c
fs/f2fs/file.c
fs/fat/dir.c
fs/fat/file.c
fs/filesystems.c
fs/fs-writeback.c
fs/fuse/dir.c
fs/fuse/file.c
fs/gfs2/file.c
fs/gfs2/inode.c
fs/gfs2/quota.c
fs/hfs/dir.c
fs/hfs/inode.c
fs/hfsplus/dir.c
fs/hfsplus/inode.c
fs/hfsplus/ioctl.c
fs/hostfs/hostfs_kern.c
fs/hpfs/dir.c
fs/hpfs/namei.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/ioctl.c
fs/jffs2/build.c
fs/jffs2/file.c
fs/jffs2/fs.c
fs/jffs2/super.c
fs/jfs/file.c
fs/jfs/ioctl.c
fs/jfs/super.c
fs/kernfs/dir.c
fs/libfs.c
fs/locks.c
fs/logfs/file.c
fs/namei.c
fs/namespace.c
fs/ncpfs/dir.c
fs/ncpfs/file.c
fs/nfs/blocklayout/extent_tree.c
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/filelayout/filelayout.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/flexfilelayout/flexfilelayoutdev.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/nfs42proc.c
fs/nfs/nfs4file.c
fs/nfs/nfs4proc.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/write.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfsfh.h
fs/nfsd/vfs.c
fs/nilfs2/inode.c
fs/nilfs2/ioctl.c
fs/notify/mark.c
fs/ntfs/dir.c
fs/ntfs/file.c
fs/ntfs/quota.c
fs/ntfs/super.c
fs/ocfs2/alloc.c
fs/ocfs2/aops.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/dir.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/ioctl.c
fs/ocfs2/journal.c
fs/ocfs2/localalloc.c
fs/ocfs2/move_extents.c
fs/ocfs2/namei.c
fs/ocfs2/quota_global.c
fs/ocfs2/refcounttree.c
fs/ocfs2/resize.c
fs/ocfs2/suballoc.c
fs/ocfs2/xattr.c
fs/open.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/inode.c
fs/overlayfs/readdir.c
fs/overlayfs/super.c
fs/pipe.c
fs/pnode.c
fs/proc/kcore.c
fs/proc/self.c
fs/proc/task_mmu.c
fs/proc/task_nommu.c
fs/proc/thread_self.c
fs/pstore/inode.c
fs/quota/dquot.c
fs/read_write.c
fs/readdir.c
fs/reiserfs/dir.c
fs/reiserfs/file.c
fs/reiserfs/ioctl.c
fs/reiserfs/super.c
fs/reiserfs/xattr.c
fs/timerfd.c
fs/tracefs/inode.c
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/xattr.c
fs/udf/file.c
fs/udf/inode.c
fs/udf/super.c
fs/utimes.c
fs/xattr.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_fs.h
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.h
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_trans_ail.c
include/acpi/cppc_acpi.h
include/asm-generic/cputime_nsecs.h
include/asm-generic/pgtable.h
include/crypto/hash.h
include/crypto/if_alg.h
include/crypto/skcipher.h
include/drm/drm_atomic_helper.h
include/drm/drm_cache.h
include/drm/drm_crtc.h
include/drm/drm_dp_mst_helper.h
include/drm/drm_fixed.h
include/dt-bindings/clock/tegra210-car.h
include/linux/aer.h
include/linux/bcm963xx_nvram.h [new file with mode: 0644]
include/linux/bcm963xx_tag.h [new file with mode: 0644]
include/linux/bio.h
include/linux/blk-iopoll.h [deleted file]
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/ceph/ceph_features.h
include/linux/ceph/ceph_frag.h
include/linux/ceph/messenger.h
include/linux/cgroup-defs.h
include/linux/cleancache.h
include/linux/compiler.h
include/linux/cpuset.h
include/linux/crush/crush.h
include/linux/dax.h
include/linux/devpts_fs.h
include/linux/drbd.h
include/linux/drbd_genl.h
include/linux/efi.h
include/linux/fs.h
include/linux/fsnotify_backend.h
include/linux/ftrace.h
include/linux/gfp.h
include/linux/hrtimer.h
include/linux/huge_mm.h
include/linux/idr.h
include/linux/intel-iommu.h
include/linux/interrupt.h
include/linux/iommu.h
include/linux/irq_poll.h [new file with mode: 0644]
include/linux/irqdomain.h
include/linux/libata.h
include/linux/libnvdimm.h
include/linux/lightnvm.h
include/linux/lockdep.h
include/linux/lru_cache.h
include/linux/memcontrol.h
include/linux/mlx4/cmd.h
include/linux/mlx4/device.h
include/linux/mlx4/qp.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/qp.h
include/linux/mlx5/transobj.h [new file with mode: 0644]
include/linux/mlx5/vport.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mmzone.h
include/linux/module.h
include/linux/netdevice.h
include/linux/nfs_fs.h
include/linux/nfs_xdr.h
include/linux/nvme.h
include/linux/of.h
include/linux/pagemap.h
include/linux/pci.h
include/linux/perf_event.h
include/linux/pfn.h
include/linux/pfn_t.h
include/linux/pipe_fs_i.h
include/linux/platform_data/sdhci-pic32.h [new file with mode: 0644]
include/linux/pmem.h
include/linux/power/bq27xxx_battery.h
include/linux/radix-tree.h
include/linux/raid/pq.h
include/linux/random.h
include/linux/rmap.h
include/linux/sched.h
include/linux/shmem_fs.h
include/linux/skbuff.h
include/linux/soc/ti/knav_dma.h
include/linux/sunrpc/svc_rdma.h
include/linux/swiotlb.h
include/linux/thermal.h
include/linux/tracepoint.h
include/linux/tty.h
include/linux/ucs2_string.h
include/linux/workqueue.h
include/media/videobuf2-core.h
include/net/af_unix.h
include/net/bluetooth/l2cap.h
include/net/dst_metadata.h
include/net/inet_connection_sock.h
include/net/ip6_route.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/netfilter/nf_conntrack_core.h
include/net/scm.h
include/net/sctp/structs.h
include/net/sock.h
include/net/sock_reuseport.h
include/net/tcp.h
include/rdma/ib_addr.h
include/rdma/ib_cache.h
include/rdma/ib_mad.h
include/rdma/ib_pack.h
include/rdma/ib_pma.h
include/rdma/ib_sa.h
include/rdma/ib_verbs.h
include/scsi/iser.h [new file with mode: 0644]
include/sound/hdaudio.h
include/sound/rawmidi.h
include/sound/timer.h
include/target/target_core_backend.h
include/target/target_core_base.h
include/trace/events/ext4.h
include/trace/events/fence.h
include/trace/events/huge_memory.h
include/trace/events/irq.h
include/uapi/drm/etnaviv_drm.h
include/uapi/linux/Kbuild
include/uapi/linux/fs.h
include/uapi/linux/lightnvm.h
include/uapi/linux/ndctl.h
include/xen/interface/io/blkif.h
ipc/mqueue.c
ipc/sem.c
ipc/shm.c
ipc/util.c
ipc/util.h
kernel/audit_fsnotify.c
kernel/audit_watch.c
kernel/bpf/arraymap.c
kernel/bpf/verifier.c
kernel/cgroup.c
kernel/cpuset.c
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/events/ring_buffer.c
kernel/futex.c
kernel/irq/handle.c
kernel/irq/irqdomain.c
kernel/locking/lockdep.c
kernel/locking/rtmutex.c
kernel/memremap.c
kernel/module.c
kernel/pid.c
kernel/power/Kconfig
kernel/relay.c
kernel/resource.c
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/seccomp.c
kernel/signal.c
kernel/sysctl.c
kernel/time/hrtimer.c
kernel/time/itimer.c
kernel/time/ntp.c
kernel/time/posix-timers.c
kernel/time/tick-sched.c
kernel/time/timer_list.c
kernel/trace/bpf_trace.c
kernel/trace/ftrace.c
kernel/trace/trace.c
kernel/trace/trace_events.c
kernel/trace/trace_stack.c
kernel/workqueue.c
lib/Kconfig
lib/Kconfig.debug
lib/Kconfig.ubsan
lib/Makefile
lib/debugobjects.c
lib/dump_stack.c
lib/irq_poll.c [new file with mode: 0644]
lib/klist.c
lib/libcrc32c.c
lib/lru_cache.c
lib/radix-tree.c
lib/ratelimit.c
lib/scatterlist.c
lib/test-string_helpers.c
lib/ucs2_string.c
lib/vsprintf.c
mm/Kconfig
mm/backing-dev.c
mm/cleancache.c
mm/filemap.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/memblock.c
mm/memcontrol.c
mm/memory.c
mm/mempolicy.c
mm/migrate.c
mm/mincore.c
mm/mlock.c
mm/mmap.c
mm/mprotect.c
mm/mremap.c
mm/page_alloc.c
mm/percpu.c
mm/pgtable-generic.c
mm/shmem.c
mm/slab.c
mm/slab.h
mm/slab_common.c
mm/slob.c
mm/slub.c
mm/swapfile.c
mm/truncate.c
mm/util.c
mm/vmpressure.c
mm/vmscan.c
mm/vmstat.c
mm/workingset.c
net/9p/trans_fd.c
net/9p/trans_virtio.c
net/appletalk/ddp.c
net/batman-adv/gateway_client.c
net/batman-adv/hard-interface.c
net/batman-adv/translation-table.c
net/bluetooth/6lowpan.c
net/bluetooth/hci_core.c
net/bluetooth/hci_request.c
net/bluetooth/l2cap_core.c
net/bluetooth/l2cap_sock.c
net/bluetooth/smp.c
net/bridge/br.c
net/bridge/br_mdb.c
net/caif/cfrfml.c
net/ceph/auth_x.c
net/ceph/auth_x.h
net/ceph/crush/mapper.c
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/core/dev.c
net/core/flow_dissector.c
net/core/scm.c
net/core/skbuff.c
net/core/sock_reuseport.c
net/core/sysctl_net_core.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dsa/slave.c
net/ipv4/Kconfig
net/ipv4/devinet.c
net/ipv4/fib_trie.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_diag.c
net/ipv4/ip_fragment.c
net/ipv4/ip_gre.c
net/ipv4/ip_input.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel.c
net/ipv4/ipconfig.c
net/ipv4/netfilter/nf_defrag_ipv4.c
net/ipv4/ping.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/datagram.c
net/ipv6/ip6_flowlabel.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
net/ipv6/route.c
net/ipv6/sit.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/irda/ircomm/ircomm_param.c
net/iucv/af_iucv.c
net/l2tp/l2tp_netlink.c
net/mac80211/ibss.c
net/mac80211/main.c
net/mac80211/mesh.c
net/mac80211/mesh.h
net/mac80211/mlme.c
net/mac80211/offchannel.c
net/mac80211/scan.c
net/mac80211/sta_info.c
net/mac80211/status.c
net/mac80211/util.c
net/netfilter/Kconfig
net/netfilter/ipset/ip_set_hash_netiface.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_tables_netdev.c
net/netfilter/nfnetlink.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nft_byteorder.c
net/netfilter/nft_counter.c
net/netfilter/nft_ct.c
net/netfilter/xt_TCPMSS.c
net/netfilter/xt_TEE.c
net/netlink/af_netlink.c
net/openvswitch/vport-vxlan.c
net/rds/ib.c
net/rds/iw.c
net/rfkill/core.c
net/sched/sch_api.c
net/sched/sch_drr.c
net/sctp/input.c
net/sctp/proc.c
net/sctp/protocol.c
net/sctp/sm_sideeffect.c
net/sctp/socket.c
net/sctp/transport.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/cache.c
net/sunrpc/rpc_pipe.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/Makefile
net/sunrpc/xprtrdma/backchannel.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c [new file with mode: 0644]
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/switchdev/switchdev.c
net/tipc/link.c
net/tipc/node.c
net/tipc/subscr.c
net/unix/af_unix.c
net/unix/diag.c
net/unix/garbage.c
net/vmw_vsock/af_vsock.c
net/wireless/reg.c
scripts/mod/modpost.c
scripts/prune-kernel [new file with mode: 0755]
security/inode.c
security/integrity/evm/evm_main.c
security/integrity/ima/ima_main.c
security/keys/key.c
security/selinux/hooks.c
security/selinux/nlmsgtab.c
security/selinux/selinuxfs.c
sound/core/Kconfig
sound/core/compress_offload.c
sound/core/control.c
sound/core/hrtimer.c
sound/core/oss/pcm_oss.c
sound/core/pcm_compat.c
sound/core/pcm_native.c
sound/core/rawmidi.c
sound/core/seq/oss/seq_oss_init.c
sound/core/seq/oss/seq_oss_synth.c
sound/core/seq/seq_clientmgr.c
sound/core/seq/seq_compat.c
sound/core/seq/seq_memory.c
sound/core/seq/seq_ports.c
sound/core/seq/seq_timer.c
sound/core/seq/seq_virmidi.c
sound/core/timer.c
sound/drivers/dummy.c
sound/firewire/bebob/bebob_stream.c
sound/firewire/digi00x/amdtp-dot.c
sound/firewire/tascam/tascam-transaction.c
sound/firewire/tascam/tascam.c
sound/firewire/tascam/tascam.h
sound/hda/hdac_controller.c
sound/hda/hdac_i915.c
sound/isa/Kconfig
sound/pci/Kconfig
sound/pci/emu10k1/emu10k1_main.c
sound/pci/hda/hda_bind.c
sound/pci/hda/hda_controller.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_intel.c
sound/pci/hda/hda_jack.c
sound/pci/hda/hda_jack.h
sound/pci/hda/patch_ca0132.c
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/patch_sigmatel.c
sound/soc/amd/acp-pcm-dma.c
sound/soc/codecs/arizona.c
sound/soc/codecs/rt286.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/rt5659.c
sound/soc/codecs/rt5659.h
sound/soc/codecs/sigmadsp-i2c.c
sound/soc/codecs/wm5110.c
sound/soc/codecs/wm8960.c
sound/soc/dwc/designware_i2s.c
sound/soc/fsl/fsl_ssi.c
sound/soc/fsl/imx-spdif.c
sound/soc/generic/simple-card.c
sound/soc/intel/Kconfig
sound/soc/intel/atom/sst-mfld-platform-pcm.c
sound/soc/intel/boards/skl_rt286.c
sound/soc/intel/common/Makefile
sound/soc/intel/common/sst-acpi.c
sound/soc/intel/common/sst-match-acpi.c
sound/soc/intel/skylake/skl-messages.c
sound/soc/intel/skylake/skl-pcm.c
sound/soc/intel/skylake/skl-topology.c
sound/soc/intel/skylake/skl.c
sound/soc/mediatek/Kconfig
sound/soc/mxs/mxs-saif.c
sound/soc/qcom/lpass-platform.c
sound/soc/soc-dapm.c
sound/soc/soc-pcm.c
sound/sparc/Kconfig
sound/spi/at73c213.c
sound/usb/midi.c
sound/usb/quirks.c
tools/lib/traceevent/event-parse.c
tools/perf/Makefile.perf
tools/perf/arch/x86/tests/intel-cqm.c
tools/perf/config/Makefile
tools/perf/tests/make
tools/perf/ui/browsers/annotate.c
tools/perf/util/hist.c
tools/perf/util/intel-pt.c
tools/perf/util/parse-events.c
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/session.c
tools/perf/util/stat.c
tools/perf/util/symbol.c
tools/perf/util/trace-event-parse.c
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c
tools/testing/selftests/efivarfs/efivarfs.sh
tools/testing/selftests/efivarfs/open-unlink.c
tools/testing/selftests/ftrace/test.d/instances/instance.tc
tools/testing/selftests/timers/valid-adjtimex.c
tools/virtio/asm/barrier.h
tools/virtio/linux/compiler.h [new file with mode: 0644]
tools/virtio/linux/kernel.h
tools/virtio/ringtest/Makefile [new file with mode: 0644]
tools/virtio/ringtest/README [new file with mode: 0644]
tools/virtio/ringtest/main.c [new file with mode: 0644]
tools/virtio/ringtest/main.h [new file with mode: 0644]
tools/virtio/ringtest/ring.c [new file with mode: 0644]
tools/virtio/ringtest/run-on-all.sh [new file with mode: 0755]
tools/virtio/ringtest/virtio_ring_0_9.c [new file with mode: 0644]
tools/virtio/ringtest/virtio_ring_poll.c [new file with mode: 0644]
virt/kvm/arm/arch_timer.c
virt/kvm/arm/vgic.c
virt/kvm/async_pf.c

index b1e9a97653dc64853775a97db377a8f269cc8d95..7e6c5334c337ae0c792a36ec5a2e9309b4128061 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>
diff --git a/Documentation/ABI/testing/configfs-rdma_cm b/Documentation/ABI/testing/configfs-rdma_cm
new file mode 100644 (file)
index 0000000..5c389aa
--- /dev/null
@@ -0,0 +1,22 @@
+What:          /config/rdma_cm
+Date:          November 29, 2015
+KernelVersion:  4.4.0
+Description:   Interface is used to configure RDMA-capable HCAs with respect to
+               RDMA-CM attributes.
+
+               Attributes are visible only when configfs is mounted. To mount
+               configfs in /config directory use:
+               # mount -t configfs none /config/
+
+               In order to set parameters related to a specific HCA, a directory
+               for this HCA has to be created:
+               mkdir -p /config/rdma_cm/<hca>
+
+
+What:          /config/rdma_cm/<hca>/ports/<port-num>/default_roce_mode
+Date:          November 29, 2015
+KernelVersion:  4.4.0
+Description:   RDMA-CM based connections from HCA <hca> at port <port-num>
+               will be initiated with this RoCE type as default.
+               The possible RoCE types are either "IB/RoCE v1" or "RoCE v2".
+               This parameter has RW access.
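
For illustration, once the per-HCA directory exists the default RoCE type can
be selected by writing one of the listed strings to the attribute file. A
minimal userspace sketch, not part of this patch; the HCA name "mlx4_0" and
port number 1 below are placeholders:

        /* Write "RoCE v2" to a default_roce_mode configfs attribute. */
        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                const char *path =
                        "/config/rdma_cm/mlx4_0/ports/1/default_roce_mode";
                const char *mode = "RoCE v2";
                int fd = open(path, O_WRONLY);

                if (fd < 0) {
                        perror("open");
                        return 1;
                }
                if (write(fd, mode, strlen(mode)) < 0)
                        perror("write");
                close(fd);
                return 0;
        }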
diff --git a/Documentation/ABI/testing/sysfs-class-infiniband b/Documentation/ABI/testing/sysfs-class-infiniband
new file mode 100644 (file)
index 0000000..a86abe6
--- /dev/null
@@ -0,0 +1,16 @@
+What:          /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/ndevs/<gid-index>
+Date:          November 29, 2015
+KernelVersion: 4.4.0
+Contact:       linux-rdma@vger.kernel.org
+Description:   The name of the net-device associated with the GID at
+               index <gid-index>.
+
+What:          /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/types/<gid-index>
+Date:          November 29, 2015
+KernelVersion: 4.4.0
+Contact:       linux-rdma@vger.kernel.org
+Description:   The RoCE type of the GID at index <gid-index>.
+               This is either "IB/RoCE v1" for IB and RoCE v1 based GIDs
+               or "RoCE v2" for RoCE v2 based GIDs.
+
+
index 7b57fc087088f49756eeb8eaabf403bfbbd92b93..49585b6e1ea24c951ea58090b762740aa36b066d 100644 (file)
@@ -3,7 +3,7 @@ Linux IOMMU Support
 
 The architecture spec can be obtained from the below location.
 
-http://www.intel.com/technology/virtualization/
+http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
 
 This guide gives a quick cheat sheet for some basic understanding.
 
index 65b3eac8856cf7ec26b341b217046a2fb9b7bf26..ff49cf901148d895b765800ec6ddb79c0e38ed53 100644 (file)
@@ -7,7 +7,7 @@ This is the authoritative documentation on the design, interface and
 conventions of cgroup v2.  It describes all userland-visible aspects
 of cgroup including core and specific controller behaviors.  All
 future changes must be reflected in this document.  Documentation for
-v1 is available under Documentation/cgroup-legacy/.
+v1 is available under Documentation/cgroup-v1/.
 
 CONTENTS
 
@@ -843,6 +843,10 @@ PAGE_SIZE multiple when read back.
                Amount of memory used to cache filesystem data,
                including tmpfs and shared memory.
 
+         sock
+
+               Amount of memory used in network transmission buffers
+
          file_mapped
 
                Amount of cached filesystem data mapped with mmap()
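
For illustration, the new "sock" field can be read out of a cgroup's
memory.stat file like any other entry. A minimal sketch, not part of this
patch; the cgroup path "example" is a placeholder:

        /* Print the "sock" entry of a v2 cgroup's memory.stat file. */
        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                char key[64];
                unsigned long long val;
                FILE *f = fopen("/sys/fs/cgroup/example/memory.stat", "r");

                if (!f)
                        return 1;
                while (fscanf(f, "%63s %llu", key, &val) == 2)
                        if (!strcmp(key, "sock"))
                                printf("sock: %llu bytes\n", val);
                fclose(f);
                return 0;
        }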
index ace05992a262592fb22f88db5c2e13036e20c015..20df350b9ef3d491b3e2e714f8877d74477afcd8 100644 (file)
@@ -30,7 +30,7 @@ that they are defined using standard clock bindings with following
 clock-output-names:
  - "xin24m" - crystal input - required,
  - "ext_i2s" - external I2S clock - optional,
- - "ext_gmac" - external GMAC clock - optional
+ - "rmii_clkin" - external EMAC clock - optional
 
 Example: Clock controller node:
 
index cf1333d1dd52974e4e960a7c97418f86864e56f1..21641236c095dfc257b0d5375ef0e8710129e690 100644 (file)
@@ -6,6 +6,7 @@ Required properties:
 Optional properties:
        - autorepeat: Boolean, Enable auto repeat feature of Linux input
          subsystem.
+       - label: String, name of the input device.
 
 Each button (key) is represented as a sub-node of "gpio-keys":
 Subnode properties:
index 7803e77d85cbe9d69dfcd42405ece4de0ddfe329..007a5b46256ad57effb087c5beb2eae8c7e3d1e7 100644 (file)
@@ -24,9 +24,8 @@ Main node required properties:
                1 = edge triggered
                4 = level triggered
 
-  Cells 4 and beyond are reserved for future use. When the 1st cell
-  has a value of 0 or 1, cells 4 and beyond act as padding, and may be
-  ignored. It is recommended that padding cells have a value of 0.
+  Cells 4 and beyond are reserved for future use and must have a value
+  of 0 if present.
 
 - reg : Specifies base physical address(s) and size of the GIC
   registers, in the following order:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt b/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt
new file mode 100644 (file)
index 0000000..c3a1b37
--- /dev/null
@@ -0,0 +1,67 @@
+Microchip PIC32 Interrupt Controller
+====================================
+
+The Microchip PIC32 contains an Enhanced Vectored Interrupt Controller (EVIC).
+It handles all internal and external interrupts. This controller exists outside
+of the CPU and is the arbitrator of all interrupts (including interrupts from
+the CPU itself) before they are presented to the CPU.
+
+External interrupts have a software-configurable edge polarity. Non-external
+interrupts have a type and polarity that is determined by the source of the
+interrupt.
+
+Required properties
+-------------------
+
+- compatible: Should be "microchip,pic32mzda-evic"
+- reg: Specifies physical base address and size of register range.
+- interrupt-controller: Identifies the node as an interrupt controller.
+- #interrupt-cells: Specifies the number of cells used to encode an interrupt
+  source connected to this controller. The value shall be 2 and the interrupt
+  descriptor shall have the following format:
+
+       <hw_irq irq_type>
+
+  hw_irq - represents the hardware interrupt number as in the data sheet.
+  irq_type - is used to describe the type and polarity of an interrupt. For
+  internal interrupts use IRQ_TYPE_EDGE_RISING for non-persistent interrupts and
+  IRQ_TYPE_LEVEL_HIGH for persistent interrupts. For external interrupts use
+  IRQ_TYPE_EDGE_RISING or IRQ_TYPE_EDGE_FALLING to select the desired polarity.
+
+Optional properties
+-------------------
+- microchip,external-irqs: u32 array of external interrupts with software
+  polarity configuration. This array corresponds to the bits in the INTCON
+  SFR.
+
+Example
+-------
+
+evic: interrupt-controller@1f810000 {
+       compatible = "microchip,pic32mzda-evic";
+       interrupt-controller;
+       #interrupt-cells = <2>;
+       reg = <0x1f810000 0x1000>;
+       microchip,external-irqs = <3 8 13 18 23>;
+};
+
+Each device/peripheral must request its interrupt line with the associated type
+and polarity.
+
+Internal interrupt DTS snippet
+------------------------------
+
+device@1f800000 {
+       ...
+       interrupts = <113 IRQ_TYPE_LEVEL_HIGH>;
+       ...
+};
+
+External interrupt DTS snippet
+------------------------------
+
+device@1f800000 {
+       ...
+       interrupts = <3 IRQ_TYPE_EDGE_RISING>;
+       ...
+};
diff --git a/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt b/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt
new file mode 100644 (file)
index 0000000..1c8dbc4
--- /dev/null
@@ -0,0 +1,31 @@
+* Microchip PIC32MZDA Platforms
+
+PIC32MZDA Starter Kit
+Required root node properties:
+    - compatible = "microchip,pic32mzda-sk", "microchip,pic32mzda"
+
+CPU nodes:
+----------
+A "cpus" node is required.  Required properties:
+ - #address-cells: Must be 1.
+ - #size-cells: Must be 0.
+A CPU sub-node is also required.  Required properties:
+ - device_type: Must be "cpu".
+ - compatible: Must be "mti,mips14KEc".
+Example:
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu0: cpu@0 {
+                       device_type = "cpu";
+                       compatible = "mti,mips14KEc";
+               };
+       };
+
+Boot protocol
+--------------
+In accordance with the Unified Hosting Interface Reference Manual (MD01069), the
+bootloader must pass the following arguments to the kernel:
+ - $a0: -2.
+ - $a1: KSEG0 address of the flattened device-tree blob.
index 451fef26b4dfaf05b6782099e54816d52a52baa8..10587bdadbbe5a8e8fe703e23c194420e3701f9f 100644 (file)
@@ -68,7 +68,7 @@ ethernet@f0b60000 {
                phy1: ethernet-phy@1 {
                        max-speed = <1000>;
                        reg = <0x1>;
-                       compatible = "brcm,28nm-gphy", "ethernet-phy-ieee802.3-c22";
+                       compatible = "ethernet-phy-ieee802.3-c22";
                };
        };
 };
@@ -115,7 +115,7 @@ ethernet@f0ba0000 {
                phy0: ethernet-phy@0 {
                        max-speed = <1000>;
                        reg = <0x0>;
-                       compatible = "brcm,bcm53125", "ethernet-phy-ieee802.3-c22";
+                       compatible = "ethernet-phy-ieee802.3-c22";
                };
        };
 };
index 80411b2f04906bc065513cb8a968f1a57f132906..ecacfa44b1eb9603d8b77c712dcff565ef4f470d 100644 (file)
@@ -4,8 +4,6 @@ Required properties:
 - compatible: should be "hisilicon,hns-dsaf-v1" or "hisilicon,hns-dsaf-v2".
   "hisilicon,hns-dsaf-v1" is for hip05.
   "hisilicon,hns-dsaf-v2" is for Hi1610 and Hi1612.
-- dsa-name: dsa fabric name who provide this interface.
-  should be "dsafX", X is the dsaf id.
 - mode: dsa fabric mode string. only support one of dsaf modes like these:
                "2port-64vf",
                "6port-16rss",
@@ -26,9 +24,8 @@ Required properties:
 
 Example:
 
-dsa: dsa@c7000000 {
+dsaf0: dsa@c7000000 {
        compatible = "hisilicon,hns-dsaf-v1";
-       dsa_name = "dsaf0";
        mode = "6port-16rss";
        interrupt-parent = <&mbigen_dsa>;
        reg = <0x0 0xC0000000 0x0 0x420000
index 41d19be7011ecd543af6c1226d2f332b8306a01e..e6a9d1c30878f89ff948d5fcd8cbe308e53327df 100644 (file)
@@ -4,8 +4,9 @@ Required properties:
 - compatible: "hisilicon,hns-nic-v1" or "hisilicon,hns-nic-v2".
   "hisilicon,hns-nic-v1" is for hip05.
   "hisilicon,hns-nic-v2" is for Hi1610 and Hi1612.
-- ae-name: accelerator name who provides this interface,
-  is simply a name referring to the name of name in the accelerator node.
+- ae-handle: accelerator engine handle for hns,
+  specifies a reference to the associated hardware driver node.
+  see Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
 - port-id: is the index of port provided by DSAF (the accelerator). DSAF can
  connect to 8 PHYs. Ports 0 and 1 are both used for administration purposes.
  They are called debug ports.
@@ -41,7 +42,7 @@ Example:
 
        ethernet@0{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <0>;
                local-mac-address = [a2 14 e4 4b 56 76];
        };
index aeea50c84e921fb301fc7fe9fd2cd6f295c7139c..d0cb8693963b51f447afbb9b6e6c5bd42e87f34e 100644 (file)
@@ -6,12 +6,17 @@ Required properties:
 - interrupts: interrupt for the device
 - phy: See ethernet.txt file in the same directory.
 - phy-mode: See ethernet.txt file in the same directory
-- clocks: a pointer to the reference clock for this device.
+- clocks: List of clocks for this device. At least one clock is
+  mandatory for the core clock. If several clocks are given, then the
+  clock-names property must be used to identify them.
 
 Optional properties:
 - tx-csum-limit: maximum MTU supported by the port that allows TX checksum.
   Value is presented in bytes. If not used, by default 1600B is set for
   "marvell,armada-370-neta" and 9800B for others.
+- clock-names: List of names corresponding to clocks property; shall be
+  "core" for core clock and "bus" for the optional bus clock.
+
 
 Example:
 
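For illustration, a consumer driver would look up the clocks by the names
defined above roughly as follows. This is a sketch under the binding's
assumptions, not the actual mvneta code; return values of the enable calls
are ignored for brevity:

        #include <linux/clk.h>
        #include <linux/device.h>

        static int neta_clocks_get(struct device *dev)
        {
                struct clk *core, *bus;

                core = devm_clk_get(dev, "core");       /* mandatory clock */
                if (IS_ERR(core))
                        return PTR_ERR(core);

                bus = devm_clk_get(dev, "bus");         /* optional bus clock */
                if (IS_ERR(bus))
                        bus = NULL;                     /* absent is fine */

                clk_prepare_enable(core);
                if (bus)
                        clk_prepare_enable(bus);
                return 0;
        }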
index 79384113c2b060a4068413d427d3509cdcaa2d5b..694987d3c17a1085ba9fce589d81cc9ee99a7717 100644 (file)
@@ -38,7 +38,6 @@ Example :
 
                        phy11: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -48,7 +47,6 @@ Example :
                        };
                        phy12: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -58,7 +56,6 @@ Example :
                        };
                        phy13: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -68,7 +65,6 @@ Example :
                        };
                        phy14: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -85,7 +81,6 @@ Example :
 
                        phy21: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -95,7 +90,6 @@ Example :
                        };
                        phy22: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -105,7 +99,6 @@ Example :
                        };
                        phy23: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -115,7 +108,6 @@ Example :
                        };
                        phy24: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
index f65606f8d632b86bab2af28eb4a198d426a378ab..491f5bd55203b31c62b63ed2bcc624a701fd1580 100644 (file)
@@ -47,7 +47,6 @@ Example :
 
                        phy11: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -57,7 +56,6 @@ Example :
                        };
                        phy12: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -67,7 +65,6 @@ Example :
                        };
                        phy13: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -77,7 +74,6 @@ Example :
                        };
                        phy14: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -94,7 +90,6 @@ Example :
 
                        phy21: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -104,7 +99,6 @@ Example :
                        };
                        phy22: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -114,7 +108,6 @@ Example :
                        };
                        phy23: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -124,7 +117,6 @@ Example :
                        };
                        phy24: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
diff --git a/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt b/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt
new file mode 100644 (file)
index 0000000..aa63130
--- /dev/null
@@ -0,0 +1,26 @@
+Mediatek Gigabit Switch
+=======================
+
+The Mediatek gigabit switch can be found on Mediatek SoCs (mt7620, mt7621).
+
+Required properties:
+- compatible: Should be "mediatek,mt7620-gsw" or "mediatek,mt7621-gsw"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the gigabit switch's interrupt
+- resets: Should contain the gigabit switch's resets
+- reset-names: Should contain the reset name "gsw"
+
+Example:
+
+gsw@10110000 {
+       compatible = "mediatek,mt7620-gsw";
+       reg = <0x10110000 8000>;
+
+       resets = <&rstctrl 23>;
+       reset-names = "gsw";
+
+       interrupt-parent = <&intc>;
+       interrupts = <17>;
+};
index 525e1658f2da5ca9ed22594fda97a25646ac2e20..bc1c3c8bf8fa37fa7e08dcabc65f1752a8a79749 100644 (file)
@@ -17,8 +17,7 @@ Optional Properties:
   "ethernet-phy-ieee802.3-c22" or "ethernet-phy-ieee802.3-c45" for
   PHYs that implement IEEE802.3 clause 22 or IEEE802.3 clause 45
   specifications. If neither of these are specified, the default is to
-  assume clause 22. The compatible list may also contain other
-  elements.
+  assume clause 22.
 
   If the phy's identifier is known then the list may contain an entry
   of the form: "ethernet-phy-idAAAA.BBBB" where
@@ -28,6 +27,9 @@ Optional Properties:
             4 hex digits. This is the chip vendor OUI bits 19:24,
             followed by 10 bits of a vendor specific ID.
 
+  The compatible list should not contain other values than those
+  listed here.
+
 - max-speed: Maximum PHY supported speed (10, 100, 1000...)
 
 - broken-turn-around: If set, indicates the PHY device does not correctly
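
For illustration, the "ethernet-phy-idAAAA.BBBB" form encodes the 32-bit PHY
identifier with the upper 16 bits before the dot and the lower 16 bits after
it. A sketch of constructing such a string; the PHY ID value below is a
placeholder:

        #include <stdio.h>

        int main(void)
        {
                unsigned int phy_id = 0x01410e40;       /* placeholder ID */
                char compat[32];

                snprintf(compat, sizeof(compat), "ethernet-phy-id%04x.%04x",
                         (phy_id >> 16) & 0xffff, phy_id & 0xffff);
                printf("%s\n", compat);         /* ethernet-phy-id0141.0e40 */
                return 0;
        }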
diff --git a/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt b/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt
new file mode 100644 (file)
index 0000000..88b095d
--- /dev/null
@@ -0,0 +1,61 @@
+Ralink Frame Engine Ethernet controller
+=======================================
+
+The Ralink frame engine ethernet controller can be found on Ralink and
+Mediatek SoCs (RT288x, RT3x5x, RT366x, RT388x, rt5350, mt7620, mt7621, mt76x8).
+
+Depending on the SoC, there are a number of ports connected to the CPU port
+directly and/or via a (gigabit-)switch.
+
+* Ethernet controller node
+
+Required properties:
+- compatible: Should be one of "ralink,rt2880-eth", "ralink,rt3050-eth",
+  "ralink,rt3883-eth", "ralink,rt5350-eth", "mediatek,mt7620-eth",
+  "mediatek,mt7621-eth"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the frame engine's interrupt
+- resets: Should contain the frame engine's resets
+- reset-names: Should contain the reset name "fe". If a switch is present
+  "esw" is also required.
+
+
+* Ethernet port node
+
+Required properties:
+- compatible: Should be "ralink,eth-port"
+- reg: The number of the physical port
+- phy-handle: reference to the node describing the phy
+
+Example:
+
+mdio-bus {
+       ...
+       phy0: ethernet-phy@0 {
+               phy-mode = "mii";
+               reg = <0>;
+       };
+};
+
+ethernet@400000 {
+       compatible = "ralink,rt2880-eth";
+       reg = <0x00400000 10000>;
+
+       #address-cells = <1>;
+       #size-cells = <0>;
+
+       resets = <&rstctrl 18>;
+       reset-names = "fe";
+
+       interrupt-parent = <&cpuintc>;
+       interrupts = <5>;
+
+       port@0 {
+               compatible = "ralink,eth-port";
+               reg = <0>;
+               phy-handle = <&phy0>;
+       };
+
+};
diff --git a/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt b/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt
new file mode 100644 (file)
index 0000000..2e79bd3
--- /dev/null
@@ -0,0 +1,32 @@
+Ralink Fast Ethernet Embedded Switch
+====================================
+
+The Ralink fast ethernet embedded switch can be found on Ralink and Mediatek
+SoCs (RT3x5x, RT5350, MT76x8).
+
+Required properties:
+- compatible: Should be "ralink,rt3050-esw"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the embedded switch's interrupt
+- resets: Should contain the embedded switch's resets
+- reset-names: Should contain the reset name "esw"
+
+Optional properties:
+- ralink,portmap: can be used to choose whether the default switch setup is
+  llllw or wllll
+- ralink,led_polarity: override the active high/low settings of the LEDs
+
+Example:
+
+esw@10110000 {
+       compatible = "ralink,rt3050-esw";
+       reg = <0x10110000 8000>;
+
+       resets = <&rstctrl 23>;
+       reset-names = "esw";
+
+       interrupt-parent = <&intc>;
+       interrupts = <17>;
+};
index 81a9f9e6b45ff85c83da488fbafc2c2b89839600..c8ac222eac67a0c330bec3d641d0ed65cc399281 100644 (file)
@@ -82,8 +82,8 @@ Example:
                                  "ch16", "ch17", "ch18", "ch19",
                                  "ch20", "ch21", "ch22", "ch23",
                                  "ch24";
-               clocks = <&mstp8_clks R8A7795_CLK_ETHERAVB>;
-               power-domains = <&cpg_clocks>;
+               clocks = <&cpg CPG_MOD 812>;
+               power-domains = <&cpg>;
                phy-mode = "rgmii-id";
                phy-handle = <&phy0>;
 
index 4e8b90e43dd83c72d81df6d644317ba60b489559..07a75094c5a8ed07e927c0641584d5e8348e3dc6 100644 (file)
@@ -8,6 +8,7 @@ OHCI and EHCI controllers.
 Required properties:
 - compatible: "renesas,pci-r8a7790" for the R8A7790 SoC;
              "renesas,pci-r8a7791" for the R8A7791 SoC;
+             "renesas,pci-r8a7793" for the R8A7793 SoC;
              "renesas,pci-r8a7794" for the R8A7794 SoC;
              "renesas,pci-rcar-gen2" for a generic R-Car Gen2 compatible device
 
index 558fe528ae1951104b6266a2f5ed4849017c6b35..6cf99690eef94fa4ed033fb91e08c2aa010395bf 100644 (file)
@@ -4,6 +4,7 @@ Required properties:
 compatible: "renesas,pcie-r8a7779" for the R8A7779 SoC;
            "renesas,pcie-r8a7790" for the R8A7790 SoC;
            "renesas,pcie-r8a7791" for the R8A7791 SoC;
+           "renesas,pcie-r8a7793" for the R8A7793 SoC;
            "renesas,pcie-r8a7795" for the R8A7795 SoC;
            "renesas,pcie-rcar-gen2" for a generic R-Car Gen2 compatible device.
 
diff --git a/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt b/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt
new file mode 100644 (file)
index 0000000..cafe219
--- /dev/null
@@ -0,0 +1,18 @@
+* Atheros AR71XX/9XXX USB PHY
+
+Required properties:
+- compatible: "qca,ar7100-usb-phy"
+- #phy-cells: should be 0
+- reset-names: "usb-phy"[, "usb-suspend-override"]
+- resets: references to the reset controllers
+
+Example:
+
+       usb-phy {
+               compatible = "qca,ar7100-usb-phy";
+
+               reset-names = "usb-phy", "usb-suspend-override";
+               resets = <&rst 4>, <&rst 3>;
+
+               #phy-cells = <0>;
+       };
index d18109657da6c0154d3abebd980178f91b4a9102..4f05d208c95cfeac7ebbc217003d34f08e0f4772 100644 (file)
@@ -26,11 +26,7 @@ Example:
                ti,pmic-shutdown-controller;
 
                regulators {
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-
                        dcdc1_reg: dcdc1 {
-                               reg = <0>;
                                regulator-min-microvolt = <900000>;
                                regulator-max-microvolt = <1800000>;
                                regulator-boot-on;
@@ -38,7 +34,6 @@ Example:
                        };
 
                        dcdc2_reg: dcdc2 {
-                               reg = <1>;
                                regulator-min-microvolt = <900000>;
                                regulator-max-microvolt = <3300000>;
                                regulator-boot-on;
@@ -46,7 +41,6 @@ Example:
                        };
 
                        dcdc3_reg: dcc3 {
-                               reg = <2>;
                                regulator-min-microvolt = <900000>;
                                regulator-max-microvolt = <1500000>;
                                regulator-boot-on;
@@ -54,7 +48,6 @@ Example:
                        };
 
                        ldo1_reg: ldo1 {
-                               reg = <3>;
                                regulator-min-microvolt = <1000000>;
                                regulator-max-microvolt = <3300000>;
                                regulator-boot-on;
@@ -62,7 +55,6 @@ Example:
                        };
 
                        ldo2_reg: ldo2 {
-                               reg = <4>;
                                regulator-min-microvolt = <900000>;
                                regulator-max-microvolt = <3300000>;
                                regulator-boot-on;
@@ -70,7 +62,6 @@ Example:
                        };
 
                        ldo3_reg: ldo3 {
-                               reg = <5>;
                                regulator-min-microvolt = <1800000>;
                                regulator-max-microvolt = <3300000>;
                                regulator-boot-on;
@@ -78,7 +69,6 @@ Example:
                        };
 
                        ldo4_reg: ldo4 {
-                               reg = <6>;
                                regulator-min-microvolt = <1800000>;
                                regulator-max-microvolt = <3300000>;
                                regulator-boot-on;
index ac2fcd6ff4b8cde39bf8446c406571fefa7684a9..1068ffce9f9125559e301f60f9c0ce6ca40d2374 100644 (file)
@@ -14,6 +14,10 @@ Required properties:
   interrupt number is the rtc alarm interrupt and second interrupt number
   is the rtc tick interrupt. The number of cells representing an interrupt
   depends on the parent interrupt controller.
+- clocks: Must contain a list of phandle and clock specifier pairs for the
+          rtc and source clocks.
+- clock-names: Must contain "rtc" and "rtc_src" entries sorted in the
+               same order as the clocks property.
 
 Example:
 
@@ -21,4 +25,6 @@ Example:
                compatible = "samsung,s3c6410-rtc";
                reg = <0x10070000 0x100>;
                interrupts = <44 0 45 0>;
+               clocks = <&clock CLK_RTC>, <&s2mps11_osc S2MPS11_CLK_AP>;
+               clock-names = "rtc", "rtc_src";
        };
index 35ae1fb3537f3217a50fe92fd8f4042b52e6e6d0..ed94c217c98d18a655aa4df150de7b62f1e17440 100644 (file)
@@ -9,7 +9,7 @@ Optional properties:
 - fsl,uart-has-rtscts : Indicate the uart has rts and cts
 - fsl,irda-mode : Indicate the uart supports irda mode
 - fsl,dte-mode : Indicate the uart works in DTE mode. The uart works
-                  is DCE mode by default.
+                  in DCE mode by default.
 
 Note: Each uart controller should have an alias correctly numbered
 in "aliases" node.
index ce55c0a6f7578ee192a207a6b247095d605072b8..4da41bf1888e9baaf2471a73db7d8019be9d6d0c 100644 (file)
@@ -30,6 +30,8 @@ The compatible list for this generic sound card currently:
  "fsl,imx-audio-sgtl5000"
  (compatible with Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt)
 
+ "fsl,imx-audio-wm8960"
+
 Required properties:
 
   - compatible         : Contains one of entries in the compatible list.
index 332e625f6ed01cb4e442c0ea530ab29eb92f2a77..e5ee3f15989337f37b9d5e43036c31a2b4c9ff11 100644 (file)
@@ -1,8 +1,9 @@
 * Renesas R-Car Thermal
 
 Required properties:
-- compatible           : "renesas,thermal-<soctype>", "renesas,rcar-thermal"
-                         as fallback.
+- compatible           : "renesas,thermal-<soctype>",
+                          "renesas,rcar-gen2-thermal" (with thermal-zone) or
+                          "renesas,rcar-thermal" (without thermal-zone) as fallback.
                          Examples with soctypes are:
                            - "renesas,thermal-r8a73a4" (R-Mobile APE6)
                            - "renesas,thermal-r8a7779" (R-Car H1)
@@ -36,3 +37,35 @@ thermal@e61f0000 {
                0xe61f0300 0x38>;
        interrupts = <0 69 IRQ_TYPE_LEVEL_HIGH>;
 };
+
+Example (with thermal-zone):
+
+thermal-zones {
+       cpu_thermal: cpu-thermal {
+               polling-delay-passive   = <1000>;
+               polling-delay           = <5000>;
+
+               thermal-sensors = <&thermal>;
+
+               trips {
+                       cpu-crit {
+                               temperature     = <115000>;
+                               hysteresis      = <0>;
+                               type            = "critical";
+                       };
+               };
+               cooling-maps {
+               };
+       };
+};
+
+thermal: thermal@e61f0000 {
+       compatible =    "renesas,thermal-r8a7790",
+                       "renesas,rcar-gen2-thermal",
+                       "renesas,rcar-thermal";
+       reg = <0 0xe61f0000 0 0x14>, <0 0xe61f0100 0 0x38>;
+       interrupts = <0 69 IRQ_TYPE_LEVEL_HIGH>;
+       clocks = <&mstp5_clks R8A7790_CLK_THERMAL>;
+       power-domains = <&cpg_clocks>;
+       #thermal-sensor-cells = <0>;
+};
index 0dfa60d88dd3b4bb4ce2a42d2c0b3d15f4835e45..08efe6bc21935fa36785612043489faab5ba33c7 100644 (file)
@@ -2,8 +2,10 @@
 
 Required properties:
 - compatible : should be "rockchip,<name>-tsadc"
+   "rockchip,rk3228-tsadc": found on RK3228 SoCs
    "rockchip,rk3288-tsadc": found on RK3288 SoCs
    "rockchip,rk3368-tsadc": found on RK3368 SoCs
+   "rockchip,rk3399-tsadc": found on RK3399 SoCs
 - reg : physical base address of the controller and length of memory mapped
        region.
 - interrupts : The interrupt number to the cpu. The interrupt specifier format
index c477af086e6569398d85b3f938e9f067b1976f7e..686a64bba775e0af9990cf305ab81f1ac4c8c278 100644 (file)
@@ -14,3 +14,10 @@ filesystem.
 efivarfs is typically mounted like this,
 
        mount -t efivarfs none /sys/firmware/efi/efivars
+
+Due to the presence of numerous firmware bugs where removing non-standard
+UEFI variables causes the system firmware to fail to POST, efivarfs
+files that are not well-known standardized variables are created
+as immutable files.  This doesn't prevent removal - "chattr -i" will work -
+but it does prevent this kind of failure from being triggered
+accidentally.
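
For illustration, "chattr -i" amounts to clearing FS_IMMUTABLE_FL through the
FS_IOC_SETFLAGS ioctl. A minimal sketch, not part of this patch; the variable
name below is a placeholder:

        #include <fcntl.h>
        #include <linux/fs.h>
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <unistd.h>

        int main(void)
        {
                const char *path = "/sys/firmware/efi/efivars/"
                        "Example-01234567-89ab-cdef-0123-456789abcdef";
                int flags, fd = open(path, O_RDONLY);

                if (fd < 0) {
                        perror("open");
                        return 1;
                }
                if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0) {
                        flags &= ~FS_IMMUTABLE_FL;      /* drop immutable bit */
                        if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
                                perror("FS_IOC_SETFLAGS");
                }
                close(fd);
                return 0;
        }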
index fde9fd06fa988b2eac60f10880973351e783fe37..843b045b4069a62c154bdf6c2723b3061bd81abd 100644 (file)
@@ -240,8 +240,8 @@ Table 1-2: Contents of the status files (as of 4.1)
  RssFile                     size of resident file mappings
  RssShmem                    size of resident shmem memory (includes SysV shm,
                              mapping of tmpfs and shared anonymous mappings)
- VmData                      size of data, stack, and text segments
- VmStk                       size of data, stack, and text segments
+ VmData                      size of private data segments
+ VmStk                       size of stack segments
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
@@ -356,7 +356,7 @@ address           perms offset  dev   inode      pathname
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
 a7cb2000-a7eb2000 rw-p 00000000 00:00 0
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0          [stack:1001]
+a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
 a8008000-a800a000 r--p 00133000 03:00 4222       /lib/libc.so.6
 a800a000-a800b000 rw-p 00135000 03:00 4222       /lib/libc.so.6
@@ -388,7 +388,6 @@ is not associated with a file:
 
  [heap]                   = the heap of the program
  [stack]                  = the stack of the main process
- [stack:1001]             = the stack of the thread with tid 1001
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
 
@@ -396,10 +395,8 @@ is not associated with a file:
 
 The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
 of the individual tasks of a process. In this file you will see a mapping marked
-as [stack] if that task sees it as a stack. This is a key difference from the
-content of /proc/PID/maps, where you will see all mappings that are being used
-as stack by all of those tasks. Hence, for the example above, the task-level
-map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
+as [stack] if that task sees it as a stack. Hence, for the example above, the
+task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
 
 08048000-08049000 r-xp 00000000 03:00 8312       /opt/test
 08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
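
For illustration, the redefined fields can be read back from /proc/<pid>/status
in the usual way; a minimal sketch, not part of this patch:

        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                char line[256];
                FILE *f = fopen("/proc/self/status", "r");

                if (!f)
                        return 1;
                while (fgets(line, sizeof(line), f))
                        if (!strncmp(line, "VmData:", 7) ||
                            !strncmp(line, "VmStk:", 6))
                                fputs(line, stdout);    /* e.g. "VmData:  496 kB" */
                fclose(f);
                return 0;
        }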
index e1678542279a2d2ed8b3dd3222db81b267f7c2d4..4b1f36b6ada034000d3d8e80831f79b4921f0ab4 100644 (file)
@@ -15,7 +15,6 @@ Sleeping and interrupt context
     modify_ah
     query_ah
     destroy_ah
-    bind_mw
     post_send
     post_recv
     poll_cq
@@ -31,7 +30,6 @@ Sleeping and interrupt context
     ib_modify_ah
     ib_query_ah
     ib_destroy_ah
-    ib_bind_mw
     ib_post_send
     ib_post_recv
     ib_req_notify_cq
index cfb2c0f1a4a89237ce131999d7b1cc03aff178b4..9a53c929f017d16527270bc2244352edf1d34cd8 100644 (file)
@@ -1454,6 +1454,41 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        In such case C2/C3 won't be used again.
                        idle=nomwait: Disable mwait for CPU C-states
 
+       ieee754=        [MIPS] Select IEEE Std 754 conformance mode
+                       Format: { strict | legacy | 2008 | relaxed }
+                       Default: strict
+
+                       Choose which programs will be accepted for execution
+                       based on the IEEE 754 NaN encoding(s) supported by
+                       the FPU and the NaN encoding requested with the value
+                       of an ELF file header flag individually set by each
+                       binary.  Hardware implementations are permitted to
+                       support either or both of the legacy and the 2008 NaN
+                       encoding modes.
+
+                       Available settings are as follows:
+                       strict  accept binaries that request a NaN encoding
+                               supported by the FPU
+                       legacy  only accept legacy-NaN binaries, if supported
+                               by the FPU
+                       2008    only accept 2008-NaN binaries, if supported
+                               by the FPU
+                       relaxed accept any binaries regardless of whether
+                               supported by the FPU
+
+                       The FPU emulator is always able to support both NaN
+                       encodings, so if no FPU hardware is present or it has
+                       been disabled with 'nofpu', then the settings of
+                       'legacy' and '2008' strap the emulator accordingly,
+                       'relaxed' straps the emulator for both legacy-NaN and
+                       2008-NaN, whereas 'strict' enables legacy-NaN only on
+                       legacy processors and both NaN encodings on MIPS32 or
+                       MIPS64 CPUs.
+
+                       The setting for ABS.fmt/NEG.fmt instruction execution
+                       mode generally follows that for the NaN encoding,
+                       except where unsupported by hardware.
+
        ignore_loglevel [KNL]
                        Ignore loglevel setting - this will print /all/
                        kernel messages to the console. Useful for debugging.
@@ -1461,6 +1496,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        could change it dynamically, usually by
                        /sys/module/printk/parameters/ignore_loglevel.
 
+       ignore_rlimit_data
+                       Ignore RLIMIT_DATA setting for data mappings,
+                       print warning at first misuse.  Can be changed via
+                       /sys/module/kernel/parameters/ignore_rlimit_data.
+
        ihash_entries=  [KNL]
                        Set number of hash buckets for inode cache.
 
@@ -4195,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        The default value of this parameter is determined by
                        the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+       workqueue.debug_force_rr_cpu
+                       Workqueues used to implicitly guarantee that work
+                       items queued without an explicit CPU were placed
+                       on the local CPU.  This guarantee no longer holds
+                       and, while the local CPU is still preferred, work
+                       items may be put on foreign CPUs.  This debug
+                       option forces round-robin CPU selection to flush
+                       out usages which depend on the now broken
+                       guarantee.  When enabled, memory and cache
+                       locality will be impacted.
+
        x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
                        default x2apic cluster mode on platforms
                        supporting x2apic.
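
Referring back to workqueue.debug_force_rr_cpu above: code that genuinely
needs a particular CPU should say so explicitly with queue_work_on() instead
of relying on the former local-CPU behaviour. A kernel-side sketch; the work
function is a placeholder:

        #include <linux/smp.h>
        #include <linux/workqueue.h>

        static void my_work_fn(struct work_struct *work)
        {
                /* placeholder: per-CPU work goes here */
        }
        static DECLARE_WORK(my_work, my_work_fn);

        static void queue_on_this_cpu(void)
        {
                int cpu = get_cpu();    /* pin CPU choice while queueing */

                queue_work_on(cpu, system_wq, &my_work);
                put_cpu();
        }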
index f4cbfe0ba1085b4df3067dcc457219699c5c6150..edec3a3e648d12eee550f953b6dec87ab58e34f8 100644 (file)
@@ -90,7 +90,7 @@ BLOCK_SOFTIRQ:  Do all of the following:
        from being initiated from tasks that might run on the CPU to
        be de-jittered.  (It is OK to force this CPU offline and then
        bring it back online before you start your application.)
-BLOCK_IOPOLL_SOFTIRQ:  Do all of the following:
+IRQ_POLL_SOFTIRQ:  Do all of the following:
 1.     Force block-device interrupts onto some other CPU.
 2.     Initiate any block I/O and block-I/O polling on other CPUs.
 3.     Once your application has started, prevent CPU-hotplug operations
index ceb44a095a27ba98804442c1f7aa91ed616b7d42..73b36d7c7b0d67309cce7bf0bd8a46f0c6ee7ea1 100644 (file)
@@ -594,7 +594,7 @@ tcp_fastopen - INTEGER
 
 tcp_syn_retries - INTEGER
        Number of times initial SYNs for an active TCP connection attempt
-       will be retransmitted. Should not be higher than 255. Default value
+       will be retransmitted. Should not be higher than 127. Default value
        is 6, which corresponds to 63 seconds till the last retransmission
        with the current initial RTO of 1 second. With this the final timeout
        for an active TCP connection attempt will happen after 127 seconds.
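
For a single connection the same limit can be overridden per socket with the
TCP_SYNCNT socket option; a minimal sketch, not part of this patch:

        #include <netinet/in.h>
        #include <netinet/tcp.h>
        #include <stdio.h>
        #include <sys/socket.h>

        int main(void)
        {
                int syn_retries = 3;    /* fewer retries than the default 6 */
                int fd = socket(AF_INET, SOCK_STREAM, 0);

                if (fd < 0)
                        return 1;
                if (setsockopt(fd, IPPROTO_TCP, TCP_SYNCNT,
                               &syn_retries, sizeof(syn_retries)) < 0)
                        perror("TCP_SYNCNT");
                /* connect() on fd now gives up after 3 SYN retransmissions */
                return 0;
        }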
index 88152f214f48cb69c643d4bf2ff2ac9a61ad2eb0..302b5ed616a6b2a5360e88e519140284b2f0e0d0 100644 (file)
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs:
 - nr_open
 - overflowuid
 - overflowgid
+- pipe-user-pages-hard
+- pipe-user-pages-soft
 - protected_hardlinks
 - protected_symlinks
 - suid_dumpable
@@ -159,6 +161,27 @@ The default is 65534.
 
 ==============================================================
 
+pipe-user-pages-hard:
+
+Maximum total number of pages a non-privileged user may allocate for pipes.
+Once this limit is reached, no new pipes may be allocated until usage goes
+below the limit again. When set to 0, no limit is applied, which is the default
+setting.
+
+==============================================================
+
+pipe-user-pages-soft:
+
+Maximum total number of pages a non-privileged user may allocate for pipes
+before the pipe size gets limited to a single page. Once this limit is reached,
+new pipes will be limited to a single page in size for this user in order to
+limit total memory usage, and trying to increase them using fcntl() will be
+denied until usage goes below the limit again. The default value allows
+allocating up to 1024 pipes at their default size. When set to 0, no limit is
+applied.
+
+==============================================================
+
 protected_hardlinks:
 
 A long-standing class of security issues is the hardlink-based
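
Referring back to pipe-user-pages-soft above: the fcntl() call that the soft
limit starts denying is F_SETPIPE_SZ. A minimal sketch, not part of this
patch:

        #define _GNU_SOURCE     /* for F_SETPIPE_SZ / F_GETPIPE_SZ */
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fds[2];

                if (pipe(fds) < 0)
                        return 1;
                /* Fails with EPERM once the user is over the soft limit. */
                if (fcntl(fds[1], F_SETPIPE_SZ, 1 << 20) < 0)
                        perror("F_SETPIPE_SZ");
                else
                        printf("pipe size: %d\n",
                               fcntl(fds[1], F_GETPIPE_SZ));
                return 0;
        }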
index 767392ffd31e1451aa14da7453fc581264b4491c..a484d2c109d7ff0689d8997c04ce477ca6e79d1c 100644 (file)
@@ -1,9 +1,7 @@
                High Precision Event Timer Driver for Linux
 
 The High Precision Event Timer (HPET) hardware follows a specification
-by Intel and Microsoft which can be found at
-
-       http://www.intel.com/hardwaredesign/hpetspec_1.pdf
+by Intel and Microsoft, revision 1.
 
 Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
 and up to 32 comparators.  Normally three or more comparators are provided,
index 053f613fc9a913af132da7f794742c45d6727dfc..07e4cdf024073033e2a52213bcbed4d562111a33 100644 (file)
@@ -3025,7 +3025,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
-4.90 KVM_SMI
+4.96 KVM_SMI
 
 Capability: KVM_CAP_X86_SMM
 Architectures: x86
index 45d2717760fc229171775a5216b13d9ef3025e1d..da3e4d8016d0f7e3b893e1f835c433a8e20f9213 100644 (file)
@@ -223,9 +223,7 @@ F:  drivers/scsi/aacraid/
 
 ABI/API
 L:     linux-api@vger.kernel.org
-F:     Documentation/ABI/
 F:     include/linux/syscalls.h
-F:     include/uapi/
 F:     kernel/sys_ni.c
 
 ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
@@ -686,13 +684,6 @@ M: Michael Hanselmann <linux-kernel@hansmi.ch>
 S:     Supported
 F:     drivers/macintosh/ams/
 
-AMSO1100 RNIC DRIVER
-M:     Tom Tucker <tom@opengridcomputing.com>
-M:     Steve Wise <swise@opengridcomputing.com>
-L:     linux-rdma@vger.kernel.org
-S:     Maintained
-F:     drivers/infiniband/hw/amso1100/
-
 ANALOG DEVICES INC AD9389B DRIVER
 M:     Hans Verkuil <hans.verkuil@cisco.com>
 L:     linux-media@vger.kernel.org
@@ -929,17 +920,24 @@ M:        Emilio López <emilio@elopez.com.ar>
 S:     Maintained
 F:     drivers/clk/sunxi/
 
-ARM/Amlogic MesonX SoC support
+ARM/Amlogic Meson SoC support
 M:     Carlo Caione <carlo@caione.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:     linux-meson@googlegroups.com
+W:     http://linux-meson.com/
 S:     Maintained
-F:     drivers/media/rc/meson-ir.c
-N:     meson[x68]
+F:     arch/arm/mach-meson/
+F:     arch/arm/boot/dts/meson*
+N:     meson
 
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:     Tsahee Zidenberg <tsahee@annapurnalabs.com>
+M:     Antoine Tenart <antoine.tenart@free-electrons.com>
 S:     Maintained
 F:     arch/arm/mach-alpine/
+F:     arch/arm/boot/dts/alpine*
+F:     arch/arm64/boot/dts/al/
+F:     drivers/*/*alpine*
 
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
 M:     Nicolas Ferre <nicolas.ferre@atmel.com>
@@ -967,6 +965,8 @@ M:  Rob Herring <robh@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-highbank/
+F:     arch/arm/boot/dts/highbank.dts
+F:     arch/arm/boot/dts/ecx-*.dts*
 
 ARM/CAVIUM NETWORKS CNS3XXX MACHINE SUPPORT
 M:     Krzysztof Halasa <khalasa@piap.pl>
@@ -1042,6 +1042,7 @@ M:        Barry Song <baohua@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/prima2*
 F:     arch/arm/mach-prima2/
 F:     drivers/clk/sirf/
 F:     drivers/clocksource/timer-prima2.c
@@ -1143,6 +1144,10 @@ W:       http://www.hisilicon.com
 S:     Supported
 T:     git git://github.com/hisilicon/linux-hisi.git
 F:     arch/arm/mach-hisi/
+F:     arch/arm/boot/dts/hi3*
+F:     arch/arm/boot/dts/hip*
+F:     arch/arm/boot/dts/hisi*
+F:     arch/arm64/boot/dts/hisilicon/
 
 ARM/HP JORNADA 7XX MACHINE SUPPORT
 M:     Kristoffer Ericson <kristoffer.ericson@gmail.com>
@@ -1219,6 +1224,7 @@ M:        Santosh Shilimkar <ssantosh@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-keystone/
+F:     arch/arm/boot/dts/k2*
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
 
 ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
@@ -1287,6 +1293,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-berlin/
 F:     arch/arm/boot/dts/berlin*
+F:     arch/arm64/boot/dts/marvell/berlin*
 
 
 ARM/Marvell Dove/MV78xx0/Orion SOC support
@@ -1425,6 +1432,7 @@ S:        Maintained
 F:     arch/arm/boot/dts/qcom-*.dts
 F:     arch/arm/boot/dts/qcom-*.dtsi
 F:     arch/arm/mach-qcom/
+F:     arch/arm64/boot/dts/qcom/*
 F:     drivers/soc/qcom/
 F:     drivers/tty/serial/msm_serial.h
 F:     drivers/tty/serial/msm_serial.c
@@ -1441,8 +1449,8 @@ S:        Maintained
 ARM/RENESAS ARM64 ARCHITECTURE
 M:     Simon Horman <horms@verge.net.au>
 M:     Magnus Damm <magnus.damm@gmail.com>
-L:     linux-sh@vger.kernel.org
-Q:     http://patchwork.kernel.org/project/linux-sh/list/
+L:     linux-renesas-soc@vger.kernel.org
+Q:     http://patchwork.kernel.org/project/linux-renesas-soc/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
 S:     Supported
 F:     arch/arm64/boot/dts/renesas/
@@ -1484,6 +1492,8 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/s3c*
+F:     arch/arm/boot/dts/s5p*
+F:     arch/arm/boot/dts/samsung*
 F:     arch/arm/boot/dts/exynos*
 F:     arch/arm64/boot/dts/exynos/
 F:     arch/arm/plat-samsung/
@@ -1563,6 +1573,7 @@ S:        Maintained
 F:     arch/arm/mach-socfpga/
 F:     arch/arm/boot/dts/socfpga*
 F:     arch/arm/configs/socfpga_defconfig
+F:     arch/arm64/boot/dts/altera/
 W:     http://www.rocketboards.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
 
@@ -1716,7 +1727,7 @@ M:        Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/vexpress*
-F:     arch/arm64/boot/dts/arm/vexpress*
+F:     arch/arm64/boot/dts/arm/
 F:     arch/arm/mach-vexpress/
 F:     */*/vexpress*
 F:     */*/*/vexpress*
@@ -2343,6 +2354,7 @@ F:        arch/arm/mach-bcm/
 F:     arch/arm/boot/dts/bcm113*
 F:     arch/arm/boot/dts/bcm216*
 F:     arch/arm/boot/dts/bcm281*
+F:     arch/arm64/boot/dts/broadcom/
 F:     arch/arm/configs/bcm_defconfig
 F:     drivers/mmc/host/sdhci-bcm-kona.c
 F:     drivers/clocksource/bcm_kona_timer.c
@@ -2357,14 +2369,6 @@ T:       git git://git.kernel.org/pub/scm/linux/kernel/git/rpi/linux-rpi.git
 S:     Maintained
 N:     bcm2835
 
-BROADCOM BCM33XX MIPS ARCHITECTURE
-M:     Kevin Cernekee <cernekee@gmail.com>
-L:     linux-mips@linux-mips.org
-S:     Maintained
-F:     arch/mips/bcm3384/*
-F:     arch/mips/include/asm/mach-bcm3384/*
-F:     arch/mips/kernel/*bmips*
-
 BROADCOM BCM47XX MIPS ARCHITECTURE
 M:     Hauke Mehrtens <hauke@hauke-m.de>
 M:     Rafał Miłecki <zajec5@gmail.com>
@@ -2420,6 +2424,8 @@ F:        arch/mips/kernel/*bmips*
 F:     arch/mips/boot/dts/brcm/bcm*.dts*
 F:     drivers/irqchip/irq-bcm7*
 F:     drivers/irqchip/irq-brcmstb*
+F:     include/linux/bcm963xx_nvram.h
+F:     include/linux/bcm963xx_tag.h
 
 BROADCOM TG3 GIGABIT ETHERNET DRIVER
 M:     Prashant Sreedharan <prashant@broadcom.com>
@@ -3443,9 +3449,8 @@ S:        Maintained
 F:     drivers/usb/dwc2/
 
 DESIGNWARE USB3 DRD IP DRIVER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
-L:     linux-omap@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:     Maintained
 F:     drivers/usb/dwc3/
@@ -3665,13 +3670,12 @@ F:      drivers/scsi/dpt*
 F:     drivers/scsi/dpt/
 
 DRBD DRIVER
-P:     Philipp Reisner
-P:     Lars Ellenberg
-M:     drbd-dev@lists.linbit.com
-L:     drbd-user@lists.linbit.com
+M:     Philipp Reisner <philipp.reisner@linbit.com>
+M:     Lars Ellenberg <lars.ellenberg@linbit.com>
+L:     drbd-dev@lists.linbit.com
 W:     http://www.drbd.org
-T:     git git://git.drbd.org/linux-2.6-drbd.git drbd
-T:     git git://git.drbd.org/drbd-8.3.git
+T:     git git://git.linbit.com/linux-drbd.git
+T:     git git://git.linbit.com/drbd-8.4.git
 S:     Supported
 F:     drivers/block/drbd/
 F:     lib/lru_cache.c
@@ -4183,13 +4187,6 @@ W:       http://aeschi.ch.eu.org/efs/
 S:     Orphan
 F:     fs/efs/
 
-EHCA (IBM GX bus InfiniBand adapter) DRIVER
-M:     Hoang-Nam Nguyen <hnguyen@de.ibm.com>
-M:     Christoph Raisch <raisch@de.ibm.com>
-L:     linux-rdma@vger.kernel.org
-S:     Supported
-F:     drivers/infiniband/hw/ehca/
-
 EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER
 M:     Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
 L:     netdev@vger.kernel.org
@@ -5781,10 +5778,8 @@ INTEL TELEMETRY DRIVER
 M:     Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
-F:     drivers/platform/x86/intel_telemetry_core.c
 F:     arch/x86/include/asm/intel_telemetry.h
-F:     drivers/platform/x86/intel_telemetry_pltdrv.c
-F:     drivers/platform/x86/intel_telemetry_debugfs.c
+F:     drivers/platform/x86/intel_telemetry*
 
 IOC3 ETHERNET DRIVER
 M:     Ralf Baechle <ralf@linux-mips.org>
@@ -5810,12 +5805,6 @@ M:       Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
 S:     Maintained
 F:     net/ipv4/netfilter/ipt_MASQUERADE.c
 
-IPATH DRIVER
-M:     Mike Marciniszyn <infinipath@intel.com>
-L:     linux-rdma@vger.kernel.org
-S:     Maintained
-F:     drivers/staging/rdma/ipath/
-
 IPMI SUBSYSTEM
 M:     Corey Minyard <minyard@acm.org>
 L:     openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
@@ -6145,7 +6134,7 @@ F:        include/uapi/linux/sunrpc/
 
 KERNEL SELFTEST FRAMEWORK
 M:     Shuah Khan <shuahkh@osg.samsung.com>
-L:     linux-api@vger.kernel.org
+L:     linux-kselftest@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/shuah/linux-kselftest
 S:     Maintained
 F:     tools/testing/selftests
@@ -6219,6 +6208,14 @@ F:       arch/arm64/include/uapi/asm/kvm*
 F:     arch/arm64/include/asm/kvm*
 F:     arch/arm64/kvm/
 
+KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
+M:     James Hogan <james.hogan@imgtec.com>
+L:     linux-mips@linux-mips.org
+S:     Supported
+F:     arch/mips/include/uapi/asm/kvm*
+F:     arch/mips/include/asm/kvm*
+F:     arch/mips/kvm/
+
 KEXEC
 M:     Eric Biederman <ebiederm@xmission.com>
 W:     http://kernel.org/pub/linux/utils/kernel/kexec/
@@ -6316,6 +6313,12 @@ S:       Maintained
 F:     net/l3mdev
 F:     include/net/l3mdev.h
 
+LANTIQ MIPS ARCHITECTURE
+M:     John Crispin <blogic@openwrt.org>
+L:     linux-mips@linux-mips.org
+S:     Maintained
+F:     arch/mips/lantiq
+
 LAPB module
 L:     linux-x25@vger.kernel.org
 S:     Orphan
@@ -7152,27 +7155,45 @@ W:      https://linuxtv.org
 S:     Odd Fixes
 F:     drivers/media/radio/radio-miropcm20*
 
-Mellanox MLX5 core VPI driver
-M:     Eli Cohen <eli@mellanox.com>
+MELLANOX MLX4 core VPI driver
+M:     Yishai Hadas <yishaih@mellanox.com>
 L:     netdev@vger.kernel.org
 L:     linux-rdma@vger.kernel.org
 W:     http://www.mellanox.com
 Q:     http://patchwork.ozlabs.org/project/netdev/list/
+S:     Supported
+F:     drivers/net/ethernet/mellanox/mlx4/
+F:     include/linux/mlx4/
+
+MELLANOX MLX4 IB driver
+M:     Yishai Hadas <yishaih@mellanox.com>
+L:     linux-rdma@vger.kernel.org
+W:     http://www.mellanox.com
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-T:     git git://openfabrics.org/~eli/connect-ib.git
+S:     Supported
+F:     drivers/infiniband/hw/mlx4/
+F:     include/linux/mlx4/
+
+MELLANOX MLX5 core VPI driver
+M:     Matan Barak <matanb@mellanox.com>
+M:     Leon Romanovsky <leonro@mellanox.com>
+L:     netdev@vger.kernel.org
+L:     linux-rdma@vger.kernel.org
+W:     http://www.mellanox.com
+Q:     http://patchwork.ozlabs.org/project/netdev/list/
 S:     Supported
 F:     drivers/net/ethernet/mellanox/mlx5/core/
 F:     include/linux/mlx5/
 
-Mellanox MLX5 IB driver
-M:     Eli Cohen <eli@mellanox.com>
+MELLANOX MLX5 IB driver
+M:     Matan Barak <matanb@mellanox.com>
+M:     Leon Romanovsky <leonro@mellanox.com>
 L:     linux-rdma@vger.kernel.org
 W:     http://www.mellanox.com
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-T:     git git://openfabrics.org/~eli/connect-ib.git
 S:     Supported
-F:     include/linux/mlx5/
 F:     drivers/infiniband/hw/mlx5/
+F:     include/linux/mlx5/
 
 MELEXIS MLX90614 DRIVER
 M:     Crt Mori <cmo@melexis.com>
@@ -7339,7 +7360,7 @@ F:        drivers/tty/isicom.c
 F:     include/linux/isicom.h
 
 MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Bin Liu <b-liu@ti.com>
 L:     linux-usb@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:     Maintained
@@ -7671,13 +7692,13 @@ S:      Maintained
 F:     arch/nios2/
 
 NOKIA N900 POWER SUPPLY DRIVERS
-M:     Pali Rohár <pali.rohar@gmail.com>
-S:     Maintained
+R:     Pali Rohár <pali.rohar@gmail.com>
 F:     include/linux/power/bq2415x_charger.h
 F:     include/linux/power/bq27xxx_battery.h
 F:     include/linux/power/isp1704_charger.h
 F:     drivers/power/bq2415x_charger.c
 F:     drivers/power/bq27xxx_battery.c
+F:     drivers/power/bq27xxx_battery_i2c.c
 F:     drivers/power/isp1704_charger.c
 F:     drivers/power/rx51_battery.c
 
@@ -7703,6 +7724,12 @@ W:       https://github.com/jonmason/ntb/wiki
 T:     git git://github.com/jonmason/ntb.git
 F:     drivers/ntb/hw/intel/
 
+NTB AMD DRIVER
+M:     Xiangliang Yu <Xiangliang.Yu@amd.com>
+L:     linux-ntb@googlegroups.com
+S:     Supported
+F:     drivers/ntb/hw/amd/
+
 NTFS FILESYSTEM
 M:     Anton Altaparmakov <anton@tuxera.com>
 L:     linux-ntfs-dev@lists.sourceforge.net
@@ -7902,11 +7929,9 @@ F:       drivers/media/platform/omap3isp/
 F:     drivers/staging/media/omap4iss/
 
 OMAP USB SUPPORT
-M:     Felipe Balbi <balbi@ti.com>
 L:     linux-usb@vger.kernel.org
 L:     linux-omap@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
-S:     Maintained
+S:     Orphan
 F:     drivers/usb/*/*omap*
 F:     arch/arm/*omap*/usb*
 
@@ -8781,6 +8806,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/hzhuang1/linux.git
 T:     git git://github.com/rjarzmik/linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/pxa*
 F:     arch/arm/mach-pxa/
 F:     drivers/dma/pxa*
 F:     drivers/pcmcia/pxa2xx*
@@ -8810,6 +8836,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/hzhuang1/linux.git
 T:     git git://git.linaro.org/people/ycmiao/pxa-linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/mmp*
 F:     arch/arm/mach-mmp/
 
 PXA MMCI DRIVER
@@ -8976,6 +9003,12 @@ L:       linux-fbdev@vger.kernel.org
 S:     Maintained
 F:     drivers/video/fbdev/aty/aty128fb.c
 
+RALINK MIPS ARCHITECTURE
+M:     John Crispin <blogic@openwrt.org>
+L:     linux-mips@linux-mips.org
+S:     Maintained
+F:     arch/mips/ralink
+
 RALINK RT2X00 WIRELESS LAN DRIVER
 P:     rt2x00 project
 M:     Stanislaw Gruszka <sgruszka@redhat.com>
@@ -9529,6 +9562,12 @@ M:       Andreas Noever <andreas.noever@gmail.com>
 S:     Maintained
 F:     drivers/thunderbolt/
 
+TI BQ27XXX POWER SUPPLY DRIVER
+R:     Andrew F. Davis <afd@ti.com>
+F:     include/linux/power/bq27xxx_battery.h
+F:     drivers/power/bq27xxx_battery.c
+F:     drivers/power/bq27xxx_battery_i2c.c
+
 TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
 M:     John Stultz <john.stultz@linaro.org>
 M:     Thomas Gleixner <tglx@linutronix.de>
@@ -9750,10 +9789,11 @@ S:      Supported
 F:     drivers/scsi/be2iscsi/
 
 Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER
-M:     Sathya Perla <sathya.perla@avagotech.com>
-M:     Ajit Khaparde <ajit.khaparde@avagotech.com>
-M:     Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
-M:     Sriharsha Basavapatna <sriharsha.basavapatna@avagotech.com>
+M:     Sathya Perla <sathya.perla@broadcom.com>
+M:     Ajit Khaparde <ajit.khaparde@broadcom.com>
+M:     Padmanabh Ratnakar <padmanabh.ratnakar@broadcom.com>
+M:     Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
+M:     Somnath Kotur <somnath.kotur@broadcom.com>
 L:     netdev@vger.kernel.org
 W:     http://www.emulex.com
 S:     Supported
@@ -10115,6 +10155,7 @@ S:      Supported
 F:     drivers/media/pci/solo6x10/
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
+M:     Shaohua Li <shli@kernel.org>
 L:     linux-raid@vger.kernel.org
 T:     git git://neil.brown.name/md
 S:     Supported
@@ -10130,7 +10171,7 @@ F:      drivers/net/ethernet/natsemi/sonic.*
 
 SONICS SILICON BACKPLANE DRIVER (SSB)
 M:     Michael Buesch <m@bues.ch>
-L:     netdev@vger.kernel.org
+L:     linux-wireless@vger.kernel.org
 S:     Maintained
 F:     drivers/ssb/
 F:     include/linux/ssb/
@@ -10248,6 +10289,7 @@ L:      spear-devel@list.st.com
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
+F:     arch/arm/boot/dts/spear*
 F:     arch/arm/mach-spear/
 
 SPEAR CLOCK FRAMEWORK SUPPORT
@@ -10454,9 +10496,11 @@ S:     Maintained
 F:     drivers/net/ethernet/dlink/sundance.c
 
 SUPERH
+M:     Yoshinori Sato <ysato@users.sourceforge.jp>
+M:     Rich Felker <dalias@libc.org>
 L:     linux-sh@vger.kernel.org
 Q:     http://patchwork.kernel.org/project/linux-sh/list/
-S:     Orphan
+S:     Maintained
 F:     Documentation/sh/
 F:     arch/sh/
 F:     drivers/sh/
@@ -11273,7 +11317,7 @@ F:      Documentation/usb/ehci.txt
 F:     drivers/usb/host/ehci*
 
 USB GADGET/PERIPHERAL SUBSYSTEM
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 W:     http://www.linux-usb.org/gadget
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -11349,7 +11393,7 @@ S:      Maintained
 F:     drivers/net/usb/pegasus.*
 
 USB PHY LAYER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:     Maintained
@@ -11979,7 +12023,6 @@ F:      arch/arm64/xen/
 F:     arch/arm64/include/asm/xen/
 
 XEN NETWORK BACKEND DRIVER
-M:     Ian Campbell <ian.campbell@citrix.com>
 M:     Wei Liu <wei.liu2@citrix.com>
 L:     xen-devel@lists.xenproject.org (moderated for non-subscribers)
 L:     netdev@vger.kernel.org
@@ -12088,7 +12131,7 @@ F:      drivers/net/hamradio/*scc.c
 F:     drivers/net/hamradio/z8530.h
 
 ZBUD COMPRESSED PAGE ALLOCATOR
-M:     Seth Jennings <sjennings@variantweb.net>
+M:     Seth Jennings <sjenning@redhat.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/zbud.c
@@ -12143,7 +12186,7 @@ F:      include/linux/zsmalloc.h
 F:     Documentation/vm/zsmalloc.txt
 
 ZSWAP COMPRESSED SWAP CACHING
-M:     Seth Jennings <sjennings@variantweb.net>
+M:     Seth Jennings <sjenning@redhat.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/zswap.c
index abfb3e8eb0b14e680290f3800bd331e870cbb8c6..af6e5f893d56eaa4170414ba29655f832765b583 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
-PATCHLEVEL = 4
+PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc6
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
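
With these values the tree identifies itself as 4.5.0-rc6: the top-level
Makefile assembles KERNELVERSION as VERSION.PATCHLEVEL.SUBLEVEL with
EXTRAVERSION appended, which is what "make kernelversion" reports.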
index 76dde9db79349d0977687623307c934de91e314a..8a188bc1786a2c5cb181f21e10e5ba254b368199 100644 (file)
@@ -12,8 +12,6 @@ config ARC
        select BUILDTIME_EXTABLE_SORT
        select COMMON_CLK
        select CLONE_BACKWARDS
-       # ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev
-       select DEVTMPFS if !INITRAMFS_SOURCE=""
        select GENERIC_ATOMIC64
        select GENERIC_CLOCKEVENTS
        select GENERIC_FIND_FIRST_BIT
@@ -275,14 +273,6 @@ config ARC_DCCM_BASE
        default "0xA0000000"
        depends on ARC_HAS_DCCM
 
-config ARC_HAS_HW_MPY
-       bool "Use Hardware Multiplier (Normal or Faster XMAC)"
-       default y
-       help
-         Influences how gcc generates code for MPY operations.
-         If enabled, MPYxx insns are generated, provided by Standard/XMAC
-         Multipler. Otherwise software multipy lib is used
-
 choice
        prompt "MMU Version"
        default ARC_MMU_V3 if ARC_CPU_770
@@ -338,6 +328,19 @@ config ARC_PAGE_SIZE_4K
 
 endchoice
 
+choice
+       prompt "MMU Super Page Size"
+       depends on ISA_ARCV2 && TRANSPARENT_HUGEPAGE
+       default ARC_HUGEPAGE_2M
+
+config ARC_HUGEPAGE_2M
+       bool "2MB"
+
+config ARC_HUGEPAGE_16M
+       bool "16MB"
+
+endchoice
+
 if ISA_ARCOMPACT
 
 config ARC_COMPACT_IRQ_LEVELS
@@ -410,7 +413,7 @@ config ARC_HAS_RTC
        default n
        depends on !SMP
 
-config ARC_HAS_GRTC
+config ARC_HAS_GFRC
        bool "SMP synchronized 64-bit cycle counter"
        default y
        depends on SMP
@@ -529,14 +532,6 @@ config ARC_DBG_TLB_MISS_COUNT
          Counts number of I and D TLB Misses and exports them via Debugfs
          The counters can be cleared via Debugfs as well
 
-if SMP
-
-config ARC_IPI_DBG
-       bool "Debug Inter Core interrupts"
-       default n
-
-endif
-
 endif
 
 config ARC_UBOOT_SUPPORT
@@ -566,6 +561,12 @@ endmenu
 endmenu         # "ARC Architecture Configuration"
 
 source "mm/Kconfig"
+
+config FORCE_MAX_ZONEORDER
+       int "Maximum zone order"
+       default "12" if ARC_HUGEPAGE_16M
+       default "11"
+
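
This new default is what makes the 16M super page above allocatable at
all: FORCE_MAX_ZONEORDER sets MAX_ORDER, and the buddy allocator hands
out at most 2^(MAX_ORDER-1) contiguous pages, so (assuming the default
8K ARC page) order 12 yields 2^11 * 8K = 16M, while the stock order 11
tops out at 8M, still ample for the 2M choice.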
 source "net/Kconfig"
 source "drivers/Kconfig"
 source "fs/Kconfig"
index aeb19021099e315967ba95524e9528c48da0d159..c8230f3395f281f9c11ee6de130137d84e1bb543 100644 (file)
@@ -74,10 +74,6 @@ ldflags-$(CONFIG_CPU_BIG_ENDIAN)     += -EB
 # --build-id w/o "-marclinux". Default arc-elf32-ld is OK
 ldflags-$(upto_gcc44)                  += -marclinux
 
-ifndef CONFIG_ARC_HAS_HW_MPY
-       cflags-y        += -mno-mpy
-endif
-
 LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
 
 # Modules with short calls might break for calls into builtin-kernel
index f1ac9818b751e1fab854f2c1a4440b9ca8af3e85..5d4e2a07ad3eb8d11c605f385b881769d44b16ae 100644 (file)
@@ -39,6 +39,7 @@ CONFIG_IP_PNP_RARP=y
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -73,7 +74,6 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
@@ -91,12 +91,10 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
index 323486d6ee83419a9e50d5c53fa042f53e08ebed..87ee46b237ef7baa6dc5008f968e5c914d03aee4 100644 (file)
@@ -39,14 +39,10 @@ CONFIG_IP_PNP_RARP=y
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_AXS=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
@@ -78,14 +74,12 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
@@ -97,12 +91,10 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
index 66191cd0447eaabc524a36ec497e7047043dc9a4..d80daf4f7e735aa3787df6f75957c2b9a273b361 100644 (file)
@@ -40,14 +40,10 @@ CONFIG_IP_PNP_RARP=y
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_AXS=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
@@ -79,14 +75,12 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
@@ -98,12 +92,10 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
index 138f9d8879570a8b329415d9238c710fa6e5b032..f41095340b6a7a8662fc4e51e16e39524f359ae7 100644 (file)
@@ -4,6 +4,7 @@ CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -26,7 +27,6 @@ CONFIG_ARC_PLAT_SIM=y
 CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -34,6 +34,7 @@ CONFIG_UNIX_DIAG=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -51,7 +52,6 @@ CONFIG_SERIAL_ARC=y
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -63,4 +63,3 @@ CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_DEBUG_PREEMPT is not set
-CONFIG_XZ_DEC=y
index f68838e8068af53eb2ebb29c83d88cb10dc0bfac..cfaa33cb59217c6261667ec4d51b2762fea985ab 100644 (file)
@@ -35,6 +35,7 @@ CONFIG_UNIX_DIAG=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -49,7 +50,6 @@ CONFIG_SERIAL_ARC=y
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -61,4 +61,3 @@ CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_DEBUG_PREEMPT is not set
-CONFIG_XZ_DEC=y
index 96bd1c20fb0badeb5d3ebf41f2671f6558939c22..bb2a8dc778b5be48943f25f4415abbe73c476969 100644 (file)
@@ -2,6 +2,7 @@ CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -21,13 +22,11 @@ CONFIG_MODULES=y
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARC_PLAT_SIM=y
-CONFIG_ARC_BOARD_ML509=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
 CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -35,6 +34,7 @@ CONFIG_UNIX_DIAG=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -49,7 +49,6 @@ CONFIG_SERIAL_ARC=y
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -60,4 +59,3 @@ CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
index 31e1d95764ff91dc10fe80d936a5613e6f713cc4..646182e93753af74110471f7739fab2454246b1b 100644 (file)
@@ -33,6 +33,7 @@ CONFIG_UNIX_DIAG=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -58,7 +59,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
index fcae66683ca0bd5865924a88816a388e763604cd..ceca2541950d1bbe7f950bad17ab2c598c79c65a 100644 (file)
@@ -34,12 +34,12 @@ CONFIG_UNIX_DIAG=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_BLK_DEV is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_OSCI_LAN=y
 CONFIG_INPUT_EVDEV=y
 # CONFIG_MOUSE_PS2_ALPS is not set
 # CONFIG_MOUSE_PS2_LOGIPS2PP is not set
@@ -58,7 +58,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
index b01b659168ea4a1c36ac85e0ee41c545aa697ec8..4b6da90f6f261e3d9783719a5bbc2060bf2e8408 100644 (file)
@@ -2,6 +2,7 @@ CONFIG_CROSS_COMPILE="arc-linux-"
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
@@ -18,15 +19,11 @@ CONFIG_MODULES=y
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARC_PLAT_SIM=y
-CONFIG_ARC_BOARD_ML509=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
-CONFIG_ARC_HAS_LL64=y
-# CONFIG_ARC_HAS_RTSC is not set
 CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y
@@ -40,6 +37,7 @@ CONFIG_INET=y
 # CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -56,14 +54,11 @@ CONFIG_NETDEVICES=y
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-CONFIG_NET_OSCI_LAN=y
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
 CONFIG_MOUSE_PS2_TOUCHKIT=y
 # CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_LIBPS2=y
 CONFIG_SERIO_ARC_PS2=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
@@ -75,9 +70,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-CONFIG_ARCPGU_RGB888=y
-CONFIG_ARCPGU_DISPTYPE=0
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
index 3b4dc9cebcf15234f3d42a4efd2e40b9c5bd4150..9b342eaf95aec97830e155a00250d3d419e6d24d 100644 (file)
@@ -3,6 +3,7 @@ CONFIG_CROSS_COMPILE="arc-linux-"
 CONFIG_DEFAULT_HOSTNAME="tb10x"
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -26,12 +27,10 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLOCK is not set
 CONFIG_ARC_PLAT_TB10X=y
 CONFIG_ARC_CACHE_LINE_SHIFT=5
-CONFIG_ARC_STACK_NONEXEC=y
 CONFIG_HZ=250
 CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
 CONFIG_PREEMPT_VOLUNTARY=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -44,8 +43,8 @@ CONFIG_IP_MULTICAST=y
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_PROC_DEVICETREE=y
 CONFIG_NETDEVICES=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -55,9 +54,6 @@ CONFIG_NETDEVICES=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 CONFIG_STMMAC_ETH=y
-CONFIG_STMMAC_DEBUG_FS=y
-CONFIG_STMMAC_DA=y
-CONFIG_STMMAC_CHAINED=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_WLAN is not set
 # CONFIG_INPUT is not set
@@ -91,7 +87,6 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_LEDS_TRIGGER_TRANSIENT=y
 CONFIG_DMADEVICES=y
 CONFIG_DW_DMAC=y
-CONFIG_NET_DMA=y
 CONFIG_ASYNC_TX_DMA=y
 # CONFIG_IOMMU_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
@@ -100,17 +95,16 @@ CONFIG_TMPFS=y
 CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_INFO=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_MAGIC_SYSRQ=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
 CONFIG_HEADERS_CHECK=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
index f36c047b33cad0c469bd2368bb3e5fa4260535e5..735985974a3136d2a1b1d6184ff754efd0a02743 100644 (file)
@@ -16,7 +16,7 @@ CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS103=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
-# CONFIG_ARC_HAS_GRTC is not set
+# CONFIG_ARC_HAS_GFRC is not set
 CONFIG_ARC_UBOOT_SUPPORT=y
 CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp"
 CONFIG_PREEMPT=y
index 7fac7d85ed6a32bb1abaa4f8e36c5d243cec06c0..f9f4c6f59fdbb3f450ec13420e94f2ee1cb4428c 100644 (file)
@@ -10,7 +10,8 @@
 #define _ASM_ARC_ARCREGS_H
 
 /* Build Configuration Registers */
-#define ARC_REG_DCCMBASE_BCR   0x61    /* DCCM Base Addr */
+#define ARC_REG_AUX_DCCM       0x18    /* DCCM Base Addr ARCv2 */
+#define ARC_REG_DCCM_BASE_BUILD        0x61    /* DCCM Base Addr ARCompact */
 #define ARC_REG_CRC_BCR                0x62
 #define ARC_REG_VECBASE_BCR    0x68
 #define ARC_REG_PERIBASE_BCR   0x69
 #define ARC_REG_DPFP_BCR       0x6C    /* ARCompact: Dbl Precision FPU */
 #define ARC_REG_FP_V2_BCR      0xc8    /* ARCv2 FPU */
 #define ARC_REG_SLC_BCR                0xce
-#define ARC_REG_DCCM_BCR       0x74    /* DCCM Present + SZ */
+#define ARC_REG_DCCM_BUILD     0x74    /* DCCM size (common) */
 #define ARC_REG_TIMERS_BCR     0x75
 #define ARC_REG_AP_BCR         0x76
-#define ARC_REG_ICCM_BCR       0x78
+#define ARC_REG_ICCM_BUILD     0x78    /* ICCM size (common) */
 #define ARC_REG_XY_MEM_BCR     0x79
 #define ARC_REG_MAC_BCR                0x7a
 #define ARC_REG_MUL_BCR                0x7b
@@ -36,6 +37,7 @@
 #define ARC_REG_IRQ_BCR                0xF3
 #define ARC_REG_SMART_BCR      0xFF
 #define ARC_REG_CLUSTER_BCR    0xcf
+#define ARC_REG_AUX_ICCM       0x208   /* ICCM Base Addr (ARCv2) */
 
 /* status32 Bits Positions */
 #define STATUS_AE_BIT          5       /* Exception active */
@@ -246,7 +248,7 @@ struct bcr_perip {
 #endif
 };
 
-struct bcr_iccm {
+struct bcr_iccm_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
        unsigned int base:16, pad:5, sz:3, ver:8;
 #else
@@ -254,17 +256,15 @@ struct bcr_iccm {
 #endif
 };
 
-/* DCCM Base Address Register: ARC_REG_DCCMBASE_BCR */
-struct bcr_dccm_base {
+struct bcr_iccm_arcv2 {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int addr:24, ver:8;
+       unsigned int pad:8, sz11:4, sz01:4, sz10:4, sz00:4, ver:8;
 #else
-       unsigned int ver:8, addr:24;
+       unsigned int ver:8, sz00:4, sz10:4, sz01:4, sz11:4, pad:8;
 #endif
 };
 
-/* DCCM RAM Configuration Register: ARC_REG_DCCM_BCR */
-struct bcr_dccm {
+struct bcr_dccm_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
        unsigned int res:21, sz:3, ver:8;
 #else
@@ -272,6 +272,14 @@ struct bcr_dccm {
 #endif
 };
 
+struct bcr_dccm_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad2:12, cyc:3, pad1:1, sz1:4, sz0:4, ver:8;
+#else
+       unsigned int ver:8, sz0:4, sz1:4, pad1:1, cyc:3, pad2:12;
+#endif
+};
+
 /* ARCompact: Both SP and DP FPU BCRs have same format */
 struct bcr_fp_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
@@ -315,9 +323,9 @@ struct bcr_bpu_arcv2 {
 
 struct bcr_generic {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int pad:24, ver:8;
+       unsigned int info:24, ver:8;
 #else
-       unsigned int ver:8, pad:24;
+       unsigned int ver:8, info:24;
 #endif
 };
 
@@ -349,14 +357,13 @@ struct cpuinfo_arc {
        struct cpuinfo_arc_bpu bpu;
        struct bcr_identity core;
        struct bcr_isa isa;
-       struct bcr_timer timers;
        unsigned int vec_base;
        struct cpuinfo_arc_ccm iccm, dccm;
        struct {
                unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
                             fpu_sp:1, fpu_dp:1, pad2:6,
                             debug:1, ap:1, smart:1, rtt:1, pad3:4,
-                            pad4:8;
+                            timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
        } extn;
        struct bcr_mpy extn_mpy;
        struct bcr_extn_xymem extn_xymem;
index 4fd7d62a6e30aa513b9e04f6ee881c219b96da96..49014f0ef36d19458203b904f41513f776aeedff 100644 (file)
 #ifdef CONFIG_ISA_ARCOMPACT
 #define TIMER0_IRQ      3
 #define TIMER1_IRQ      4
-#define IPI_IRQ                (NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */
 #else
 #define TIMER0_IRQ      16
 #define TIMER1_IRQ      17
-#define IPI_IRQ         19
 #endif
 
 #include <linux/interrupt.h>
index 258b0e5ad3329a614e59204df55d91873e969623..37c2f751eebf03d8f9f16d997bacdcb5b8f6323f 100644 (file)
@@ -22,6 +22,7 @@
 #define AUX_IRQ_CTRL           0x00E
 #define AUX_IRQ_ACT            0x043   /* Active Intr across all levels */
 #define AUX_IRQ_LVL_PEND       0x200   /* Pending Intr across all levels */
+#define AUX_IRQ_HINT           0x201   /* For generating Soft Interrupts */
 #define AUX_IRQ_PRIORITY       0x206
 #define ICAUSE                 0x40a
 #define AUX_IRQ_SELECT         0x40b
 /* Was Intr taken in User Mode */
 #define AUX_IRQ_ACT_BIT_U      31
 
-/* 0 is highest level, but taken by FIRQs, if present in design */
-#define ARCV2_IRQ_DEF_PRIO             0
+/*
+ * User space should be interruptible even by the lowest prio interrupt
+ * Safe even if the actual number of interrupt priorities is fewer, or even one
+ */
+#define ARCV2_IRQ_DEF_PRIO     15
 
 /* seed value for status register */
 #define ISA_INIT_STATUS_BITS   (STATUS_IE_MASK | STATUS_AD_MASK | \
@@ -112,6 +116,16 @@ static inline int arch_irqs_disabled(void)
        return arch_irqs_disabled_flags(arch_local_save_flags());
 }
 
+static inline void arc_softirq_trigger(int irq)
+{
+       write_aux_reg(AUX_IRQ_HINT, irq);
+}
+
+static inline void arc_softirq_clear(int irq)
+{
+       write_aux_reg(AUX_IRQ_HINT, 0);
+}
+
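
These two helpers are the self-IPI primitive: writing an IRQ number to
AUX_IRQ_HINT raises that interrupt on the local core, and writing 0
clears it. They are consumed below in arch/arc/kernel/mcip.c, where
mcip_ipi_send() falls back to a software-triggered interrupt when a CPU
targets itself, since ARConnect can only send IPIs to other cores.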
 #else
 
 .macro IRQ_DISABLE  scratch
index 46f4e5351b2a56e96d440a27201fc612d2e9c195..847e3bbe387fc92f9b4433bf7e08fc0b11e3ec70 100644 (file)
@@ -39,8 +39,8 @@ struct mcip_cmd {
 #define CMD_DEBUG_SET_MASK             0x34
 #define CMD_DEBUG_SET_SELECT           0x36
 
-#define CMD_GRTC_READ_LO               0x42
-#define CMD_GRTC_READ_HI               0x43
+#define CMD_GFRC_READ_LO               0x42
+#define CMD_GFRC_READ_HI               0x43
 
 #define CMD_IDU_ENABLE                 0x71
 #define CMD_IDU_DISABLE                        0x72
index 57af2f05ae8459e2ee2427d231370269894ba95f..d426d4215513333289ac85bebea412015e7562d2 100644 (file)
 #define __S111  PAGE_U_X_W_R
 
 /****************************************************************
- * Page Table Lookup split
+ * 2 tier (PGD:PTE) software page walker
  *
- * We implement 2 tier paging and since this is all software, we are free
- * to customize the span of a PGD / PTE entry to suit us
- *
- *                     32 bit virtual address
+ * [31]                    32 bit virtual address              [0]
  * -------------------------------------------------------
- * | BITS_FOR_PGD    |  BITS_FOR_PTE    |  BITS_IN_PAGE  |
+ * |               |                                       |
+ * |              |                                     |
+ * | BITS_FOR_PGD  |  BITS_FOR_PTE  | <-- PAGE_SHIFT --> |
  * -------------------------------------------------------
  *       |                  |                |
  *       |                  |                --> off in page frame
- *       |                 |
  *       |                  ---> index into Page Table
- *       |
  *       ----> index into Page Directory
+ *
+ * In a single page size configuration, only PAGE_SHIFT is fixed
+ * So both PGD and PTE sizing can be tweaked
+ *  e.g. 8K page (PAGE_SHIFT 13) can have
+ *  - PGDIR_SHIFT 21  -> 11:8:13 address split
+ *  - PGDIR_SHIFT 24  -> 8:11:13 address split
+ *
+ * If Super Page is configured, PGDIR_SHIFT becomes fixed too,
+ * so the sizing flexibility is gone.
  */
 
-#define BITS_IN_PAGE   PAGE_SHIFT
-
-/* Optimal Sizing of Pg Tbl - based on MMU page size */
-#if defined(CONFIG_ARC_PAGE_SIZE_8K)
-#define BITS_FOR_PTE   8               /* 11:8:13 */
-#elif defined(CONFIG_ARC_PAGE_SIZE_16K)
-#define BITS_FOR_PTE   8               /* 10:8:14 */
-#elif defined(CONFIG_ARC_PAGE_SIZE_4K)
-#define BITS_FOR_PTE   9               /* 11:9:12 */
+#if defined(CONFIG_ARC_HUGEPAGE_16M)
+#define PGDIR_SHIFT    24
+#elif defined(CONFIG_ARC_HUGEPAGE_2M)
+#define PGDIR_SHIFT    21
+#else
+/*
+ * Only normal page support, so PGDIR_SHIFT stays "hackable" (see above)
+ * Default value provides an 11:8:13 split (8K pages), 11:9:12 (4K)
+ */
+#define PGDIR_SHIFT    21
 #endif
 
-#define BITS_FOR_PGD   (32 - BITS_FOR_PTE - BITS_IN_PAGE)
+#define BITS_FOR_PTE   (PGDIR_SHIFT - PAGE_SHIFT)
+#define BITS_FOR_PGD   (32 - PGDIR_SHIFT)
 
-#define PGDIR_SHIFT    (32 - BITS_FOR_PGD)
 #define PGDIR_SIZE     (1UL << PGDIR_SHIFT)    /* vaddr span, not PDG sz */
 #define PGDIR_MASK     (~(PGDIR_SIZE-1))
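
To make the new split concrete, here is a small host-side sketch
(assuming the default 8K page, i.e. PAGE_SHIFT 13, with PGDIR_SHIFT 21;
not part of the patch) reproducing the 11:8:13 decomposition from the
comment above:

#include <stdio.h>

#define PAGE_SHIFT	13				/* 8K page, assumed */
#define PGDIR_SHIFT	21
#define BITS_FOR_PTE	(PGDIR_SHIFT - PAGE_SHIFT)	/* 8  */
#define BITS_FOR_PGD	(32 - PGDIR_SHIFT)		/* 11 */

int main(void)
{
	unsigned long vaddr = 0x80a1b2c4UL;

	printf("split %d:%d:%d\n", BITS_FOR_PGD, BITS_FOR_PTE, PAGE_SHIFT);
	printf("pgd idx %lu, pte idx %lu, page off %lu\n",
	       vaddr >> PGDIR_SHIFT,
	       (vaddr >> PAGE_SHIFT) & ((1UL << BITS_FOR_PTE) - 1),
	       vaddr & ((1UL << PAGE_SHIFT) - 1));
	return 0;
}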
 
index cbfec79137bf77735fa675d0eb2be57da217ba63..c1264607bbff3dca457fd47b8f078b62993b7af7 100644 (file)
@@ -45,11 +45,12 @@ VECTOR      reserved                ; Reserved slots
 VECTOR handle_interrupt        ; (16) Timer0
 VECTOR handle_interrupt        ; unused (Timer1)
 VECTOR handle_interrupt        ; unused (WDT)
-VECTOR handle_interrupt        ; (19) ICI (inter core interrupt)
-VECTOR handle_interrupt
-VECTOR handle_interrupt
-VECTOR handle_interrupt
-VECTOR handle_interrupt        ; (23) End of fixed IRQs
+VECTOR handle_interrupt        ; (19) Inter core Interrupt (IPI)
+VECTOR handle_interrupt        ; (20) perf Interrupt
+VECTOR handle_interrupt        ; (21) Software Triggered Intr (Self IPI)
+VECTOR handle_interrupt        ; unused
+VECTOR handle_interrupt        ; (23) unused
+# End of fixed IRQs
 
 .rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8
        VECTOR  handle_interrupt
@@ -211,7 +212,11 @@ debug_marker_syscall:
 ; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig
 ; entry was via Exception in DS which got preempted in kernel).
 ;
-; IRQ RTIE won't reliably restore DE bit and/or BTA, needs handling
+; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround
+;
+; The solution: return from the Intr w/o any delay slot quirks into a kernel
+; trampoline, and from pure kernel mode return to the delay slot, which
+; handles the DS bit/BTA correctly
+
 .Lintr_ret_to_delay_slot:
 debug_marker_ds:
 
@@ -222,18 +227,23 @@ debug_marker_ds:
        ld      r2, [sp, PT_ret]
        ld      r3, [sp, PT_status32]
 
+       ; STAT32 for Int return created from scratch
+       ; (No delay slot, disable further intr in trampoline)
+
        bic     r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK
        st      r0, [sp, PT_status32]
 
        mov     r1, .Lintr_ret_to_delay_slot_2
        st      r1, [sp, PT_ret]
 
+       ; Orig exception PC/STAT32 safekept @orig_r0 and @event stack slots
        st      r2, [sp, 0]
        st      r3, [sp, 4]
 
        b       .Lisr_ret_fast_path
 
 .Lintr_ret_to_delay_slot_2:
+       ; Trampoline to restore orig exception PC/STAT32/BTA/AUX_USER_SP
        sub     sp, sp, SZ_PT_REGS
        st      r9, [sp, -4]
 
@@ -243,11 +253,19 @@ debug_marker_ds:
        ld      r9, [sp, 4]
        sr      r9, [erstatus]
 
+       ; restore AUX_USER_SP if returning to U mode
+       bbit0   r9, STATUS_U_BIT, 1f
+       ld      r9, [sp, PT_sp]
+       sr      r9, [AUX_USER_SP]
+
+1:
        ld      r9, [sp, 8]
        sr      r9, [erbta]
 
        ld      r9, [sp, -4]
        add     sp, sp, SZ_PT_REGS
+
+       ; return from pure kernel mode to delay slot
        rtie
 
 END(ret_from_exception)
index 0394f9f61b466dea018af3ec371c65a8dbc17acb..942526322ae7125cb1e7910adb0523ed16ae7435 100644 (file)
@@ -14,6 +14,8 @@
 #include <linux/irqchip.h>
 #include <asm/irq.h>
 
+static int irq_prio;
+
 /*
  * Early Hardware specific Interrupt setup
  * -Called very early (start_kernel -> setup_arch -> setup_processor)
@@ -24,6 +26,14 @@ void arc_init_IRQ(void)
 {
        unsigned int tmp;
 
+       struct irq_build {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned int pad:3, firq:1, prio:4, exts:8, irqs:8, ver:8;
+#else
+               unsigned int ver:8, irqs:8, exts:8, prio:4, firq:1, pad:3;
+#endif
+       } irq_bcr;
+
        struct aux_irq_ctrl {
 #ifdef CONFIG_CPU_BIG_ENDIAN
                unsigned int res3:18, save_idx_regs:1, res2:1,
@@ -46,28 +56,25 @@ void arc_init_IRQ(void)
 
        WRITE_AUX(AUX_IRQ_CTRL, ictrl);
 
-       /* setup status32, don't enable intr yet as kernel doesn't want */
-       tmp = read_aux_reg(0xa);
-       tmp |= ISA_INIT_STATUS_BITS;
-       tmp &= ~STATUS_IE_MASK;
-       asm volatile("flag %0   \n"::"r"(tmp));
-
        /*
         * ARCv2 core intc provides multiple interrupt priorities (up to 16).
         * Typical builds though have only two levels (0-high, 1-low)
         * Linux by default uses lower prio 1 for most irqs, reserving 0 for
         * NMI style interrupts in future (say perf)
-        *
-        * Read the intc BCR to confirm that Linux default priority is avail
-        * in h/w
-        *
-        * Note:
-        *  IRQ_BCR[27..24] contains N-1 (for N priority levels) and prio level
-        *  is 0 based.
         */
-       tmp = (read_aux_reg(ARC_REG_IRQ_BCR) >> 24 ) & 0xF;
-       if (ARCV2_IRQ_DEF_PRIO > tmp)
-               panic("Linux default irq prio incorrect\n");
+
+       READ_BCR(ARC_REG_IRQ_BCR, irq_bcr);
+
+       irq_prio = irq_bcr.prio;        /* Encoded as N-1 for N levels */
+       pr_info("archs-intc\t: %d priority levels (default %d)%s\n",
+               irq_prio + 1, irq_prio,
+               irq_bcr.firq ? " FIRQ (not used)":"");
+
+       /* setup status32, don't enable intr yet as kernel doesn't want */
+       tmp = read_aux_reg(0xa);
+       tmp |= STATUS_AD_MASK | (irq_prio << 1);
+       tmp &= ~STATUS_IE_MASK;
+       asm volatile("flag %0   \n"::"r"(tmp));
 }
 
 static void arcv2_irq_mask(struct irq_data *data)
@@ -86,7 +93,7 @@ void arcv2_irq_enable(struct irq_data *data)
 {
        /* set default priority */
        write_aux_reg(AUX_IRQ_SELECT, data->irq);
-       write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
+       write_aux_reg(AUX_IRQ_PRIORITY, irq_prio);
 
        /*
         * hw auto enables (linux unmask) all by default
index 06bcedf19b622b4ef26503660349bc07a8a52d2e..224d1c3aa9c41dafdf52554450c2fa3b32ac8689 100644 (file)
@@ -81,9 +81,6 @@ static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
 {
        switch (irq) {
        case TIMER0_IRQ:
-#ifdef CONFIG_SMP
-       case IPI_IRQ:
-#endif
                irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
                break;
        default:
index bd237acdf4f2f9601efbf4c25c45067ddd2b69d9..c41c364b926cab9f50b6550e501f3ea1a8300017 100644 (file)
 #include <linux/smp.h>
 #include <linux/irq.h>
 #include <linux/spinlock.h>
+#include <asm/irqflags-arcv2.h>
 #include <asm/mcip.h>
 #include <asm/setup.h>
 
+#define IPI_IRQ                19
+#define SOFTIRQ_IRQ    21
+
 static char smp_cpuinfo_buf[128];
 static int idu_detected;
 
@@ -22,6 +26,7 @@ static DEFINE_RAW_SPINLOCK(mcip_lock);
 static void mcip_setup_per_cpu(int cpu)
 {
        smp_ipi_irq_setup(cpu, IPI_IRQ);
+       smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ);
 }
 
 static void mcip_ipi_send(int cpu)
@@ -29,46 +34,44 @@ static void mcip_ipi_send(int cpu)
        unsigned long flags;
        int ipi_was_pending;
 
+       /* ARConnect can only send IPI to others */
+       if (unlikely(cpu == raw_smp_processor_id())) {
+               arc_softirq_trigger(SOFTIRQ_IRQ);
+               return;
+       }
+
+       raw_spin_lock_irqsave(&mcip_lock, flags);
+
        /*
-        * NOTE: We must spin here if the other cpu hasn't yet
-        * serviced a previous message. This can burn lots
-        * of time, but we MUST follows this protocol or
-        * ipi messages can be lost!!!
-        * Also, we must release the lock in this loop because
-        * the other side may get to this same loop and not
-        * be able to ack -- thus causing deadlock.
+        * If receiver already has a pending interrupt, elide sending this one.
+        * If the receiver already has a pending interrupt, elide sending
+        * this one: Linux cross core calling works fine with multiple
+        * concurrent IPIs coalesced into one
         */
+       __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
+       ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
+       if (!ipi_was_pending)
+               __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
 
-       do {
-               raw_spin_lock_irqsave(&mcip_lock, flags);
-               __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
-               ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
-               if (ipi_was_pending == 0)
-                       break; /* break out but keep lock */
-               raw_spin_unlock_irqrestore(&mcip_lock, flags);
-       } while (1);
-
-       __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
        raw_spin_unlock_irqrestore(&mcip_lock, flags);
-
-#ifdef CONFIG_ARC_IPI_DBG
-       if (ipi_was_pending)
-               pr_info("IPI ACK delayed from cpu %d\n", cpu);
-#endif
 }
 
 static void mcip_ipi_clear(int irq)
 {
        unsigned int cpu, c;
        unsigned long flags;
-       unsigned int __maybe_unused copy;
+
+       if (unlikely(irq == SOFTIRQ_IRQ)) {
+               arc_softirq_clear(irq);
+               return;
+       }
 
        raw_spin_lock_irqsave(&mcip_lock, flags);
 
        /* Who sent the IPI */
        __mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0);
 
-       copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK);       /* 1,2,4,8... */
+       cpu = read_aux_reg(ARC_REG_MCIP_READBACK);      /* 1,2,4,8... */
 
        /*
         * In rare case, multiple concurrent IPIs sent to same target can
@@ -82,12 +85,6 @@ static void mcip_ipi_clear(int irq)
        } while (cpu);
 
        raw_spin_unlock_irqrestore(&mcip_lock, flags);
-
-#ifdef CONFIG_ARC_IPI_DBG
-       if (c != __ffs(copy))
-               pr_info("IPIs from %x coalesced to %x\n",
-                       copy, raw_smp_processor_id());
-#endif
 }
 
 static void mcip_probe_n_setup(void)
@@ -96,13 +93,13 @@ static void mcip_probe_n_setup(void)
 #ifdef CONFIG_CPU_BIG_ENDIAN
                unsigned int pad3:8,
                             idu:1, llm:1, num_cores:6,
-                            iocoh:1,  grtc:1, dbg:1, pad2:1,
+                            iocoh:1,  gfrc:1, dbg:1, pad2:1,
                             msg:1, sem:1, ipi:1, pad:1,
                             ver:8;
 #else
                unsigned int ver:8,
                             pad:1, ipi:1, sem:1, msg:1,
-                            pad2:1, dbg:1, grtc:1, iocoh:1,
+                            pad2:1, dbg:1, gfrc:1, iocoh:1,
                             num_cores:6, llm:1, idu:1,
                             pad3:8;
 #endif
@@ -111,12 +108,13 @@ static void mcip_probe_n_setup(void)
        READ_BCR(ARC_REG_MCIP_BCR, mp);
 
        sprintf(smp_cpuinfo_buf,
-               "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
+               "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s%s\n",
                mp.ver, mp.num_cores,
                IS_AVAIL1(mp.ipi, "IPI "),
                IS_AVAIL1(mp.idu, "IDU "),
+               IS_AVAIL1(mp.llm, "LLM "),
                IS_AVAIL1(mp.dbg, "DEBUG "),
-               IS_AVAIL1(mp.grtc, "GRTC"));
+               IS_AVAIL1(mp.gfrc, "GFRC"));
 
        idu_detected = mp.idu;
 
@@ -125,8 +123,8 @@ static void mcip_probe_n_setup(void)
                __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
        }
 
-       if (IS_ENABLED(CONFIG_ARC_HAS_GRTC) && !mp.grtc)
-               panic("kernel trying to use non-existent GRTC\n");
+       if (IS_ENABLED(CONFIG_ARC_HAS_GFRC) && !mp.gfrc)
+               panic("kernel trying to use non-existent GFRC\n");
 }
 
 struct plat_smp_ops plat_smp_ops = {
index e1b87444ea9a0740b9651ad6b68fd39f701121a0..cdc821df1809209df9792bcf9ecada0bc991753f 100644 (file)
@@ -42,9 +42,57 @@ struct task_struct *_current_task[NR_CPUS];  /* For stack switching */
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
+static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+{
+       if (is_isa_arcompact()) {
+               struct bcr_iccm_arcompact iccm;
+               struct bcr_dccm_arcompact dccm;
+
+               READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+               if (iccm.ver) {
+                       cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */
+                       cpu->iccm.base_addr = iccm.base << 16;
+               }
+
+               READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+               if (dccm.ver) {
+                       unsigned long base;
+                       cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */
+
+                       base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+                       cpu->dccm.base_addr = base & ~0xF;
+               }
+       } else {
+               struct bcr_iccm_arcv2 iccm;
+               struct bcr_dccm_arcv2 dccm;
+               unsigned long region;
+
+               READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+               if (iccm.ver) {
+                       cpu->iccm.sz = 256 << iccm.sz00;        /* 512B to 16M */
+                       if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+                               cpu->iccm.sz <<= iccm.sz01;
+
+                       region = read_aux_reg(ARC_REG_AUX_ICCM);
+                       cpu->iccm.base_addr = region & 0xF0000000;
+               }
+
+               READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+               if (dccm.ver) {
+                       cpu->dccm.sz = 256 << dccm.sz0;
+                       if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+                               cpu->dccm.sz <<= dccm.sz1;
+
+                       region = read_aux_reg(ARC_REG_AUX_DCCM);
+                       cpu->dccm.base_addr = region & 0xF0000000;
+               }
+       }
+}
+
 static void read_arc_build_cfg_regs(void)
 {
        struct bcr_perip uncached_space;
+       struct bcr_timer timer;
        struct bcr_generic bcr;
        struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
        unsigned long perip_space;
@@ -53,7 +101,11 @@ static void read_arc_build_cfg_regs(void)
        READ_BCR(AUX_IDENTITY, cpu->core);
        READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa);
 
-       READ_BCR(ARC_REG_TIMERS_BCR, cpu->timers);
+       READ_BCR(ARC_REG_TIMERS_BCR, timer);
+       cpu->extn.timer0 = timer.t0;
+       cpu->extn.timer1 = timer.t1;
+       cpu->extn.rtc = timer.rtc;
+
        cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
 
        READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
@@ -71,36 +123,11 @@ static void read_arc_build_cfg_regs(void)
        cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0;        /* 1,3 */
        cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
        cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
-
-       /* Note that we read the CCM BCRs independent of kernel config
-        * This is to catch the cases where user doesn't know that
-        * CCMs are present in hardware build
-        */
-       {
-               struct bcr_iccm iccm;
-               struct bcr_dccm dccm;
-               struct bcr_dccm_base dccm_base;
-               unsigned int bcr_32bit_val;
-
-               bcr_32bit_val = read_aux_reg(ARC_REG_ICCM_BCR);
-               if (bcr_32bit_val) {
-                       iccm = *((struct bcr_iccm *)&bcr_32bit_val);
-                       cpu->iccm.base_addr = iccm.base << 16;
-                       cpu->iccm.sz = 0x2000 << (iccm.sz - 1);
-               }
-
-               bcr_32bit_val = read_aux_reg(ARC_REG_DCCM_BCR);
-               if (bcr_32bit_val) {
-                       dccm = *((struct bcr_dccm *)&bcr_32bit_val);
-                       cpu->dccm.sz = 0x800 << (dccm.sz);
-
-                       READ_BCR(ARC_REG_DCCMBASE_BCR, dccm_base);
-                       cpu->dccm.base_addr = dccm_base.addr << 8;
-               }
-       }
-
        READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem);
 
+       /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
+       read_decode_ccm_bcr(cpu);
+
        read_decode_mmu_bcr();
        read_decode_cache_bcr();
 
@@ -208,9 +235,9 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
                       (unsigned int)(arc_get_core_freq() / 10000) % 100);
 
        n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
-                      IS_AVAIL1(cpu->timers.t0, "Timer0 "),
-                      IS_AVAIL1(cpu->timers.t1, "Timer1 "),
-                      IS_AVAIL2(cpu->timers.rtc, "64-bit RTC ",
+                      IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
+                      IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
+                      IS_AVAIL2(cpu->extn.rtc, "Local-64-bit-Ctr ",
                                 CONFIG_ARC_HAS_RTC));
 
        n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
@@ -232,8 +259,6 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 
                        n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
                }
-               n += scnprintf(buf + n, len - n, "%s",
-                              IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY));
        }
 
        n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
@@ -293,13 +318,13 @@ static void arc_chk_core_config(void)
        struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
        int fpu_enabled;
 
-       if (!cpu->timers.t0)
+       if (!cpu->extn.timer0)
                panic("Timer0 is not present!\n");
 
-       if (!cpu->timers.t1)
+       if (!cpu->extn.timer1)
                panic("Timer1 is not present!\n");
 
-       if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->timers.rtc)
+       if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->extn.rtc)
                panic("RTC is not present\n");
 
 #ifdef CONFIG_ARC_HAS_DCCM
@@ -334,6 +359,7 @@ static void arc_chk_core_config(void)
                panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
 
        if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+           IS_ENABLED(CONFIG_ARC_HAS_LLSC) &&
            !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
                panic("llock/scond livelock workaround missing\n");
 }
index ef6e9e15b82abf72d946ab45c3e20096854605c5..424e937da5c897d02cca7b1bd5781614f8d5d045 100644 (file)
@@ -336,11 +336,8 @@ irqreturn_t do_IPI(int irq, void *dev_id)
                int rc;
 
                rc = __do_IPI(msg);
-#ifdef CONFIG_ARC_IPI_DBG
-               /* IPI received but no valid @msg */
                if (rc)
                        pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
-#endif
                pending &= ~(1U << msg);
        } while (pending);
 
index dfad287f1db1c6b55b86faacc0b40d2472636795..156d9833ff84b5c77b7a7bd95d2be15c81d7cbf9 100644 (file)
@@ -62,7 +62,7 @@
 
 /********** Clock Source Device *********/
 
-#ifdef CONFIG_ARC_HAS_GRTC
+#ifdef CONFIG_ARC_HAS_GFRC
 
 static int arc_counter_setup(void)
 {
@@ -83,10 +83,10 @@ static cycle_t arc_counter_read(struct clocksource *cs)
 
        local_irq_save(flags);
 
-       __mcip_cmd(CMD_GRTC_READ_LO, 0);
+       __mcip_cmd(CMD_GFRC_READ_LO, 0);
        stamp.l = read_aux_reg(ARC_REG_MCIP_READBACK);
 
-       __mcip_cmd(CMD_GRTC_READ_HI, 0);
+       __mcip_cmd(CMD_GFRC_READ_HI, 0);
        stamp.h = read_aux_reg(ARC_REG_MCIP_READBACK);
 
        local_irq_restore(flags);
@@ -95,7 +95,7 @@ static cycle_t arc_counter_read(struct clocksource *cs)
 }
 
 static struct clocksource arc_counter = {
-       .name   = "ARConnect GRTC",
+       .name   = "ARConnect GFRC",
        .rating = 400,
        .read   = arc_counter_read,
        .mask   = CLOCKSOURCE_MASK(64),
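Editor's note: arc_counter_read() above assembles the 64-bit GFRC value from two 32-bit readbacks with interrupts disabled, so the halves cannot tear. A compilable sketch of that lo/hi pattern; the read_gfrc_* helpers are hypothetical stand-ins for the CMD_GFRC_READ_LO/HI commands plus the ARC_REG_MCIP_READBACK register, backed here by a fake counter:

#include <stdint.h>

static volatile uint64_t fake_gfrc;	/* stands in for the hardware counter */

static uint32_t read_gfrc_lo(void) { return (uint32_t)fake_gfrc; }
static uint32_t read_gfrc_hi(void) { return (uint32_t)(fake_gfrc >> 32); }

uint64_t gfrc_read64(void)
{
	union {
		struct { uint32_t l, h; } parts;	/* little-endian split */
		uint64_t full;
	} stamp;

	/* the kernel brackets the two reads with local_irq_save()/restore()
	 * so an interrupt cannot slip in between the halves */
	stamp.parts.l = read_gfrc_lo();
	stamp.parts.h = read_gfrc_hi();

	return stamp.full;
}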
index 5c0e5cc8ed1026373b0f34a97c95bbfc49e4f5c5..c6b6175d020329ac74eeefb0eebf1a6c353d6ea8 100644 (file)
@@ -153,10 +153,9 @@ choice
                  mobile SoCs in the Kona family of chips (e.g. bcm28155,
                  bcm11351, etc...)
 
-       config DEBUG_BCM63XX
+       config DEBUG_BCM63XX_UART
                bool "Kernel low-level debugging on BCM63XX UART"
                depends on ARCH_BCM_63XX
-               select DEBUG_UART_BCM63XX
 
        config DEBUG_BERLIN_UART
                bool "Marvell Berlin SoC Debug UART"
@@ -1414,7 +1413,7 @@ config DEBUG_LL_INCLUDE
        default "debug/vf.S" if DEBUG_VF_UART
        default "debug/vt8500.S" if DEBUG_VT8500_UART0
        default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1
-       default "debug/bcm63xx.S" if DEBUG_UART_BCM63XX
+       default "debug/bcm63xx.S" if DEBUG_BCM63XX_UART
        default "debug/digicolor.S" if DEBUG_DIGICOLOR_UA0
        default "mach/debug-macro.S"
 
@@ -1428,10 +1427,6 @@ config DEBUG_UART_8250
                ARCH_IOP13XX || ARCH_IOP32X || ARCH_IOP33X || ARCH_IXP4XX || \
                ARCH_RPC
 
-# Compatibility options for BCM63xx
-config DEBUG_UART_BCM63XX
-       def_bool ARCH_BCM_63XX
-
 config DEBUG_UART_PHYS
        hex "Physical base address of debug UART"
        default 0x00100a00 if DEBUG_NETX_UART
@@ -1529,7 +1524,7 @@ config DEBUG_UART_PHYS
        default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1
        default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2
        default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3
-       default 0xfffe8600 if DEBUG_UART_BCM63XX
+       default 0xfffe8600 if DEBUG_BCM63XX_UART
        default 0xfffff700 if ARCH_IOP33X
        depends on ARCH_EP93XX || \
                DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
@@ -1542,7 +1537,7 @@ config DEBUG_UART_PHYS
                DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \
                DEBUG_RMOBILE_SCIFA4 || DEBUG_S3C24XX_UART || \
                DEBUG_S3C64XX_UART || \
-               DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \
+               DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
                DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 || \
                DEBUG_AT91_UART
 
@@ -1588,7 +1583,7 @@ config DEBUG_UART_VIRT
        default 0xfb10c000 if DEBUG_REALVIEW_PB1176_PORT
        default 0xfc40ab00 if DEBUG_BRCMSTB_UART
        default 0xfc705000 if DEBUG_ZTE_ZX
-       default 0xfcfe8600 if DEBUG_UART_BCM63XX
+       default 0xfcfe8600 if DEBUG_BCM63XX_UART
        default 0xfd000000 if DEBUG_SPEAR3XX || DEBUG_SPEAR13XX
        default 0xfd012000 if DEBUG_MVEBU_UART0_ALTERNATE && ARCH_MV78XX0
        default 0xfd883000 if DEBUG_ALPINE_UART0
@@ -1638,7 +1633,7 @@ config DEBUG_UART_VIRT
                DEBUG_NETX_UART || \
                DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \
                DEBUG_S3C64XX_UART || \
-               DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \
+               DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
                DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0
 
 config DEBUG_UART_8250_SHIFT
index 4c23a68a0917049e4c7e9e864dcebca38d6b75b2..7a6a58ef8aaf815728fd59b0ca7af752495595b0 100644 (file)
@@ -106,6 +106,15 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
+# -fstack-protector-strong triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+nossp_flags := $(call cc-option, -fno-stack-protector)
+CFLAGS_atags_to_fdt.o := $(nossp_flags)
+CFLAGS_fdt.o := $(nossp_flags)
+CFLAGS_fdt_ro.o := $(nossp_flags)
+CFLAGS_fdt_rw.o := $(nossp_flags)
+CFLAGS_fdt_wip.o := $(nossp_flags)
+
 ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
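Editor's note: the Makefile comment above is the whole rationale — under -fstack-protector-strong any function with an addressable local gets an implicit canary check, and the call it emits has nowhere to land this early in boot. Roughly what the compiler inserts, shown as illustrative C (not code from the tree):

/* Illustrative only: roughly what -fstack-protector-strong makes of a
 * function with a local array. __stack_chk_guard and __stack_chk_fail()
 * are symbols the runtime normally provides -- and exactly what the
 * bare decompressor environment cannot link against. */
extern unsigned long __stack_chk_guard;
extern void __stack_chk_fail(void);

void parse_tag(const char *src)
{
	unsigned long canary = __stack_chk_guard;	/* inserted prologue */
	char buf[32];				/* addressable local => "strong" kicks in */

	buf[0] = src ? src[0] : 0;		/* ...function body... */

	if (canary != __stack_chk_guard)	/* inserted epilogue */
		__stack_chk_fail();
}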
 
index f3db13d2d90e7d457954c2cd93f3b9df45cb57d2..0cc150b87b86271a7b177afbbc5da952f70aa636 100644 (file)
        };
 };
 
+
+/include/ "tps65217.dtsi"
+
 &tps {
-       compatible = "ti,tps65217";
        /*
         * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only
         * mode") at poweroff.  Most BeagleBone versions do not support RTC-only
        ti,pmic-shutdown-controller;
 
        regulators {
-               #address-cells = <1>;
-               #size-cells = <0>;
-
                dcdc1_reg: regulator@0 {
-                       reg = <0>;
                        regulator-name = "vdds_dpr";
                        regulator-always-on;
                };
 
                dcdc2_reg: regulator@1 {
-                       reg = <1>;
                        /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
                        regulator-name = "vdd_mpu";
                        regulator-min-microvolt = <925000>;
                };
 
                dcdc3_reg: regulator@2 {
-                       reg = <2>;
                        /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
                        regulator-name = "vdd_core";
                        regulator-min-microvolt = <925000>;
                };
 
                ldo1_reg: regulator@3 {
-                       reg = <3>;
                        regulator-name = "vio,vrtc,vdds";
                        regulator-always-on;
                };
 
                ldo2_reg: regulator@4 {
-                       reg = <4>;
                        regulator-name = "vdd_3v3aux";
                        regulator-always-on;
                };
 
                ldo3_reg: regulator@5 {
-                       reg = <5>;
                        regulator-name = "vdd_1v8";
                        regulator-always-on;
                };
 
                ldo4_reg: regulator@6 {
-                       reg = <6>;
                        regulator-name = "vdd_3v3a";
                        regulator-always-on;
                };
index fda457b07e15c282887ede12eee576d56c6f3855..857d9894103a9edf6a7754585b5dddd6276d74e6 100644 (file)
 
 };
 
-&tps {
-       compatible = "ti,tps65217";
+/include/ "tps65217.dtsi"
 
+&tps {
        regulators {
-               #address-cells = <1>;
-               #size-cells = <0>;
-
                dcdc1_reg: regulator@0 {
-                       reg = <0>;
                        regulator-name = "vdds_dpr";
                        regulator-always-on;
                };
 
                dcdc2_reg: regulator@1 {
-                       reg = <1>;
                        /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
                        regulator-name = "vdd_mpu";
                        regulator-min-microvolt = <925000>;
                };
 
                dcdc3_reg: regulator@2 {
-                       reg = <2>;
                        /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
                        regulator-name = "vdd_core";
                        regulator-min-microvolt = <925000>;
                };
 
                ldo1_reg: regulator@3 {
-                       reg = <3>;
                        regulator-name = "vio,vrtc,vdds";
                        regulator-boot-on;
                        regulator-always-on;
                };
 
                ldo2_reg: regulator@4 {
-                       reg = <4>;
                        regulator-name = "vdd_3v3aux";
                        regulator-boot-on;
                        regulator-always-on;
                };
 
                ldo3_reg: regulator@5 {
-                       reg = <5>;
                        regulator-name = "vdd_1v8";
                        regulator-boot-on;
                        regulator-always-on;
                };
 
                ldo4_reg: regulator@6 {
-                       reg = <6>;
                        regulator-name = "vdd_3v3d";
                        regulator-boot-on;
                        regulator-always-on;
index 77559a1ded60fb4530d90aef42323ca187ed6ac9..f313999c503e1df2add311257af95594d51e464e 100644 (file)
        wp-gpios = <&gpio3 18 0>;
 };
 
-&tps {
-       compatible = "ti,tps65217";
+#include "tps65217.dtsi"
 
+&tps {
        regulators {
-               #address-cells = <1>;
-               #size-cells = <0>;
-
                dcdc1_reg: regulator@0 {
-                       reg = <0>;
                        /* +1.5V voltage with ±4% tolerance */
                        regulator-min-microvolt = <1450000>;
                        regulator-max-microvolt = <1550000>;
                };
 
                dcdc2_reg: regulator@1 {
-                       reg = <1>;
                        /* VDD_MPU voltage limits 0.95V - 1.1V with ±4% tolerance */
                        regulator-name = "vdd_mpu";
                        regulator-min-microvolt = <915000>;
                };
 
                dcdc3_reg: regulator@2 {
-                       reg = <2>;
                        /* VDD_CORE voltage limits 0.95V - 1.1V with ±4% tolerance */
                        regulator-name = "vdd_core";
                        regulator-min-microvolt = <915000>;
                };
 
                ldo1_reg: regulator@3 {
-                       reg = <3>;
                        /* +1.8V voltage with ±4% tolerance */
                        regulator-min-microvolt = <1750000>;
                        regulator-max-microvolt = <1870000>;
                };
 
                ldo2_reg: regulator@4 {
-                       reg = <4>;
                        /* +3.3V voltage with ±4% tolerance */
                        regulator-min-microvolt = <3175000>;
                        regulator-max-microvolt = <3430000>;
                };
 
                ldo3_reg: regulator@5 {
-                       reg = <5>;
                        /* +1.8V voltage with ±4% tolerance */
                        regulator-min-microvolt = <1750000>;
                        regulator-max-microvolt = <1870000>;
                };
 
                ldo4_reg: regulator@6 {
-                       reg = <6>;
                        /* +3.3V voltage with ±4% tolerance */
                        regulator-min-microvolt = <3175000>;
                        regulator-max-microvolt = <3430000>;
index 471a3a70ea1f173ce15190f77443e6860e15c328..8867aaaec54d565de7f147af80e2e1cb6c4b2967 100644 (file)
        vin-supply = <&vbat>;
 };
 
-&tps {
-       compatible = "ti,tps65217";
+/include/ "tps65217.dtsi"
 
+&tps {
        backlight {
                isel = <1>; /* ISET1 */
                fdim = <200>; /* TPS65217_BL_FDIM_200HZ */
        };
 
        regulators {
-               #address-cells = <1>;
-               #size-cells = <0>;
-
                dcdc1_reg: regulator@0 {
-                       reg = <0>;
                        /* VDD_1V8 system supply */
                        regulator-always-on;
                };
 
                dcdc2_reg: regulator@1 {
-                       reg = <1>;
                        /* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */
                        regulator-name = "vdd_core";
                        regulator-min-microvolt = <925000>;
                };
 
                dcdc3_reg: regulator@2 {
-                       reg = <2>;
                        /* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */
                        regulator-name = "vdd_mpu";
                        regulator-min-microvolt = <925000>;
                };
 
                ldo1_reg: regulator@3 {
-                       reg = <3>;
                        /* VRTC 1.8V always-on supply */
                        regulator-name = "vrtc,vdds";
                        regulator-always-on;
                };
 
                ldo2_reg: regulator@4 {
-                       reg = <4>;
                        /* 3.3V rail */
                        regulator-name = "vdd_3v3aux";
                        regulator-always-on;
                };
 
                ldo3_reg: regulator@5 {
-                       reg = <5>;
                        /* VDD_3V3A 3.3V rail */
                        regulator-name = "vdd_3v3a";
                        regulator-min-microvolt = <3300000>;
                };
 
                ldo4_reg: regulator@6 {
-                       reg = <6>;
                        /* VDD_3V3B 3.3V rail */
                        regulator-name = "vdd_3v3b";
                        regulator-always-on;
index 1b5b044fcd910bc316863370ba4cfe6cab976530..865de8500f1c85cb8505dcf5d35c07c4a660aed9 100644 (file)
@@ -46,7 +46,7 @@
                        gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>;
                        linux,code = <KEY_BACK>;
                        debounce-interval = <1000>;
-                       gpio-key,wakeup;
+                       wakeup-source;
                };
 
                front_button {
@@ -54,7 +54,7 @@
                        gpios = <&gpio1 25 GPIO_ACTIVE_HIGH>;
                        linux,code = <KEY_FRONT>;
                        debounce-interval = <1000>;
-                       gpio-key,wakeup;
+                       wakeup-source;
                };
        };
 
index d38edfa53bb9a95df14936af36dc600c55e8e2bc..3303c281697b017901f103aa96fee5af0a25ef8a 100644 (file)
        pinctrl-0 = <&uart4_pins>;
 };
 
+#include "tps65217.dtsi"
+
 &tps {
-       compatible = "ti,tps65217";
        ti,pmic-shutdown-controller;
 
        interrupt-parent = <&intc>;
        interrupts = <7>;       /* NNMI */
 
        regulators {
-               #address-cells = <1>;
-               #size-cells = <0>;
-
                dcdc1_reg: regulator@0 {
-                       reg = <0>;
                        /* VDDS_DDR */
                        regulator-min-microvolt = <1500000>;
                        regulator-max-microvolt = <1500000>;
                };
 
                dcdc2_reg: regulator@1 {
-                       reg = <1>;
                        /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
                        regulator-name = "vdd_mpu";
                        regulator-min-microvolt = <925000>;
                };
 
                dcdc3_reg: regulator@2 {
-                       reg = <2>;
                        /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
                        regulator-name = "vdd_core";
                        regulator-min-microvolt = <925000>;
                };
 
                ldo1_reg: regulator@3 {
-                       reg = <3>;
                        /* VRTC / VIO / VDDS*/
                        regulator-always-on;
                        regulator-min-microvolt = <1800000>;
                };
 
                ldo2_reg: regulator@4 {
-                       reg = <4>;
                        /* VDD_3V3AUX */
                        regulator-always-on;
                        regulator-min-microvolt = <3300000>;
                };
 
                ldo3_reg: regulator@5 {
-                       reg = <5>;
                        /* VDD_1V8 */
                        regulator-min-microvolt = <1800000>;
                        regulator-max-microvolt = <1800000>;
                };
 
                ldo4_reg: regulator@6 {
-                       reg = <6>;
                        /* VDD_3V3A */
                        regulator-min-microvolt = <3300000>;
                        regulator-max-microvolt = <3300000>;
index 04885f9f959e21fcffb388940d64be0b6eb32869..1fafaad516ba0481de34cc12320fb9e51c2884ce 100644 (file)
                        ti,mbox-num-users = <4>;
                        ti,mbox-num-fifos = <8>;
                        mbox_wkupm3: wkup_m3 {
+                               ti,mbox-send-noirq;
                                ti,mbox-tx = <0 0 0>;
                                ti,mbox-rx = <0 0 3>;
                        };
index df955ba4dc6203273ff795cc1492e698764eda0d..92068fbf8b577440409c8e029d7cfba28da43cf8 100644 (file)
@@ -73,7 +73,7 @@
        global_timer: timer@48240200 {
                compatible = "arm,cortex-a9-global-timer";
                reg = <0x48240200 0x100>;
-               interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
                interrupt-parent = <&gic>;
                clocks = <&mpu_periphclk>;
        };
@@ -81,7 +81,7 @@
        local_timer: timer@48240600 {
                compatible = "arm,cortex-a9-twd-timer";
                reg = <0x48240600 0x100>;
-               interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
                interrupt-parent = <&gic>;
                clocks = <&mpu_periphclk>;
        };
                        ti,mbox-num-users = <4>;
                        ti,mbox-num-fifos = <8>;
                        mbox_wkupm3: wkup_m3 {
+                               ti,mbox-send-noirq;
                                ti,mbox-tx = <0 0 0>;
                                ti,mbox-rx = <0 0 3>;
                        };
index 64d43325bcbc73aa08bf1da025c255883737f233..ecd09ab6d581bf95aa0b8bee8d596c3da8b9ee22 100644 (file)
                pinctrl-names = "default";
                pinctrl-0 = <&pixcir_ts_pins>;
                reg = <0x5c>;
-               interrupt-parent = <&gpio3>;
-               interrupts = <22 0>;
 
                attb-gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>;
 
                 * 0x264 represents the offset of padconf register of
                 * gpio3_22 from am43xx_pinmux base.
                 */
-               interrupts-extended = <&gpio3 22 IRQ_TYPE_NONE>,
+               interrupts-extended = <&gpio3 22 IRQ_TYPE_EDGE_FALLING>,
                                      <&am43xx_pinmux 0x264>;
                interrupt-names = "tsc", "wakeup";
 
index 746fd2b179587fe4522724f5b42b16202e97edef..d580e2b70f9a65f7dc078799add6d56628ae0169 100644 (file)
                pinctrl-0 = <&pixcir_ts_pins>;
                reg = <0x5c>;
                interrupt-parent = <&gpio1>;
-               interrupts = <17 0>;
+               interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
 
                attb-gpio = <&gpio1 17 GPIO_ACTIVE_HIGH>;
 
index 36c0fa6c362ae32000701ffec7e81df73f2a187a..a0986c65be0cfa71856947c58f57abe8df61c7fd 100644 (file)
 
                sound0_master: simple-audio-card,codec {
                        sound-dai = <&tlv320aic3104>;
+                       assigned-clocks = <&clkoutmux2_clk_mux>;
+                       assigned-clock-parents = <&sys_clk2_dclk_div>;
                        clocks = <&clkout2_clk>;
                };
        };
        pinctrl-names = "default", "sleep";
        pinctrl-0 = <&mcasp3_pins_default>;
        pinctrl-1 = <&mcasp3_pins_sleep>;
+       assigned-clocks = <&mcasp3_ahclkx_mux>;
+       assigned-clock-parents = <&sys_clkin2>;
        status = "okay";
 
        op-mode = <0>;  /* MCASP_IIS_MODE */
index c53882643ae96b6da8b15c6a26e5fd93ba3300cb..1c06cb76da07c14132e60cd29f5406dab9e9db66 100644 (file)
                        DRA7XX_CORE_IOPAD(0x35b8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d20.rgmii1_rd3 */
                        DRA7XX_CORE_IOPAD(0x35bc, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d21.rgmii1_rd2 */
                        DRA7XX_CORE_IOPAD(0x35c0, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d22.rgmii1_rd1 */
-                       DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLUP | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */
+                       DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */
                >;
        };
 
        pinctrl-names = "default";
        pinctrl-0 = <&qspi1_pins>;
 
-       spi-max-frequency = <20000000>;
+       spi-max-frequency = <48000000>;
 
        spi_flash: spi_flash@0 {
                #address-cells = <1>;
                #size-cells = <1>;
                compatible = "spansion,m25p80", "jedec,spi-nor";
                reg = <0>;                              /* CS0 */
-               spi-max-frequency = <20000000>;
+               spi-max-frequency = <48000000>;
 
                partition@0 {
                        label = "uboot";
                ti,debounce-tol = /bits/ 16 <10>;
                ti,debounce-rep = /bits/ 16 <1>;
 
-               linux,wakeup;
+               wakeup-source;
        };
 };
 
 
 &cpsw_emac0 {
        phy_id = <&davinci_mdio>, <0>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-txid";
        dual_emac_res_vlan = <0>;
 };
 
 &cpsw_emac1 {
        phy_id = <&davinci_mdio>, <1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-txid";
        dual_emac_res_vlan = <1>;
 };
 
 };
 
 &usb2 {
-       dr_mode = "peripheral";
+       dr_mode = "host";
 };
 
 &mcasp3 {
index 77bb8e17401a26cad24dcfd2ab5a7c9ddbd4d312..988e99632d49953d012b72e674f8fa040883a1c4 100644 (file)
@@ -25,8 +25,8 @@
 &dra7_pmx_core {
        uart3_pins_default: uart3_pins_default {
                pinctrl-single,pins = <
-                       DRA7XX_CORE_IOPAD(0x37f8, PIN_INPUT_SLEW | MUX_MODE2)   /* uart2_ctsn.uart3_rxd */
-                       DRA7XX_CORE_IOPAD(0x37fc, PIN_INPUT_SLEW | MUX_MODE1)   /* uart2_rtsn.uart3_txd */
+                       DRA7XX_CORE_IOPAD(0x3648, PIN_INPUT_SLEW | MUX_MODE0)   /* uart3_rxd */
+                       DRA7XX_CORE_IOPAD(0x364c, PIN_INPUT_SLEW | MUX_MODE0)   /* uart3_txd */
                >;
        };
 
        pinctrl-0 = <&i2c5_pins_default>;
        clock-frequency = <400000>;
 
-       eeprom_base: atmel@50 {
+       eeprom_base: atmel@54 {
                compatible = "atmel,24c08";
-               reg = <0x50>;
+               reg = <0x54>;
                pagesize = <16>;
        };
 
index 13cf69a8d0fb392b264910f014cf7ff8434f9e2b..fb9e1bbf23385b85b0b82ddb153b922b2d2e0178 100644 (file)
                                nand-on-flash-bbt;
 
                                partitions {
+                                       compatible = "fixed-partitions";
                                        #address-cells = <1>;
                                        #size-cells = <1>;
 
index 77ddff036409f7cb84d28342f8b973ce9792d09e..e683856c507c8bedacb5f8746a43cf9e4aef2207 100644 (file)
 
                        macb0: ethernet@f8008000 {
                                pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_macb0_default>;
+                               pinctrl-0 = <&pinctrl_macb0_default &pinctrl_macb0_phy_irq>;
                                phy-mode = "rmii";
                                status = "okay";
+
+                               ethernet-phy@1 {
+                                       reg = <0x1>;
+                                       interrupt-parent = <&pioA>;
+                                       interrupts = <73 IRQ_TYPE_LEVEL_LOW>;
+                               };
                        };
 
                        pdmic@f8018000 {
                                        bias-disable;
                                };
 
+                               pinctrl_macb0_phy_irq: macb0_phy_irq {
+                                       pinmux = <PIN_PC9__GPIO>;
+                               };
+
                                pinctrl_pdmic_default: pdmic_default {
                                        pinmux = <PIN_PB26__PDMIC_DAT>,
                                                <PIN_PB27__PDMIC_CLK>;
index 131614f28e758653e34cc2b993bb6a5a28f20bbb..569026e8f96cadaf25eeb10ca207c02c7f175121 100644 (file)
                        macb0: ethernet@f8020000 {
                                phy-mode = "rmii";
                                status = "okay";
+                               pinctrl-names = "default";
+                               pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
 
                                phy0: ethernet-phy@1 {
                                        interrupt-parent = <&pioE>;
-                                       interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
+                                       interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
                                        reg = <1>;
                                };
                        };
                                                atmel,pins =
                                                        <AT91_PIOE 8 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
                                        };
+                                       pinctrl_macb0_phy_irq: macb0_phy_irq_0 {
+                                               atmel,pins =
+                                                       <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+                                       };
                                };
                        };
                };
index 2d4a33100af6bdc4fcd4a3500467456673f54325..4e98cda974032221dbf5a0917de97807653cf943 100644 (file)
                        };
 
                        macb0: ethernet@f8020000 {
+                               pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
                                phy-mode = "rmii";
                                status = "okay";
+
+                               ethernet-phy@1 {
+                                       reg = <0x1>;
+                                       interrupt-parent = <&pioE>;
+                                       interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+                               };
                        };
 
                        mmc1: mmc@fc000000 {
 
                        pinctrl@fc06a000 {
                                board {
+                                       pinctrl_macb0_phy_irq: macb0_phy_irq {
+                                               atmel,pins =
+                                                       <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+                                       };
                                        pinctrl_mmc0_cd: mmc0_cd {
                                                atmel,pins =
                                                        <AT91_PIOE 5 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
index ca4ddf86817ab64dc6a8a31193191460bfaf8d4b..626c67d666269d5e878c650b76e8babc424bd48a 100644 (file)
        };
 
        panel: panel {
-               compatible = "qd,qd43003c0-40", "simple-panel";
+               compatible = "qiaodian,qd43003c0-40", "simple-panel";
                backlight = <&backlight>;
                power-supply = <&panel_reg>;
                #address-cells = <1>;
index 4f6ae921656f16dfd3814d7a70972be798e0fce9..f74d3db4846dacacd54b37e9cb1d35d707689368 100644 (file)
                                #size-cells = <1>;
                                reg = <0x2100000 0x10000>;
                                ranges = <0 0x2100000 0x10000>;
-                               interrupt-parent = <&intc>;
                                clocks = <&clks IMX6QDL_CLK_CAAM_MEM>,
                                         <&clks IMX6QDL_CLK_CAAM_ACLK>,
                                         <&clks IMX6QDL_CLK_CAAM_IPG>,
index bf4143c6cb8f10991d4abbaae126966f9f9c753c..b84af3da8c84d596b2b950489541c4efd9216783 100644 (file)
@@ -14,7 +14,7 @@
 #include "kirkwood-synology.dtsi"
 
 / {
-       model = "Synology DS111";
+       model = "Synology DS112";
        compatible = "synology,ds111", "marvell,kirkwood";
 
        memory {
index 09eed3cea0afcd0ab74df3a13de8525714f388cc..36eec7392ab491c839f86ca3299cc716af21e356 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WVL/VL
  *
- * Copyright (C) 2015, rogershimizu@gmail.com
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
                button@1 {
                        label = "Function Button";
                        linux,code = <KEY_OPTION>;
-                       gpios = <&gpio0 45 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
                };
 
                button@2 {
                        label = "Power-on Switch";
                        linux,code = <KEY_RESERVED>;
                        linux,input-type = <5>;
-                       gpios = <&gpio0 46 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
                };
 
                button@3 {
                        label = "Power-auto Switch";
                        linux,code = <KEY_ESC>;
                        linux,input-type = <5>;
-                       gpios = <&gpio0 47 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
                };
        };
 
 
                led@1 {
                        label = "lswvl:red:alarm";
-                       gpios = <&gpio0 36 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>;
                };
 
                led@2 {
                        label = "lswvl:red:func";
-                       gpios = <&gpio0 37 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 5 GPIO_ACTIVE_HIGH>;
                };
 
                led@3 {
                        label = "lswvl:amber:info";
-                       gpios = <&gpio0 38 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>;
                };
 
                led@4 {
                        label = "lswvl:blue:func";
-                       gpios = <&gpio0 39 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
                };
 
                led@5 {
                        label = "lswvl:blue:power";
-                       gpios = <&gpio0 40 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
                        default-state = "keep";
                };
 
                led@6 {
                        label = "lswvl:red:hdderr0";
-                       gpios = <&gpio0 34 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
                };
 
                led@7 {
                        label = "lswvl:red:hdderr1";
-                       gpios = <&gpio0 35 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
                };
        };
 
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio0 43 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio1 11 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
index f5db16a08597e9a732f575c13b00415ce8e40920..b13ec20a708873bb65619ccd203c3552b04bc246 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WXL/WSXL
  *
- * Copyright (C) 2015, rogershimizu@gmail.com
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
                button@1 {
                        label = "Function Button";
                        linux,code = <KEY_OPTION>;
-                       gpios = <&gpio1 41 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
                };
 
                button@2 {
                        label = "Power-on Switch";
                        linux,code = <KEY_RESERVED>;
                        linux,input-type = <5>;
-                       gpios = <&gpio1 42 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 10 GPIO_ACTIVE_LOW>;
                };
 
                button@3 {
                        label = "Power-auto Switch";
                        linux,code = <KEY_ESC>;
                        linux,input-type = <5>;
-                       gpios = <&gpio1 43 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 11 GPIO_ACTIVE_LOW>;
                };
        };
 
 
                led@1 {
                        label = "lswxl:blue:func";
-                       gpios = <&gpio1 36 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
                };
 
                led@2 {
                        label = "lswxl:red:alarm";
-                       gpios = <&gpio1 49 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
                };
 
                led@3 {
 
                led@4 {
                        label = "lswxl:blue:power";
-                       gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
+                       default-state = "keep";
                };
 
                led@5 {
                        label = "lswxl:red:func";
-                       gpios = <&gpio1 5 GPIO_ACTIVE_LOW>;
-                       default-state = "keep";
+                       gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
                };
 
                led@6 {
                        label = "lswxl:red:hdderr0";
-                       gpios = <&gpio1 2 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio0 8 GPIO_ACTIVE_HIGH>;
                };
 
                led@7 {
                        label = "lswxl:red:hdderr1";
-                       gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>;
                };
        };
 
                pinctrl-0 = <&pmx_fan_low &pmx_fan_high &pmx_fan_lock>;
                pinctrl-names = "default";
 
-               gpios = <&gpio0 47 GPIO_ACTIVE_LOW
-                        &gpio0 48 GPIO_ACTIVE_LOW>;
+               gpios = <&gpio1 16 GPIO_ACTIVE_LOW
+                        &gpio1 15 GPIO_ACTIVE_LOW>;
 
                gpio-fan,speed-map = <0 3
                                1500 2
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio1 49 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio1 8 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
                        enable-active-high;
                        regulator-always-on;
                        regulator-boot-on;
-                       gpio = <&gpio0 37 GPIO_ACTIVE_HIGH>;
+                       gpio = <&gpio1 5 GPIO_ACTIVE_HIGH>;
                };
                hdd_power0: regulator@2 {
                        compatible = "regulator-fixed";
index 1db6f2c506cce3209e64418493039046c79fd0ec..8082d64266a37c33c8fa2e7e722d3683519c3d0a 100644 (file)
        chip-delay = <40>;
        status = "okay";
        partitions {
+               compatible = "fixed-partitions";
                #address-cells = <1>;
                #size-cells = <1>;
 
index 7fed0bd4f3deea46281881ead81b2ebd29c5fa4b..00805322367e7eb73bcc4ba5bda124eee5372467 100644 (file)
        clock-frequency = <400000>;
 };
 
-&i2c2 {
-       clock-frequency = <400000>;
-};
-
-&i2c3 {
-       clock-frequency = <400000>;
-};
-
 /*
  * Only found on the wireless SOM. For the SOM without wireless, the pins for
  * MMC3 can be routed with jumpers to the second MMC slot on the devkit and
                interrupt-parent = <&gpio5>;
                interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
                ref-clock-frequency = <26000000>;
+               tcxo-clock-frequency = <26000000>;
        };
 };
 
index 888412c63f97492ef445368c3208434d748912f2..902657d6713b073df82d33158d3d1cd0fac5ef16 100644 (file)
        };
 };
 
+&gpio8 {
+       /* TI trees use GPIO instead of msecure, see also muxing */
+       p234 {
+               gpio-hog;
+               gpios = <10 GPIO_ACTIVE_HIGH>;
+               output-high;
+               line-name = "gpio8_234/msecure";
+       };
+};
+
 &omap5_pmx_core {
        pinctrl-names = "default";
        pinctrl-0 = <
                >;
        };
 
+       /* TI trees use GPIO mode; msecure mode does not work reliably? */
+       palmas_msecure_pins: palmas_msecure_pins {
+               pinctrl-single,pins = <
+                       OMAP5_IOPAD(0x180, PIN_OUTPUT | MUX_MODE6) /* gpio8_234 */
+               >;
+       };
+
        usbhost_pins: pinmux_usbhost_pins {
                pinctrl-single,pins = <
                        OMAP5_IOPAD(0x0c4, PIN_INPUT | MUX_MODE0) /* usbb2_hsic_strobe */
                        &usbhost_wkup_pins
        >;
 
+       palmas_sys_nirq_pins: pinmux_palmas_sys_nirq_pins {
+               pinctrl-single,pins = <
+                       OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) /* sys_nirq1 */
+               >;
+       };
+
        usbhost_wkup_pins: pinmux_usbhost_wkup_pins {
                pinctrl-single,pins = <
                        OMAP5_IOPAD(0x05a, PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */
                interrupt-controller;
                #interrupt-cells = <2>;
                ti,system-power-controller;
+               pinctrl-names = "default";
+               pinctrl-0 = <&palmas_sys_nirq_pins &palmas_msecure_pins>;
 
                extcon_usb3: palmas_usb {
                        compatible = "ti,palmas-usb-vid";
                        #clock-cells = <0>;
                };
 
+               rtc {
+                       compatible = "ti,palmas-rtc";
+                       interrupt-parent = <&palmas>;
+                       interrupts = <8 IRQ_TYPE_NONE>;
+                       ti,backup-battery-chargeable;
+                       ti,backup-battery-charge-high-current;
+               };
+
                palmas_pmic {
                        compatible = "ti,palmas-pmic";
                        interrupt-parent = <&palmas>;
index 3daec912b4bf118edba55ef54fdf633413e696e5..aae8a7aceab75d53f84a2eb94fc0d6e020e773a9 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WTGL
  *
- * Copyright (C) 2015, Roger Shimizu <rogershimizu@gmail.com>
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -69,8 +70,6 @@
 
                internal-regs {
                        pinctrl: pinctrl@10000 {
-                               pinctrl-0 = <&pmx_usb_power &pmx_power_hdd
-                                       &pmx_fan_low &pmx_fan_high &pmx_fan_lock>;
                                pinctrl-names = "default";
 
                                pmx_led_power: pmx-leds {
                led@1 {
                        label = "lswtgl:blue:power";
                        gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+                       default-state = "keep";
                };
 
                led@2 {
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio0 2 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
        };
 };
 
+&devbus_bootcs {
+       status = "okay";
+       devbus,keep-config;
+
+       flash@0 {
+               compatible = "jedec-flash";
+               reg = <0 0x40000>;
+               bank-width = <1>;
+
+               partitions {
+                       compatible = "fixed-partitions";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       header@0 {
+                               reg = <0 0x30000>;
+                               read-only;
+                       };
+
+                       uboot@30000 {
+                               reg = <0x30000 0xF000>;
+                               read-only;
+                       };
+
+                       uboot_env@3F000 {
+                               reg = <0x3F000 0x1000>;
+                       };
+               };
+       };
+};
+
 &mdio {
        status = "okay";
 
index 78a21f2828dff6d1cbafa9139b12791d4840936d..c548cabb102f5ae1eb063a43117adedecf52b0b0 100644 (file)
 };
 
 &extal1_clk {
-       clock-frequency = <25000000>;
+       clock-frequency = <24000000>;
 };
 &extal2_clk {
        clock-frequency = <48000000>;
index 1afe24629d1f85de2a9ec3f8ef62c6372d014abd..b0c912feaa2f0e016b65ff27e272b82c2b0795ed 100644 (file)
@@ -90,7 +90,7 @@
 #define PIN_PA14__I2SC1_MCK            PINMUX_PIN(PIN_PA14, 4, 2)
 #define PIN_PA14__FLEXCOM3_IO2         PINMUX_PIN(PIN_PA14, 5, 1)
 #define PIN_PA14__D9                   PINMUX_PIN(PIN_PA14, 6, 2)
-#define PIN_PA15                       14
+#define PIN_PA15                       15
 #define PIN_PA15__GPIO                 PINMUX_PIN(PIN_PA15, 0, 0)
 #define PIN_PA15__SPI0_MOSI            PINMUX_PIN(PIN_PA15, 1, 1)
 #define PIN_PA15__TF1                  PINMUX_PIN(PIN_PA15, 2, 1)
index b8032bca462152e6904d299f84514c483dde0165..db1151c18466c3ac530be0ba39ba2f00be3a06bc 100644 (file)
                        dbgu: serial@fc069000 {
                                compatible = "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
                                reg = <0xfc069000 0x200>;
-                               interrupts = <2 IRQ_TYPE_LEVEL_HIGH 7>;
+                               interrupts = <45 IRQ_TYPE_LEVEL_HIGH 7>;
                                pinctrl-names = "default";
                                pinctrl-0 = <&pinctrl_dbgu>;
                                clocks = <&dbgu_clk>;
index d0c74385331803383d5296d7157516aa63c63758..27a333eb89870167b82a170d3553bd5078fafdfb 100644 (file)
                        };
                        mmcsd_default_mode: mmcsd_default {
                                mmcsd_default_cfg1 {
-                                       /* MCCLK */
-                                       pins = "GPIO8_B10";
-                                       ste,output = <0>;
-                               };
-                               mmcsd_default_cfg2 {
-                                       /* MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 */
-                                       pins = "GPIO10_C11", "GPIO15_A12",
-                                       "GPIO16_C13", "GPIO23_D15";
-                                       ste,output = <1>;
-                               };
-                               mmcsd_default_cfg3 {
-                                       /* MCCMD, MCDAT3-0, MCMSFBCLK */
-                                       pins = "GPIO9_A10", "GPIO11_B11",
-                                       "GPIO12_A11", "GPIO13_C12",
-                                       "GPIO14_B12", "GPIO24_C15";
-                                       ste,input = <1>;
+                                       /*
+                                        * MCCLK, MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2
+                                        * MCCMD, MCDAT3-0, MCMSFBCLK
+                                        */
+                                       pins = "GPIO8_B10", "GPIO9_A10", "GPIO10_C11", "GPIO11_B11",
+                                              "GPIO12_A11", "GPIO13_C12", "GPIO14_B12", "GPIO15_A12",
+                                              "GPIO16_C13", "GPIO23_D15", "GPIO24_C15";
+                                       ste,output = <2>;
                                };
                        };
                };
                        clock-names = "mclk", "apb_pclk";
                        interrupt-parent = <&vica>;
                        interrupts = <22>;
-                       max-frequency = <48000000>;
+                       max-frequency = <400000>;
                        bus-width = <4>;
                        cap-mmc-highspeed;
                        cap-sd-highspeed;
+                       full-pwr-cycle;
+                       /*
+                        * The STw4811 circuit used with the Nomadik strictly
+                        * requires that all of these signal direction pins be
+                        * routed and used for its 4-bit levelshifter.
+                        */
+                       st,sig-dir-dat0;
+                       st,sig-dir-dat2;
+                       st,sig-dir-dat31;
+                       st,sig-dir-cmd;
+                       st,sig-pin-fbclk;
                        pinctrl-names = "default";
                        pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>;
                        vmmc-supply = <&vmmc_regulator>;
diff --git a/arch/arm/boot/dts/tps65217.dtsi b/arch/arm/boot/dts/tps65217.dtsi
new file mode 100644 (file)
index 0000000..a632724
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Integrated Power Management Chip
+ * http://www.ti.com/lit/ds/symlink/tps65217.pdf
+ */
+
+&tps {
+       compatible = "ti,tps65217";
+
+       regulators {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               dcdc1_reg: regulator@0 {
+                       reg = <0>;
+                       regulator-compatible = "dcdc1";
+               };
+
+               dcdc2_reg: regulator@1 {
+                       reg = <1>;
+                       regulator-compatible = "dcdc2";
+               };
+
+               dcdc3_reg: regulator@2 {
+                       reg = <2>;
+                       regulator-compatible = "dcdc3";
+               };
+
+               ldo1_reg: regulator@3 {
+                       reg = <3>;
+                       regulator-compatible = "ldo1";
+               };
+
+               ldo2_reg: regulator@4 {
+                       reg = <4>;
+                       regulator-compatible = "ldo2";
+               };
+
+               ldo3_reg: regulator@5 {
+                       reg = <5>;
+                       regulator-compatible = "ldo3";
+               };
+
+               ldo4_reg: regulator@6 {
+                       reg = <6>;
+                       regulator-compatible = "ldo4";
+               };
+       };
+};
index 2dc6da70ae598af4a43f3b8a9bde05d26d386c77..d7ed252708c57fa86cff5d51887bd756b53b48e7 100644 (file)
@@ -16,7 +16,7 @@
  */
 #include <linux/module.h>
 #include <linux/kernel.h>
-
+#include <asm/div64.h>
 #include <asm/hardware/icst.h>
 
 /*
@@ -29,7 +29,11 @@ EXPORT_SYMBOL(icst525_s2div);
 
 unsigned long icst_hz(const struct icst_params *p, struct icst_vco vco)
 {
-       return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * p->s2div[vco.s]);
+       u64 dividend = p->ref * 2 * (u64)(vco.v + 8);
+       u32 divisor = (vco.r + 2) * p->s2div[vco.s];
+
+       do_div(dividend, divisor);
+       return (unsigned long)dividend;
 }
 
 EXPORT_SYMBOL(icst_hz);
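Editor's note: icst_hz() previously computed ref * 2 * (vco.v + 8) in unsigned long, which can overflow 32 bits before the division; the fix widens the dividend to u64 and divides with do_div(), since 32-bit ARM lacks a native 64-by-32 divide. (The next hunk separately adds the missing i++ that keeps the s2div search loop bounded.) A userspace analogue with illustrative numbers, not values from a real ICST parameter table:

#include <stdint.h>
#include <stdio.h>

unsigned long icst_hz_demo(uint32_t ref, uint32_t v, uint32_t r, uint32_t s2div)
{
	/* ref * 2 * (v + 8) overflows 32 bits: 24 MHz * 2 * 128 is ~6.1e9 */
	uint64_t dividend = (uint64_t)ref * 2 * (v + 8);
	uint32_t divisor = (r + 2) * s2div;

	return (unsigned long)(dividend / divisor);	/* kernel uses do_div() */
}

int main(void)
{
	printf("%lu Hz\n", icst_hz_demo(24000000, 120, 22, 2));	/* 128000000 */
	return 0;
}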
@@ -58,6 +62,7 @@ icst_hz_to_vco(const struct icst_params *p, unsigned long freq)
 
                if (f > p->vco_min && f <= p->vco_max)
                        break;
+               i++;
        } while (i < 8);
 
        if (i >= 8)
index 314f6be2dca2b4aec6aaa82cc8cfb2ba06c2bb29..8e8b2ace9b7c5fb624f4e24122cf269538195f3d 100644 (file)
@@ -426,6 +426,7 @@ CONFIG_SUNXI_WATCHDOG=y
 CONFIG_IMX2_WDT=y
 CONFIG_TEGRA_WATCHDOG=m
 CONFIG_MESON_WATCHDOG=y
+CONFIG_DW_WATCHDOG=y
 CONFIG_DIGICOLOR_WATCHDOG=y
 CONFIG_MFD_AS3711=y
 CONFIG_MFD_AS3722=y
index c5e1943e5427db037b98ce779566aeab1b1c9310..d18d6b42fcf52f9db076af65a0f6aad022a1173c 100644 (file)
@@ -50,6 +50,7 @@ CONFIG_SOC_AM33XX=y
 CONFIG_SOC_AM43XX=y
 CONFIG_SOC_DRA7XX=y
 CONFIG_ARM_THUMBEE=y
+CONFIG_ARM_KERNMEM_PERMS=y
 CONFIG_ARM_ERRATA_411920=y
 CONFIG_ARM_ERRATA_430973=y
 CONFIG_SMP=y
@@ -177,6 +178,7 @@ CONFIG_TI_CPTS=y
 CONFIG_AT803X_PHY=y
 CONFIG_SMSC_PHY=y
 CONFIG_USB_USBNET=m
+CONFIG_USB_NET_SMSC75XX=m
 CONFIG_USB_NET_SMSC95XX=m
 CONFIG_USB_ALI_M5632=y
 CONFIG_USB_AN2720=y
@@ -290,24 +292,23 @@ CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
-CONFIG_OMAP2_DSS=m
-CONFIG_OMAP5_DSS_HDMI=y
-CONFIG_OMAP2_DSS_SDI=y
-CONFIG_OMAP2_DSS_DSI=y
+CONFIG_FB_OMAP5_DSS_HDMI=y
+CONFIG_FB_OMAP2_DSS_SDI=y
+CONFIG_FB_OMAP2_DSS_DSI=y
 CONFIG_FB_OMAP2=m
-CONFIG_DISPLAY_ENCODER_TFP410=m
-CONFIG_DISPLAY_ENCODER_TPD12S015=m
-CONFIG_DISPLAY_CONNECTOR_DVI=m
-CONFIG_DISPLAY_CONNECTOR_HDMI=m
-CONFIG_DISPLAY_CONNECTOR_ANALOG_TV=m
-CONFIG_DISPLAY_PANEL_DPI=m
-CONFIG_DISPLAY_PANEL_DSI_CM=m
-CONFIG_DISPLAY_PANEL_SONY_ACX565AKM=m
-CONFIG_DISPLAY_PANEL_LGPHILIPS_LB035Q02=m
-CONFIG_DISPLAY_PANEL_SHARP_LS037V7DW01=m
-CONFIG_DISPLAY_PANEL_TPO_TD028TTEC1=m
-CONFIG_DISPLAY_PANEL_TPO_TD043MTEA1=m
-CONFIG_DISPLAY_PANEL_NEC_NL8048HL11=m
+CONFIG_FB_OMAP2_ENCODER_TFP410=m
+CONFIG_FB_OMAP2_ENCODER_TPD12S015=m
+CONFIG_FB_OMAP2_CONNECTOR_DVI=m
+CONFIG_FB_OMAP2_CONNECTOR_HDMI=m
+CONFIG_FB_OMAP2_CONNECTOR_ANALOG_TV=m
+CONFIG_FB_OMAP2_PANEL_DPI=m
+CONFIG_FB_OMAP2_PANEL_DSI_CM=m
+CONFIG_FB_OMAP2_PANEL_SONY_ACX565AKM=m
+CONFIG_FB_OMAP2_PANEL_LGPHILIPS_LB035Q02=m
+CONFIG_FB_OMAP2_PANEL_SHARP_LS037V7DW01=m
+CONFIG_FB_OMAP2_PANEL_TPO_TD028TTEC1=m
+CONFIG_FB_OMAP2_PANEL_TPO_TD043MTEA1=m
+CONFIG_FB_OMAP2_PANEL_NEC_NL8048HL11=m
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 CONFIG_LCD_CLASS_DEVICE=y
 CONFIG_LCD_PLATFORM=y
@@ -354,6 +355,11 @@ CONFIG_USB_MUSB_DSPS=m
 CONFIG_USB_INVENTRA_DMA=y
 CONFIG_USB_TI_CPPI41_DMA=y
 CONFIG_USB_DWC3=m
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_SIMPLE=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_TEST=m
 CONFIG_AM335X_PHY_USB=y
 CONFIG_USB_GADGET=m
@@ -387,6 +393,7 @@ CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=m
 CONFIG_LEDS_GPIO=m
 CONFIG_LEDS_PWM=m
+CONFIG_LEDS_PCA963X=m
 CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_ONESHOT=m
@@ -449,6 +456,8 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_SPLIT=y
+CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
index b445a5d56f4342b71540e40c1ce298e0ad786688..89a3a3e592d6646783b9b08533a9704d929da70b 100644 (file)
@@ -364,7 +364,7 @@ static struct crypto_alg aes_algs[] = { {
        .cra_blkcipher = {
                .min_keysize    = AES_MIN_KEY_SIZE,
                .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
+               .ivsize         = 0,
                .setkey         = ce_aes_setkey,
                .encrypt        = ecb_encrypt,
                .decrypt        = ecb_decrypt,
@@ -441,7 +441,7 @@ static struct crypto_alg aes_algs[] = { {
        .cra_ablkcipher = {
                .min_keysize    = AES_MIN_KEY_SIZE,
                .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
+               .ivsize         = 0,
                .setkey         = ablk_set_key,
                .encrypt        = ablk_encrypt,
                .decrypt        = ablk_decrypt,
index 7da5503c0591411f43df6bc767fb8c4eef314789..e08d151840567dcb6085271200601666836b0771 100644 (file)
@@ -117,6 +117,7 @@ static inline u32 gic_read_iar(void)
        u32 irqstat;
 
        asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat));
+       dsb(sy);
        return irqstat;
 }
 
index 0375c8caa06194591e065f94bbb3d57a0565c847..9408a994cc91792bcf89a57d67982e21c677d9d7 100644 (file)
@@ -35,14 +35,21 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
             dma_addr_t dev_addr, unsigned long offset, size_t size,
             enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-       bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page);
+       unsigned long page_pfn = page_to_xen_pfn(page);
+       unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
+       unsigned long compound_pages =
+               (1<<compound_order(page)) * XEN_PFN_PER_PAGE;
+       bool local = (page_pfn <= dev_pfn) &&
+               (dev_pfn - page_pfn < compound_pages);
+
        /*
-        * Dom0 is mapped 1:1, while the Linux page can be spanned accross
-        * multiple Xen page, it's not possible to have a mix of local and
-        * foreign Xen page. So if the first xen_pfn == mfn the page is local
-        * otherwise it's a foreign page grant-mapped in dom0. If the page is
-        * local we can safely call the native dma_ops function, otherwise we
-        * call the xen specific function.
+        * Dom0 is mapped 1:1, while the Linux page can span across
+        * multiple Xen pages, it's not possible for it to contain a
+        * mix of local and foreign Xen pages. So if the first xen_pfn
+        * == mfn the page is local otherwise it's a foreign page
+        * grant-mapped in dom0. If the page is local we can safely
+        * call the native dma_ops function, otherwise we call the xen
+        * specific function.
         */
        if (local)
                __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
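Editor's note: the rewritten test above has to hold for compound Linux pages spanning several Xen pfns — dev_addr is local only if its pfn falls inside [page_pfn, page_pfn + compound_pages). A minimal sketch of that containment check with the pfn arithmetic pulled out and the inputs simplified:

#include <stdbool.h>

bool pfn_is_local(unsigned long page_pfn, unsigned long dev_pfn,
		  unsigned int compound_order, unsigned long xen_pfn_per_page)
{
	unsigned long compound_pages =
		(1UL << compound_order) * xen_pfn_per_page;

	/* the subtraction is unsigned, so order must be checked first */
	return page_pfn <= dev_pfn && dev_pfn - page_pfn < compound_pages;
}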
index ede692ffa32ed14958f81eec0d7caac8f7efe853..5dd2528e9e45e369d0879185291e6981dbea15e4 100644 (file)
 #define __NR_userfaultfd               (__NR_SYSCALL_BASE+388)
 #define __NR_membarrier                        (__NR_SYSCALL_BASE+389)
 #define __NR_mlock2                    (__NR_SYSCALL_BASE+390)
+#define __NR_copy_file_range           (__NR_SYSCALL_BASE+391)
 
 /*
  * The following SWIs are ARM private.
index ac368bb068d1409af37a8e2585bb837b1ef1f02b..dfc7cd6851ad4bbd09b11ae94ee6056f691b1fcb 100644 (file)
                CALL(sys_userfaultfd)
                CALL(sys_membarrier)
                CALL(sys_mlock2)
+               CALL(sys_copy_file_range)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
index 7f33b2056ae6d92f568ad71fbf28cd52044b11fb..0f6600f05137921b61fe5c9a80b87302e7d03234 100644 (file)
@@ -206,7 +206,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
        run->mmio.is_write      = is_write;
        run->mmio.phys_addr     = fault_ipa;
        run->mmio.len           = len;
-       memcpy(run->mmio.data, data_buf, len);
+       if (is_write)
+               memcpy(run->mmio.data, data_buf, len);
 
        if (!ret) {
                /* We handled the access successfully in the kernel. */
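Editor's note: the is_write guard added above matters because on an MMIO read data_buf carries no payload yet; memcpy()ing it into the shared kvm_run area would hand stale kernel stack bytes to userspace. A minimal illustration of the pattern with a simplified exit structure (not the real kvm_run layout):

#include <string.h>

struct mmio_exit {
	unsigned char data[8];
	unsigned int  len;
	int           is_write;
};

void fill_mmio_exit(struct mmio_exit *run, const void *data_buf,
		    unsigned int len, int is_write)
{
	run->len = len;
	run->is_write = is_write;
	if (is_write)		/* on reads, data[] is for the host to fill in later */
		memcpy(run->data, data_buf, len);
}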
index 809827265fb39d1b8a7178481bb91e87951c4a14..bab814d2f37dcc9a18229d5e912422a653307088 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <asm/setup.h>
 #include <asm/mach/arch.h>
+#include <asm/system_info.h>
 
 #include "common.h"
 
@@ -77,12 +78,31 @@ static const char *const n900_boards_compat[] __initconst = {
        NULL,
 };
 
+/* Set system_rev from atags */
+static void __init rx51_set_system_rev(const struct tag *tags)
+{
+       const struct tag *tag;
+
+       if (tags->hdr.tag != ATAG_CORE)
+               return;
+
+       for_each_tag(tag, tags) {
+               if (tag->hdr.tag == ATAG_REVISION) {
+                       system_rev = tag->u.revision.rev;
+                       break;
+               }
+       }
+}
+
 /* Legacy userspace on Nokia N900 needs ATAGS exported in /proc/atags,
  * save them while the data is still not overwritten
  */
 static void __init rx51_reserve(void)
 {
-       save_atags((const struct tag *)(PAGE_OFFSET + 0x100));
+       const struct tag *tags = (const struct tag *)(PAGE_OFFSET + 0x100);
+
+       save_atags(tags);
+       rx51_set_system_rev(tags);
        omap_reserve();
 }
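
The walk above relies on the ATAG list shape: size-prefixed records terminated by a zero-size entry, with a tag's payload immediately after its header. A self-contained sketch of the same traversal (a stand-alone rendition of the kernel's tag_next()/for_each_tag() helpers, not the kernel code itself):

#include <stdint.h>

#define ATAG_REVISION 0x54410007u

struct tag_header {
	uint32_t size;	/* record length in 32-bit words, header included */
	uint32_t tag;
};

/* step to the next record; a zero-size header terminates the list */
#define tag_next(t) ((struct tag_header *)((uint32_t *)(t) + (t)->size))

static uint32_t atags_find_revision(struct tag_header *t)
{
	for (; t->size; t = tag_next(t))
		if (t->tag == ATAG_REVISION)
			return *(uint32_t *)(t + 1);	/* rev follows header */
	return 0;
}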
 
index 9cda974a3009f3295e7bd013709f1bf586752282..d7f1d69daf6d4d083005e15d11d556c412aacfbc 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/pinctrl/machine.h>
-#include <linux/platform_data/mailbox-omap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
@@ -66,32 +65,6 @@ static int __init omap3_l3_init(void)
 }
 omap_postcore_initcall(omap3_l3_init);
 
-#if defined(CONFIG_OMAP2PLUS_MBOX) || defined(CONFIG_OMAP2PLUS_MBOX_MODULE)
-static inline void __init omap_init_mbox(void)
-{
-       struct omap_hwmod *oh;
-       struct platform_device *pdev;
-       struct omap_mbox_pdata *pdata;
-
-       oh = omap_hwmod_lookup("mailbox");
-       if (!oh) {
-               pr_err("%s: unable to find hwmod\n", __func__);
-               return;
-       }
-       if (!oh->dev_attr) {
-               pr_err("%s: hwmod doesn't have valid attrs\n", __func__);
-               return;
-       }
-
-       pdata = (struct omap_mbox_pdata *)oh->dev_attr;
-       pdev = omap_device_build("omap-mailbox", -1, oh, pdata, sizeof(*pdata));
-       WARN(IS_ERR(pdev), "%s: could not build device, err %ld\n",
-                                               __func__, PTR_ERR(pdev));
-}
-#else
-static inline void omap_init_mbox(void) { }
-#endif /* CONFIG_OMAP2PLUS_MBOX */
-
 static inline void omap_init_sti(void) {}
 
 #if defined(CONFIG_SPI_OMAP24XX) || defined(CONFIG_SPI_OMAP24XX_MODULE)
@@ -229,7 +202,6 @@ static int __init omap2_init_devices(void)
                 * please keep these calls, and their implementations above,
                 * in alphabetical order so they're easier to sort through.
                 */
-               omap_init_mbox();
                omap_init_mcspi();
                omap_init_sham();
                omap_init_aes();
index 7b76ce01c21dd3aa3b4cad36ce2431f3c8350900..8633c703546a65c2e5b0071ffca0d5a12b664884 100644 (file)
@@ -101,10 +101,8 @@ static void omap2_onenand_set_async_mode(void __iomem *onenand_base)
 
 static void set_onenand_cfg(void __iomem *onenand_base)
 {
-       u32 reg;
+       u32 reg = ONENAND_SYS_CFG1_RDY | ONENAND_SYS_CFG1_INT;
 
-       reg = readw(onenand_base + ONENAND_REG_SYS_CFG1);
-       reg &= ~((0x7 << ONENAND_SYS_CFG1_BRL_SHIFT) | (0x7 << 9));
        reg |=  (latency << ONENAND_SYS_CFG1_BRL_SHIFT) |
                ONENAND_SYS_CFG1_BL_16;
        if (onenand_flags & ONENAND_FLAG_SYNCREAD)
@@ -123,6 +121,7 @@ static void set_onenand_cfg(void __iomem *onenand_base)
                reg |= ONENAND_SYS_CFG1_VHF;
        else
                reg &= ~ONENAND_SYS_CFG1_VHF;
+
        writew(reg, onenand_base + ONENAND_REG_SYS_CFG1);
 }
 
@@ -289,6 +288,7 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
                }
        }
 
+       onenand_async.sync_write = true;
        omap2_onenand_calc_async_timings(&t);
 
        ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async);
index 0437537751bc0c593dbe3554e216727b845f3a78..f7ff3b9dad8784aeab4d3752061514a8498774ae 100644 (file)
@@ -191,12 +191,22 @@ static int _omap_device_notifier_call(struct notifier_block *nb,
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct omap_device *od;
+       int err;
 
        switch (event) {
        case BUS_NOTIFY_DEL_DEVICE:
                if (pdev->archdata.od)
                        omap_device_delete(pdev->archdata.od);
                break;
+       case BUS_NOTIFY_UNBOUND_DRIVER:
+               od = to_omap_device(pdev);
+               if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED)) {
+                       dev_info(dev, "enabled after unload, idling\n");
+                       err = omap_device_idle(pdev);
+                       if (err)
+                               dev_err(dev, "failed to idle\n");
+               }
+               break;
        case BUS_NOTIFY_ADD_DEVICE:
                if (pdev->dev.of_node)
                        omap_device_build_from_dt(pdev);
@@ -602,8 +612,10 @@ static int _od_runtime_resume(struct device *dev)
        int ret;
 
        ret = omap_device_enable(pdev);
-       if (ret)
+       if (ret) {
+               dev_err(dev, "use pm_runtime_put_sync_suspend() in driver?\n");
                return ret;
+       }
 
        return pm_generic_runtime_resume(dev);
 }
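
The hint added to _od_runtime_resume() targets drivers that unbind while their device is still runtime-enabled. A sketch of the suggested driver-side remedy, assuming a conventional runtime-PM platform driver (foo_remove() is a hypothetical example; the two pm_runtime calls are the standard API):

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

/* Sketch: drop the runtime-PM reference synchronously on remove so the
 * omap_device is idled before the driver goes away. */
static int foo_remove(struct platform_device *pdev)
{
	pm_runtime_put_sync_suspend(&pdev->dev);	/* idle now, not lazily */
	pm_runtime_disable(&pdev->dev);
	return 0;
}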
index e781e4fae13a92fd68aa04ac891b9731bc8fa60e..a935d28443dab40e1a8005b7144aeb8de436c73c 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/platform_data/pinctrl-single.h>
 #include <linux/platform_data/iommu-omap.h>
 #include <linux/platform_data/wkup_m3.h>
+#include <linux/platform_data/pwm_omap_dmtimer.h>
+#include <plat/dmtimer.h>
 
 #include "common.h"
 #include "common-board-devices.h"
@@ -449,6 +451,24 @@ void omap_auxdata_legacy_init(struct device *dev)
        dev->platform_data = &twl_gpio_auxdata;
 }
 
+/* Dual mode timer PWM callbacks platdata */
+#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
+struct pwm_omap_dmtimer_pdata pwm_dmtimer_pdata = {
+       .request_by_node = omap_dm_timer_request_by_node,
+       .free = omap_dm_timer_free,
+       .enable = omap_dm_timer_enable,
+       .disable = omap_dm_timer_disable,
+       .get_fclk = omap_dm_timer_get_fclk,
+       .start = omap_dm_timer_start,
+       .stop = omap_dm_timer_stop,
+       .set_load = omap_dm_timer_set_load,
+       .set_match = omap_dm_timer_set_match,
+       .set_pwm = omap_dm_timer_set_pwm,
+       .set_prescaler = omap_dm_timer_set_prescaler,
+       .write_counter = omap_dm_timer_write_counter,
+};
+#endif
+
 /*
  * Few boards still need auxdata populated before we populate
  * the dev entries in of_platform_populate().
@@ -502,6 +522,9 @@ static struct of_dev_auxdata omap_auxdata_lookup[] __initdata = {
        OF_DEV_AUXDATA("ti,am4372-wkup-m3", 0x44d00000, "44d00000.wkup_m3",
                       &wkup_m3_data),
 #endif
+#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
+       OF_DEV_AUXDATA("ti,omap-dmtimer-pwm", 0, NULL, &pwm_dmtimer_pdata),
+#endif
 #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5)
        OF_DEV_AUXDATA("ti,omap4-iommu", 0x4a066000, "4a066000.mmu",
                       &omap4_iommu_pdata),
index eafd120b53f1bc15c82f2cc47dc8033e31ca566e..1b9f0520dea9154afa31f9668241e03f211fdc6a 100644 (file)
@@ -86,13 +86,18 @@ ENTRY(enable_omap3630_toggle_l2_on_restore)
        stmfd   sp!, {lr}       @ save registers on stack
        /* Setup so that we will disable and enable l2 */
        mov     r1, #0x1
-       adrl    r2, l2dis_3630  @ may be too distant for plain adr
-       str     r1, [r2]
+       adrl    r3, l2dis_3630_offset   @ may be too distant for plain adr
+       ldr     r2, [r3]                @ value for offset
+       str     r1, [r2, r3]            @ write to l2dis_3630
        ldmfd   sp!, {pc}       @ restore regs and return
 ENDPROC(enable_omap3630_toggle_l2_on_restore)
 
-       .text
-/* Function to call rom code to save secure ram context */
+/*
+ * Function to call ROM code to save the secure RAM context. This gets
+ * relocated to SRAM, so it can all live in the .data section; otherwise
+ * we would need to initialize api_params separately.
+ */
+       .data
        .align  3
 ENTRY(save_secure_ram_context)
        stmfd   sp!, {r4 - r11, lr}     @ save registers on stack
@@ -126,6 +131,8 @@ ENDPROC(save_secure_ram_context)
 ENTRY(save_secure_ram_context_sz)
        .word   . - save_secure_ram_context
 
+       .text
+
 /*
  * ======================
  * == Idle entry point ==
@@ -289,12 +296,6 @@ wait_sdrc_ready:
        bic     r5, r5, #0x40
        str     r5, [r4]
 
-/*
- * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a
- * base instead.
- * Be careful not to clobber r7 when maintaing this code.
- */
-
 is_dll_in_lock_mode:
        /* Is dll in lock mode? */
        ldr     r4, sdrc_dlla_ctrl
@@ -302,11 +303,7 @@ is_dll_in_lock_mode:
        tst     r5, #0x4
        bne     exit_nonoff_modes       @ Return if locked
        /* wait till dll locks */
-       adr     r7, kick_counter
 wait_dll_lock_timed:
-       ldr     r4, wait_dll_lock_counter
-       add     r4, r4, #1
-       str     r4, [r7, #wait_dll_lock_counter - kick_counter]
        ldr     r4, sdrc_dlla_status
        /* Wait 20uS for lock */
        mov     r6, #8
@@ -330,9 +327,6 @@ kick_dll:
        orr     r6, r6, #(1<<3)         @ enable dll
        str     r6, [r4]
        dsb
-       ldr     r4, kick_counter
-       add     r4, r4, #1
-       str     r4, [r7]                @ kick_counter
        b       wait_dll_lock_timed
 
 exit_nonoff_modes:
@@ -360,15 +354,6 @@ sdrc_dlla_status:
        .word   SDRC_DLLA_STATUS_V
 sdrc_dlla_ctrl:
        .word   SDRC_DLLA_CTRL_V
-       /*
-        * When exporting to userspace while the counters are in SRAM,
-        * these 2 words need to be at the end to facilitate retrival!
-        */
-kick_counter:
-       .word   0
-wait_dll_lock_counter:
-       .word   0
-
 ENTRY(omap3_do_wfi_sz)
        .word   . - omap3_do_wfi
 
@@ -437,7 +422,9 @@ ENTRY(omap3_restore)
        cmp     r2, #0x0        @ Check if target power state was OFF or RET
        bne     logic_l1_restore
 
-       ldr     r0, l2dis_3630
+       adr     r1, l2dis_3630_offset   @ address for offset
+       ldr     r0, [r1]                @ value for offset
+       ldr     r0, [r1, r0]            @ value at l2dis_3630
        cmp     r0, #0x1        @ should we disable L2 on 3630?
        bne     skipl2dis
        mrc     p15, 0, r0, c1, c0, 1
@@ -449,12 +436,14 @@ skipl2dis:
        and     r1, #0x700
        cmp     r1, #0x300
        beq     l2_inv_gp
+       adr     r0, l2_inv_api_params_offset
+       ldr     r3, [r0]
+       add     r3, r3, r0              @ r3 points to dummy parameters
        mov     r0, #40                 @ set service ID for PPA
        mov     r12, r0                 @ copy secure Service ID in r12
        mov     r1, #0                  @ set task id for ROM code in r1
        mov     r2, #4                  @ set some flags in r2, r6
        mov     r6, #0xff
-       adr     r3, l2_inv_api_params   @ r3 points to dummy parameters
        dsb                             @ data write barrier
        dmb                             @ data memory barrier
        smc     #1                      @ call SMI monitor (smi #1)
@@ -488,8 +477,8 @@ skipl2dis:
        b       logic_l1_restore
 
        .align
-l2_inv_api_params:
-       .word   0x1, 0x00
+l2_inv_api_params_offset:
+       .long   l2_inv_api_params - .
 l2_inv_gp:
        /* Execute smi to invalidate L2 cache */
        mov r12, #0x1                   @ set up to invalidate L2
@@ -506,7 +495,9 @@ l2_inv_gp:
        mov     r12, #0x2
        smc     #0                      @ Call SMI monitor (smieq)
 logic_l1_restore:
-       ldr     r1, l2dis_3630
+       adr     r0, l2dis_3630_offset   @ address for offset
+       ldr     r1, [r0]                @ value for offset
+       ldr     r1, [r0, r1]            @ value at l2dis_3630
        cmp     r1, #0x1                @ Test if L2 re-enable needed on 3630
        bne     skipl2reen
        mrc     p15, 0, r1, c1, c0, 1
@@ -535,9 +526,17 @@ control_stat:
        .word   CONTROL_STAT
 control_mem_rta:
        .word   CONTROL_MEM_RTA_CTRL
+l2dis_3630_offset:
+       .long   l2dis_3630 - .
+
+       .data
 l2dis_3630:
        .word   0
 
+       .data
+l2_inv_api_params:
+       .word   0x1, 0x00
+
 /*
  * Internal functions
  */
index 9b09d85d811a1c52b0fa735a2e2743e05346843d..c7a3b4aab4b5441249ddd9a7fdc362ef9370d737 100644 (file)
        dsb
 .endm
 
-ppa_zero_params:
-       .word           0x0
-
-ppa_por_params:
-       .word           1, 0
-
 #ifdef CONFIG_ARCH_OMAP4
 
 /*
@@ -266,7 +260,9 @@ ENTRY(omap4_cpu_resume)
        beq     skip_ns_smp_enable
 ppa_actrl_retry:
        mov     r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX
-       adr     r3, ppa_zero_params             @ Pointer to parameters
+       adr     r1, ppa_zero_params_offset
+       ldr     r3, [r1]
+       add     r3, r3, r1                      @ Pointer to ppa_zero_params
        mov     r1, #0x0                        @ Process ID
        mov     r2, #0x4                        @ Flag
        mov     r6, #0xff
@@ -303,7 +299,9 @@ skip_ns_smp_enable:
        ldr     r0, =OMAP4_PPA_L2_POR_INDEX
        ldr     r1, =OMAP44XX_SAR_RAM_BASE
        ldr     r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
-       adr     r3, ppa_por_params
+       adr     r1, ppa_por_params_offset
+       ldr     r3, [r1]
+       add     r3, r3, r1                      @ Pointer to ppa_por_params
        str     r4, [r3, #0x04]
        mov     r1, #0x0                        @ Process ID
        mov     r2, #0x4                        @ Flag
@@ -328,6 +326,8 @@ skip_l2en:
 #endif
 
        b       cpu_resume                      @ Jump to generic resume
+ppa_por_params_offset:
+       .long   ppa_por_params - .
 ENDPROC(omap4_cpu_resume)
 #endif /* CONFIG_ARCH_OMAP4 */
 
@@ -380,4 +380,13 @@ ENTRY(omap_do_wfi)
        nop
 
        ldmfd   sp!, {pc}
+ppa_zero_params_offset:
+       .long   ppa_zero_params - .
 ENDPROC(omap_do_wfi)
+
+       .data
+ppa_zero_params:
+       .word           0
+
+ppa_por_params:
+       .word           1, 0
index def40a0dd60cd290d91233cc47a1a8bad4ea6aa1..70ab4a25a5f853d4d8dade14490618d2523fcd13 100644 (file)
@@ -1,5 +1,6 @@
 menuconfig ARCH_REALVIEW
-       bool "ARM Ltd. RealView family" if ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7
+       bool "ARM Ltd. RealView family"
+       depends on ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7
        select ARM_AMBA
        select ARM_TIMER_SP804
        select COMMON_CLK_VERSATILE
index 8be6632407d8f0dbb75f87c5878578dffba70fbb..dae8d86ef4ccc75c4bb3dfbb98aba9e7e12eec9f 100644 (file)
@@ -4,10 +4,9 @@
 ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
        -I$(srctree)/arch/arm/plat-versatile/include
 
-
+obj-y                                  := core.o
 obj-$(CONFIG_REALVIEW_DT)              += realview-dt.o
 obj-$(CONFIG_SMP)                      += platsmp-dt.o
-obj-y                                  := core.o
 
 ifdef CONFIG_ATAGS
 obj-$(CONFIG_MACH_REALVIEW_EB)         += realview_eb.o
index 65585392655b3ac499dff837b2f6adfe02d5a708..6964e88760614ba1f8fa60c7f6fd12f3b07dfe20 100644 (file)
@@ -80,7 +80,7 @@ static void __init realview_smp_prepare_cpus(unsigned int max_cpus)
                     virt_to_phys(versatile_secondary_startup));
 }
 
-struct smp_operations realview_dt_smp_ops __initdata = {
+static const struct smp_operations realview_dt_smp_ops __initconst = {
        .smp_prepare_cpus       = realview_smp_prepare_cpus,
        .smp_secondary_init     = versatile_secondary_init,
        .smp_boot_secondary     = versatile_boot_secondary,
index 9cb11215cebaeb2e4e0a8355589ce6d7e5270b78..b3a4ed5289ec7c8c35661a1b6062d4387d13d097 100644 (file)
@@ -4,7 +4,6 @@
 extern void shmobile_init_delay(void);
 extern void shmobile_boot_vector(void);
 extern unsigned long shmobile_boot_fn;
-extern unsigned long shmobile_boot_arg;
 extern unsigned long shmobile_boot_size;
 extern void shmobile_smp_boot(void);
 extern void shmobile_smp_sleep(void);
index fa5248c52399c9b5e78e3c1cd7c167523f306424..5e503d91ad70ddf2b8f375ffaf36a09361c9be8c 100644 (file)
@@ -38,9 +38,3 @@ ENTRY(shmobile_boot_scu)
 
        b       secondary_startup
 ENDPROC(shmobile_boot_scu)
-
-       .text
-       .align  2
-       .globl  shmobile_scu_base
-shmobile_scu_base:
-       .space  4
index 330c1fc63197df89684e03578c1c0693b8e6f24f..32e0bf6e3ccb9bd317c68be65ef7071a0c05eeed 100644 (file)
@@ -24,7 +24,6 @@
        .arm
        .align  12
 ENTRY(shmobile_boot_vector)
-       ldr     r0, 2f
        ldr     r1, 1f
        bx      r1
 
@@ -34,9 +33,6 @@ ENDPROC(shmobile_boot_vector)
        .globl  shmobile_boot_fn
 shmobile_boot_fn:
 1:     .space  4
-       .globl  shmobile_boot_arg
-shmobile_boot_arg:
-2:     .space  4
        .globl  shmobile_boot_size
 shmobile_boot_size:
        .long   . - shmobile_boot_vector
@@ -46,13 +42,15 @@ shmobile_boot_size:
  */
 
 ENTRY(shmobile_smp_boot)
-                                               @ r0 = MPIDR_HWID_BITMASK
        mrc     p15, 0, r1, c0, c0, 5           @ r1 = MPIDR
-       and     r0, r1, r0                      @ r0 = cpu_logical_map() value
+       and     r0, r1, #0xffffff               @ MPIDR_HWID_BITMASK
+                                               @ r0 = cpu_logical_map() value
        mov     r1, #0                          @ r1 = CPU index
-       adr     r5, 1f                          @ array of per-cpu mpidr values
-       adr     r6, 2f                          @ array of per-cpu functions
-       adr     r7, 3f                          @ array of per-cpu arguments
+       adr     r2, 1f
+       ldmia   r2, {r5, r6, r7}
+       add     r5, r5, r2                      @ array of per-cpu mpidr values
+       add     r6, r6, r2                      @ array of per-cpu functions
+       add     r7, r7, r2                      @ array of per-cpu arguments
 
 shmobile_smp_boot_find_mpidr:
        ldr     r8, [r5, r1, lsl #2]
@@ -80,12 +78,18 @@ ENTRY(shmobile_smp_sleep)
        b       shmobile_smp_boot
 ENDPROC(shmobile_smp_sleep)
 
+       .align  2
+1:     .long   shmobile_smp_mpidr - .
+       .long   shmobile_smp_fn - 1b
+       .long   shmobile_smp_arg - 1b
+
+       .bss
        .globl  shmobile_smp_mpidr
 shmobile_smp_mpidr:
-1:     .space  NR_CPUS * 4
+       .space  NR_CPUS * 4
        .globl  shmobile_smp_fn
 shmobile_smp_fn:
-2:     .space  NR_CPUS * 4
+       .space  NR_CPUS * 4
        .globl  shmobile_smp_arg
 shmobile_smp_arg:
-3:     .space  NR_CPUS * 4
+       .space  NR_CPUS * 4
index 911884f7e28b174c271c6724c863520afe859868..aba75c89f9c1c5eb2d7070def50bb8e77118df3c 100644 (file)
@@ -123,7 +123,6 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus,
 {
        /* install boot code shared by all CPUs */
        shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
-       shmobile_boot_arg = MPIDR_HWID_BITMASK;
 
        /* perform per-cpu setup */
        apmu_parse_cfg(apmu_init_cpu, apmu_config, num);
index 64663110ab6ca0e1d8f06cc04e69175165e2cace..081a097c9219f41785229c9492b95b250151c1f4 100644 (file)
@@ -17,6 +17,9 @@
 #include <asm/smp_scu.h>
 #include "common.h"
 
+
+void __iomem *shmobile_scu_base;
+
 static int shmobile_smp_scu_notifier_call(struct notifier_block *nfb,
                                          unsigned long action, void *hcpu)
 {
@@ -41,7 +44,6 @@ void __init shmobile_smp_scu_prepare_cpus(unsigned int max_cpus)
 {
        /* install boot code shared by all CPUs */
        shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
-       shmobile_boot_arg = MPIDR_HWID_BITMASK;
 
        /* enable SCU and cache coherency on booting CPU */
        scu_enable(shmobile_scu_base);
index b854fe2095ad14616b7c4aae209b47f7e4f7ded3..0b024a9dbd4397e7235827dbdaf62d65a5ad3d16 100644 (file)
@@ -92,8 +92,6 @@ static void __init r8a7779_smp_prepare_cpus(unsigned int max_cpus)
 {
        /* Map the reset vector (in headsmp-scu.S, headsmp.S) */
        __raw_writel(__pa(shmobile_boot_vector), AVECR);
-       shmobile_boot_fn = virt_to_phys(shmobile_boot_scu);
-       shmobile_boot_arg = (unsigned long)shmobile_scu_base;
 
        /* setup r8a7779 specific SCU bits */
        shmobile_scu_base = IOMEM(R8A7779_SCU_BASE);
index d6a3714b096e69801c381d320a0ccb1be3472585..ebe15b93bbe870804eb17759b05096a8f7298167 100644 (file)
@@ -1,5 +1,6 @@
 config ARCH_TANGO
-       bool "Sigma Designs Tango4 (SMP87xx)" if ARCH_MULTI_V7
+       bool "Sigma Designs Tango4 (SMP87xx)"
+       depends on ARCH_MULTI_V7
        # Cortex-A9 MPCore r3p0, PL310 r3p2
        select ARCH_HAS_HOLES_MEMORYMODEL
        select ARM_ERRATA_754322
index a18d5a34e2f5738e4f62bd24bf8a0b9edff7a3e8..a21f55e000d258c734535cbfc9df57744599887a 100644 (file)
@@ -9,7 +9,7 @@ static int tango_boot_secondary(unsigned int cpu, struct task_struct *idle)
        return 0;
 }
 
-static struct smp_operations tango_smp_ops __initdata = {
+static const struct smp_operations tango_smp_ops __initconst = {
        .smp_boot_secondary     = tango_boot_secondary,
 };
 
index a90f3556017fe80ae3d2587e54e063cd619c0e6a..0fa8b84ed657e702a0f34f037927bceec78b8046 100644 (file)
@@ -13,57 +13,5 @@ menuconfig ARCH_TEGRA
        select ARCH_HAS_RESET_CONTROLLER
        select RESET_CONTROLLER
        select SOC_BUS
-       select USB_ULPI if USB_PHY
-       select USB_ULPI_VIEWPORT if USB_PHY
        help
          This enables support for NVIDIA Tegra based systems.
-
-if ARCH_TEGRA
-
-config ARCH_TEGRA_2x_SOC
-       bool "Enable support for Tegra20 family"
-       select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
-       select ARM_ERRATA_720789
-       select ARM_ERRATA_754327 if SMP
-       select ARM_ERRATA_764369 if SMP
-       select PINCTRL_TEGRA20
-       select PL310_ERRATA_727915 if CACHE_L2X0
-       select PL310_ERRATA_769419 if CACHE_L2X0
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra AP20 and T20 processors, based on the
-         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
-
-config ARCH_TEGRA_3x_SOC
-       bool "Enable support for Tegra30 family"
-       select ARM_ERRATA_754322
-       select ARM_ERRATA_764369 if SMP
-       select PINCTRL_TEGRA30
-       select PL310_ERRATA_769419 if CACHE_L2X0
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T30 processor family, based on the
-         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
-
-config ARCH_TEGRA_114_SOC
-       bool "Enable support for Tegra114 family"
-       select ARM_ERRATA_798181 if SMP
-       select ARM_L1_CACHE_SHIFT_6
-       select HAVE_ARM_ARCH_TIMER
-       select PINCTRL_TEGRA114
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T114 processor family, based on the
-         ARM CortexA15MP CPU
-
-config ARCH_TEGRA_124_SOC
-       bool "Enable support for Tegra124 family"
-       select ARM_L1_CACHE_SHIFT_6
-       select HAVE_ARM_ARCH_TIMER
-       select PINCTRL_TEGRA124
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T124 processor family, based on the
-         ARM CortexA15MP CPU
-
-endif
index e6b684e14322cef21e44d1b660e4c6e8d0c7ec43..f5d19667484edd38e65bb1aa1d1a4413a036bfbf 100644 (file)
@@ -231,8 +231,11 @@ ENDPROC(tegra20_cpu_is_resettable_soon)
  * tegra20_tear_down_core in IRAM
  */
 ENTRY(tegra20_sleep_core_finish)
+       mov     r4, r0
        /* Flush, disable the L1 data cache and exit SMP */
+       mov     r0, #TEGRA_FLUSH_CACHE_ALL
        bl      tegra_disable_clean_inv_dcache
+       mov     r0, r4
 
        mov32   r3, tegra_shut_off_mmu
        add     r3, r3, r0
index 9a2f0b051e1035a4f956dbf98a90a9de493416e9..16e5ff03383cad5b709eb42b24340b033f86a2c7 100644 (file)
@@ -242,8 +242,11 @@ ENDPROC(tegra30_cpu_shutdown)
  * tegra30_tear_down_core in IRAM
  */
 ENTRY(tegra30_sleep_core_finish)
+       mov     r4, r0
        /* Flush, disable the L1 data cache and exit SMP */
+       mov     r0, #TEGRA_FLUSH_CACHE_ALL
        bl      tegra_disable_clean_inv_dcache
+       mov     r0, r4
 
        /*
         * Preload all the address literals that are needed for the
index 534a60ae282e702d3b06e8dad289d8f109cd80ef..0eca3812527e614e891113839992b4e20cf3fb8d 100644 (file)
@@ -1200,10 +1200,7 @@ error:
        while (i--)
                if (pages[i])
                        __free_pages(pages[i], 0);
-       if (array_size <= PAGE_SIZE)
-               kfree(pages);
-       else
-               vfree(pages);
+       kvfree(pages);
        return NULL;
 }
 
@@ -1211,7 +1208,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
                               size_t size, struct dma_attrs *attrs)
 {
        int count = size >> PAGE_SHIFT;
-       int array_size = count * sizeof(struct page *);
        int i;
 
        if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
@@ -1222,10 +1218,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
                                __free_pages(pages[i], 0);
        }
 
-       if (array_size <= PAGE_SIZE)
-               kfree(pages);
-       else
-               vfree(pages);
+       kvfree(pages);
        return 0;
 }
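
Both hunks can drop the array_size bookkeeping because kvfree() classifies the pointer itself; its logic is essentially the following (as in mm/util.c):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* the address range tells vmalloc and slab allocations apart, so callers
 * no longer need to remember which allocator produced the pointer */
void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}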
 
index 4b4058db0781f63e307d4f5101a74d5330ddf337..66353caa35b9f78fa2aa4754dea3ce813593303f 100644 (file)
@@ -173,7 +173,7 @@ unsigned long arch_mmap_rnd(void)
 {
        unsigned long rnd;
 
-       rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+       rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 
        return rnd << PAGE_SHIFT;
 }
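
The switch to get_random_long() and a 1UL shift matters once mmap_rnd_bits can reach 32 and beyond: the old int-width expression would truncate both the entropy and the mask. A sketch of the fixed computation, with the page shift passed in for illustration:

#include <linux/random.h>

static unsigned long sketch_mmap_rnd(unsigned int mmap_rnd_bits,
				     unsigned int page_shift)
{
	/* mask built in unsigned long arithmetic; entropy drawn with
	 * get_random_long() so bits above 31 are populated too */
	unsigned long rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);

	return rnd << page_shift;
}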
index 2c400412b3bce47ef97b7f7eb74b0018d88e7dab..21074f674bdeb707c137a9b87c2a65bde4bbb25a 100644 (file)
@@ -105,18 +105,6 @@ config ARCH_TEGRA
        help
          This enables support for the NVIDIA Tegra SoC family.
 
-config ARCH_TEGRA_132_SOC
-       bool "NVIDIA Tegra132 SoC"
-       depends on ARCH_TEGRA
-       select PINCTRL_TEGRA124
-       select USB_ULPI if USB_PHY
-       select USB_ULPI_VIEWPORT if USB_PHY
-       help
-         Enable support for NVIDIA Tegra132 SoC, based on the Denver
-         ARMv8 CPU.  The Tegra132 SoC is similar to the Tegra124 SoC,
-         but contains an NVIDIA Denver CPU complex in place of
-         Tegra124's "4+1" Cortex-A15 CPU complex.
-
 config ARCH_SPRD
        bool "Spreadtrum SoC platform"
        help
index cd822d8454c0536165810004089c3a7ce5d0068d..b5e3f6d42b88bef5f17d3c21a80f9e97a013b676 100644 (file)
@@ -27,6 +27,8 @@ $(warning LSE atomics not supported by binutils)
 endif
 
 KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS  += $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS  += $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
@@ -86,7 +88,7 @@ Image: vmlinux
 Image.%: vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-zinstall install: vmlinux
+zinstall install:
        $(Q)$(MAKE) $(build)=$(boot) $@
 
 %.dtb: scripts
index abcbba2f01baad4c76f0c414372ba4713beafa11..305c552b5ec16ee903c4b6d0a73066a191dd7d31 100644 (file)
@@ -34,10 +34,10 @@ $(obj)/Image.lzma: $(obj)/Image FORCE
 $(obj)/Image.lzo: $(obj)/Image FORCE
        $(call if_changed,lzo)
 
-install: $(obj)/Image
+install:
        $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
        $(obj)/Image System.map "$(INSTALL_PATH)"
 
-zinstall: $(obj)/Image.gz
+zinstall:
        $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
        $(obj)/Image.gz System.map "$(INSTALL_PATH)"
index 76e7510835b29322957d0d5b05654ce5fc6f4597..f832b8a7453a307b351273a83e320e413e27ce4a 100644 (file)
@@ -9,6 +9,7 @@ dts-dirs += freescale
 dts-dirs += hisilicon
 dts-dirs += marvell
 dts-dirs += mediatek
+dts-dirs += nvidia
 dts-dirs += qcom
 dts-dirs += renesas
 dts-dirs += rockchip
index dd5158eb5872396693bba678cda765a719e25e97..e5b59ca9debb1916764746bb168d969572e17771 100644 (file)
                             <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
index da7b6e61325758c547b521c3df675adeb9568d2b..933cba3599189c7f85c6a6adea77eb661be459f5 100644 (file)
@@ -23,9 +23,8 @@ soc0: soc@000000000 {
                };
        };
 
-       dsa: dsa@c7000000 {
+       dsaf0: dsa@c7000000 {
                compatible = "hisilicon,hns-dsaf-v1";
-               dsa_name = "dsaf0";
                mode = "6port-16rss";
                interrupt-parent = <&mbigen_dsa>;
 
@@ -127,7 +126,7 @@ soc0: soc@000000000 {
 
        eth0: ethernet@0{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <0>;
                local-mac-address = [00 00 00 01 00 58];
                status = "disabled";
@@ -135,14 +134,14 @@ soc0: soc@000000000 {
        };
        eth1: ethernet@1{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <1>;
                status = "disabled";
                dma-coherent;
        };
        eth2: ethernet@2{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <2>;
                local-mac-address = [00 00 00 01 00 5a];
                status = "disabled";
@@ -150,7 +149,7 @@ soc0: soc@000000000 {
        };
        eth3: ethernet@3{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <3>;
                local-mac-address = [00 00 00 01 00 5b];
                status = "disabled";
@@ -158,7 +157,7 @@ soc0: soc@000000000 {
        };
        eth4: ethernet@4{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <4>;
                local-mac-address = [00 00 00 01 00 5c];
                status = "disabled";
@@ -166,7 +165,7 @@ soc0: soc@000000000 {
        };
        eth5: ethernet@5{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <5>;
                local-mac-address = [00 00 00 01 00 5d];
                status = "disabled";
@@ -174,7 +173,7 @@ soc0: soc@000000000 {
        };
        eth6: ethernet@6{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <6>;
                local-mac-address = [00 00 00 01 00 5e];
                status = "disabled";
@@ -182,7 +181,7 @@ soc0: soc@000000000 {
        };
        eth7: ethernet@7{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <7>;
                local-mac-address = [00 00 00 01 00 5f];
                status = "disabled";
diff --git a/arch/arm64/boot/dts/nvidia/Makefile b/arch/arm64/boot/dts/nvidia/Makefile
new file mode 100644 (file)
index 0000000..a7e865d
--- /dev/null
@@ -0,0 +1,7 @@
+dtb-$(CONFIG_ARCH_TEGRA_132_SOC) += tegra132-norrin.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-0000.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-2180.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2571.dtb
+
+always         := $(dtb-y)
+clean-files    := *.dtb
diff --git a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
new file mode 100644 (file)
index 0000000..62f33fc
--- /dev/null
@@ -0,0 +1,1132 @@
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "tegra132.dtsi"
+
+/ {
+       model = "NVIDIA Tegra132 Norrin";
+       compatible = "nvidia,norrin", "nvidia,tegra132", "nvidia,tegra124";
+
+       aliases {
+               rtc0 = "/i2c@0,7000d000/as3722@40";
+               rtc1 = "/rtc@0,7000e000";
+       };
+
+       chosen { };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x0 0x80000000>;
+       };
+
+       host1x@0,50000000 {
+               hdmi@0,54280000 {
+                       status = "disabled";
+
+                       vdd-supply = <&vdd_3v3_hdmi>;
+                       pll-supply = <&vdd_hdmi_pll>;
+                       hdmi-supply = <&vdd_5v0_hdmi>;
+
+                       nvidia,ddc-i2c-bus = <&hdmi_ddc>;
+                       nvidia,hpd-gpio =
+                               <&gpio TEGRA_GPIO(N, 7) GPIO_ACTIVE_HIGH>;
+               };
+
+               sor@0,54540000 {
+                       status = "okay";
+
+                       nvidia,dpaux = <&dpaux>;
+                       nvidia,panel = <&panel>;
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       vdd-supply = <&vdd_3v3_panel>;
+                       status = "okay";
+               };
+       };
+
+       gpu@0,57000000 {
+               status = "okay";
+
+               vdd-supply = <&vdd_gpu>;
+       };
+
+       pinmux@0,70000868 {
+               pinctrl-names = "default";
+               pinctrl-0 = <&pinmux_default>;
+
+               pinmux_default: pinmux@0 {
+                       dap_mclk1_pw4 {
+                               nvidia,pins = "dap_mclk1_pw4";
+                               nvidia,function = "extperiph1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_pa4 {
+                               nvidia,pins = "dap2_din_pa4";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap2_dout_pa5 {
+                               nvidia,pins = "dap2_dout_pa5",
+                                             "dap2_fs_pa2",
+                                             "dap2_sclk_pa3";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap3_dout_pp2 {
+                               nvidia,pins = "dap3_dout_pp2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_px0 {
+                               nvidia,pins = "dvfs_pwm_px0",
+                                             "dvfs_clk_px2";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ulpi_clk_py0 {
+                               nvidia,pins = "ulpi_clk_py0",
+                                             "ulpi_nxt_py2",
+                                             "ulpi_stp_py3";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ulpi_dir_py1 {
+                               nvidia,pins = "ulpi_dir_py1";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       cam_i2c_scl_pbb1 {
+                               nvidia,pins = "cam_i2c_scl_pbb1",
+                                             "cam_i2c_sda_pbb2";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_scl_pt5 {
+                               nvidia,pins = "gen2_i2c_scl_pt5",
+                                             "gen2_i2c_sda_pt6";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       pj7 {
+                               nvidia,pins = "pj7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pk6 {
+                               nvidia,pins = "spdif_in_pk6";
+                               nvidia,function = "spdif";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pg4 {
+                               nvidia,pins = "pg4",
+                                             "pg5",
+                                             "pg6",
+                                             "pi3";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pg7 {
+                               nvidia,pins = "pg7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ph1 {
+                               nvidia,pins = "ph1";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0",
+                                             "kb_row15_ps7",
+                                             "clk_32k_out_pa0";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc1_clk_pz0 {
+                               nvidia,pins = "sdmmc1_clk_pz0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc1_cmd_pz1 {
+                               nvidia,pins = "sdmmc1_cmd_pz1",
+                                             "sdmmc1_dat0_py7",
+                                             "sdmmc1_dat1_py6",
+                                             "sdmmc1_dat2_py5",
+                                             "sdmmc1_dat3_py4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc3_clk_pa6 {
+                               nvidia,pins = "sdmmc3_clk_pa6";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc3_cmd_pa7 {
+                               nvidia,pins = "sdmmc3_cmd_pa7",
+                                             "sdmmc3_dat0_pb7",
+                                             "sdmmc3_dat1_pb6",
+                                             "sdmmc3_dat2_pb5",
+                                             "sdmmc3_dat3_pb4",
+                                             "kb_col4_pq4",
+                                             "sdmmc3_clk_lb_out_pee4",
+                                             "sdmmc3_clk_lb_in_pee5",
+                                             "sdmmc3_cd_n_pv2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc4_clk_pcc4 {
+                               nvidia,pins = "sdmmc4_clk_pcc4";
+                               nvidia,function = "sdmmc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc4_cmd_pt7 {
+                               nvidia,pins = "sdmmc4_cmd_pt7",
+                                             "sdmmc4_dat0_paa0",
+                                             "sdmmc4_dat1_paa1",
+                                             "sdmmc4_dat2_paa2",
+                                             "sdmmc4_dat3_paa3",
+                                             "sdmmc4_dat4_paa4",
+                                             "sdmmc4_dat5_paa5",
+                                             "sdmmc4_dat6_paa6",
+                                             "sdmmc4_dat7_paa7";
+                               nvidia,function = "sdmmc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       mic_det_l {
+                               nvidia,pins = "kb_row7_pr7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       kb_row10_ps2 {
+                               nvidia,pins = "kb_row10_ps2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       kb_row9_ps1 {
+                               nvidia,pins = "kb_row9_ps1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_pz6 {
+                               nvidia,pins = "pwr_i2c_scl_pz6",
+                                             "pwr_i2c_sda_pz7";
+                               nvidia,function = "i2cpwr";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "rtck";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "pwron";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       kb_col0_ap {
+                               nvidia,pins = "kb_col0_pq0";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       en_vdd_sd {
+                               nvidia,pins = "kb_row0_pr0";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       lid_open {
+                               nvidia,pins = "kb_row4_pr4";
+                               nvidia,function = "rsvd3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       reset_out_n {
+                               nvidia,pins = "reset_out_n";
+                               nvidia,function = "reset_out_n";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk3_out_pee0 {
+                               nvidia,pins = "clk3_out_pee0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pc4 {
+                               nvidia,pins = "gen1_i2c_scl_pc4",
+                                             "gen1_i2c_sda_pc5";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_cec_pee3 {
+                               nvidia,pins = "hdmi_cec_pee3";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_int_pn7 {
+                               nvidia,pins = "hdmi_int_pn7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ddc_scl_pv4 {
+                               nvidia,pins = "ddc_scl_pv4",
+                                             "ddc_sda_pv5";
+                               nvidia,function = "i2c4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,rcv-sel = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en0_pn4 {
+                               nvidia,pins = "usb_vbus_en0_pn4",
+                                             "usb_vbus_en1_pn5",
+                                             "usb_vbus_en2_pff1";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       drive_sdio1 {
+                               nvidia,pins = "drive_sdio1";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <36>;
+                               nvidia,pull-up-strength = <20>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_SLOW>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_SLOW>;
+                       };
+                       drive_sdio3 {
+                               nvidia,pins = "drive_sdio3";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <22>;
+                               nvidia,pull-up-strength = <36>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                       };
+                       drive_gma {
+                               nvidia,pins = "drive_gma";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <2>;
+                               nvidia,pull-up-strength = <1>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,drive-type = <1>;
+                       };
+                       ac_ok {
+                               nvidia,pins = "pj0";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       codec_irq_l {
+                               nvidia,pins = "ph4";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       lcd_bl_en {
+                               nvidia,pins = "ph2";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_irq_l {
+                               nvidia,pins = "gpio_w3_aud_pw3";
+                               nvidia,function = "spi6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       tpm_davint_l {
+                               nvidia,pins = "ph6";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ts_irq_l {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ts_reset_l {
+                               nvidia,pins = "pk4";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <1>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ts_shdn_l {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph7 {
+                               nvidia,pins = "ph7";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sensor_irq_l {
+                               nvidia,pins = "pi6";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       wifi_en {
+                               nvidia,pins = "gpio_x7_aud_px7";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       chromeos_write_protect {
+                               nvidia,pins = "kb_row1_pr1";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       hp_det_l {
+                               nvidia,pins = "pi7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       soc_warm_reset_l {
+                               nvidia,pins = "pi5";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+
+       serial@0,70006000 {
+               status = "okay";
+       };
+
+       pwm: pwm@0,7000a000 {
+               status = "okay";
+       };
+
+       /* HDMI DDC */
+       hdmi_ddc: i2c@0,7000c700 {
+               status = "okay";
+               clock-frequency = <100000>;
+       };
+
+       i2c@0,7000d000 {
+               status = "okay";
+               clock-frequency = <400000>;
+
+               as3722: pmic@40 {
+                       compatible = "ams,as3722";
+                       reg = <0x40>;
+                       interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+
+                       ams,system-power-controller;
+
+                       #interrupt-cells = <2>;
+                       interrupt-controller;
+
+                       #gpio-cells = <2>;
+                       gpio-controller;
+
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&as3722_default>;
+
+                       as3722_default: pinmux@0 {
+                               gpio0 {
+                                       pins = "gpio0";
+                                       function = "gpio";
+                                       bias-pull-down;
+                               };
+
+                               gpio1 {
+                                       pins = "gpio1";
+                                       function = "gpio";
+                                       bias-pull-up;
+                               };
+
+                               gpio2_4_7 {
+                                       pins = "gpio2", "gpio4", "gpio7";
+                                       function = "gpio";
+                                       bias-pull-up;
+                               };
+
+                               gpio3 {
+                                       pins = "gpio3";
+                                       function = "gpio";
+                                       bias-high-impedance;
+                               };
+
+                               gpio5 {
+                                       pins = "gpio5";
+                                       function = "clk32k-out";
+                                       bias-pull-down;
+                               };
+
+                               gpio6 {
+                                       pins = "gpio6";
+                                       function = "clk32k-out";
+                                       bias-pull-down;
+                               };
+                       };
+
+                       regulators {
+                               vsup-sd2-supply = <&vdd_5v0_sys>;
+                               vsup-sd3-supply = <&vdd_5v0_sys>;
+                               vsup-sd4-supply = <&vdd_5v0_sys>;
+                               vsup-sd5-supply = <&vdd_5v0_sys>;
+                               vin-ldo0-supply = <&vdd_1v35_lp0>;
+                               vin-ldo1-6-supply = <&vdd_3v3_sys>;
+                               vin-ldo2-5-7-supply = <&vddio_1v8>;
+                               vin-ldo3-4-supply = <&vdd_3v3_sys>;
+                               vin-ldo9-10-supply = <&vdd_5v0_sys>;
+                               vin-ldo11-supply = <&vdd_3v3_run>;
+
+                               sd0 {
+                                       regulator-name = "+VDD_CPU_AP";
+                                       regulator-min-microvolt = <700000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-max-microamp = <3500000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <2>;
+                               };
+
+                               sd1 {
+                                       regulator-name = "+VDD_CORE";
+                                       regulator-min-microvolt = <700000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-max-microamp = <4000000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <1>;
+                               };
+
+                               vdd_1v35_lp0: sd2 {
+                                       regulator-name = "+1.35V_LP0(sd2)";
+                                       regulator-min-microvolt = <1350000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               sd3 {
+                                       regulator-name = "+1.35V_LP0(sd3)";
+                                       regulator-min-microvolt = <1350000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               vdd_1v05_run: sd4 {
+                                       regulator-name = "+1.05V_RUN";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                               };
+
+                               vddio_1v8: sd5 {
+                                       regulator-name = "+1.8V_VDDIO";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               vdd_gpu: sd6 {
+                                       regulator-name = "+VDD_GPU_AP";
+                                       regulator-min-microvolt = <800000>;
+                                       regulator-max-microvolt = <1200000>;
+                                       regulator-min-microamp = <3500000>;
+                                       regulator-max-microamp = <3500000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               ldo0 {
+                                       regulator-name = "+1.05_RUN_AVDD";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <1>;
+                               };
+
+                               ldo1 {
+                                       regulator-name = "+1.8V_RUN_CAM";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                               };
+
+                               ldo2 {
+                                       regulator-name = "+1.2V_GEN_AVDD";
+                                       regulator-min-microvolt = <1200000>;
+                                       regulator-max-microvolt = <1200000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               ldo3 {
+                                       regulator-name = "+1.00V_LP0_VDD_RTC";
+                                       regulator-min-microvolt = <1000000>;
+                                       regulator-max-microvolt = <1000000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,enable-tracking;
+                               };
+
+                               vdd_run_cam: ldo4 {
+                                       regulator-name = "+2.8V_RUN_CAM";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo5 {
+                                       regulator-name = "+1.2V_RUN_CAM_FRONT";
+                                       regulator-min-microvolt = <1200000>;
+                                       regulator-max-microvolt = <1200000>;
+                               };
+
+                               vddio_sdmmc3: ldo6 {
+                                       regulator-name = "+VDDIO_SDMMC3";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <3300000>;
+                               };
+
+                               ldo7 {
+                                       regulator-name = "+1.05V_RUN_CAM_REAR";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                               };
+
+                               ldo9 {
+                                       regulator-name = "+2.8V_RUN_TOUCH";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo10 {
+                                       regulator-name = "+2.8V_RUN_CAM_AF";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo11 {
+                                       regulator-name = "+1.8V_RUN_VPP_FUSE";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                               };
+                       };
+               };
+       };
+
+       spi@0,7000d400 {
+               status = "okay";
+
+               ec: cros-ec@0 {
+                       compatible = "google,cros-ec-spi";
+                       spi-max-frequency = <3000000>;
+                       interrupt-parent = <&gpio>;
+                       interrupts = <TEGRA_GPIO(C, 7) IRQ_TYPE_LEVEL_LOW>;
+                       reg = <0>;
+
+                       google,cros-ec-spi-msg-delay = <2000>;
+
+                       i2c_20: i2c-tunnel {
+                               compatible = "google,cros-ec-i2c-tunnel";
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               google,remote-bus = <0>;
+
+                               charger: bq24735 {
+                                       compatible = "ti,bq24735";
+                                       reg = <0x9>;
+                                       interrupt-parent = <&gpio>;
+                                       interrupts = <TEGRA_GPIO(J, 0)
+                                                       GPIO_ACTIVE_HIGH>;
+                                       ti,ac-detect-gpios = <&gpio
+                                                       TEGRA_GPIO(J, 0)
+                                                       GPIO_ACTIVE_HIGH>;
+                               };
+
+                               battery: smart-battery {
+                                       compatible = "sbs,sbs-battery";
+                                       reg = <0xb>;
+                                       battery-name = "battery";
+                                       sbs,i2c-retry-count = <2>;
+                                       sbs,poll-retry-count = <10>;
+                               /*      power-supplies = <&charger>; */
+                               };
+                       };
+
+                       keyboard-controller {
+                               compatible = "google,cros-ec-keyb";
+                               keypad,num-rows = <8>;
+                               keypad,num-columns = <13>;
+                               google,needs-ghost-filter;
+                               linux,keymap =
+                                       <MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)
+                                        MATRIX_KEY(0x00, 0x02, KEY_F1)
+                                        MATRIX_KEY(0x00, 0x03, KEY_B)
+                                        MATRIX_KEY(0x00, 0x04, KEY_F10)
+                                        MATRIX_KEY(0x00, 0x06, KEY_N)
+                                        MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
+                                        MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
+
+                                        MATRIX_KEY(0x01, 0x01, KEY_ESC)
+                                        MATRIX_KEY(0x01, 0x02, KEY_F4)
+                                        MATRIX_KEY(0x01, 0x03, KEY_G)
+                                        MATRIX_KEY(0x01, 0x04, KEY_F7)
+                                        MATRIX_KEY(0x01, 0x06, KEY_H)
+                                        MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
+                                        MATRIX_KEY(0x01, 0x09, KEY_F9)
+                                        MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
+
+                                        MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
+                                        MATRIX_KEY(0x02, 0x01, KEY_TAB)
+                                        MATRIX_KEY(0x02, 0x02, KEY_F3)
+                                        MATRIX_KEY(0x02, 0x03, KEY_T)
+                                        MATRIX_KEY(0x02, 0x04, KEY_F6)
+                                        MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)
+                                        MATRIX_KEY(0x02, 0x06, KEY_Y)
+                                        MATRIX_KEY(0x02, 0x07, KEY_102ND)
+                                        MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
+                                        MATRIX_KEY(0x02, 0x09, KEY_F8)
+
+                                        MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
+                                        MATRIX_KEY(0x03, 0x02, KEY_F2)
+                                        MATRIX_KEY(0x03, 0x03, KEY_5)
+                                        MATRIX_KEY(0x03, 0x04, KEY_F5)
+                                        MATRIX_KEY(0x03, 0x06, KEY_6)
+                                        MATRIX_KEY(0x03, 0x08, KEY_MINUS)
+                                        MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
+
+                                        MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
+                                        MATRIX_KEY(0x04, 0x01, KEY_A)
+                                        MATRIX_KEY(0x04, 0x02, KEY_D)
+                                        MATRIX_KEY(0x04, 0x03, KEY_F)
+                                        MATRIX_KEY(0x04, 0x04, KEY_S)
+                                        MATRIX_KEY(0x04, 0x05, KEY_K)
+                                        MATRIX_KEY(0x04, 0x06, KEY_J)
+                                        MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)
+                                        MATRIX_KEY(0x04, 0x09, KEY_L)
+                                        MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)
+                                        MATRIX_KEY(0x04, 0x0b, KEY_ENTER)
+
+                                        MATRIX_KEY(0x05, 0x01, KEY_Z)
+                                        MATRIX_KEY(0x05, 0x02, KEY_C)
+                                        MATRIX_KEY(0x05, 0x03, KEY_V)
+                                        MATRIX_KEY(0x05, 0x04, KEY_X)
+                                        MATRIX_KEY(0x05, 0x05, KEY_COMMA)
+                                        MATRIX_KEY(0x05, 0x06, KEY_M)
+                                        MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)
+                                        MATRIX_KEY(0x05, 0x08, KEY_SLASH)
+                                        MATRIX_KEY(0x05, 0x09, KEY_DOT)
+                                        MATRIX_KEY(0x05, 0x0b, KEY_SPACE)
+
+                                        MATRIX_KEY(0x06, 0x01, KEY_1)
+                                        MATRIX_KEY(0x06, 0x02, KEY_3)
+                                        MATRIX_KEY(0x06, 0x03, KEY_4)
+                                        MATRIX_KEY(0x06, 0x04, KEY_2)
+                                        MATRIX_KEY(0x06, 0x05, KEY_8)
+                                        MATRIX_KEY(0x06, 0x06, KEY_7)
+                                        MATRIX_KEY(0x06, 0x08, KEY_0)
+                                        MATRIX_KEY(0x06, 0x09, KEY_9)
+                                        MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT)
+                                        MATRIX_KEY(0x06, 0x0b, KEY_DOWN)
+                                        MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)
+
+                                        MATRIX_KEY(0x07, 0x01, KEY_Q)
+                                        MATRIX_KEY(0x07, 0x02, KEY_E)
+                                        MATRIX_KEY(0x07, 0x03, KEY_R)
+                                        MATRIX_KEY(0x07, 0x04, KEY_W)
+                                        MATRIX_KEY(0x07, 0x05, KEY_I)
+                                        MATRIX_KEY(0x07, 0x06, KEY_U)
+                                        MATRIX_KEY(0x07, 0x07, KEY_RIGHTSHIFT)
+                                        MATRIX_KEY(0x07, 0x08, KEY_P)
+                                        MATRIX_KEY(0x07, 0x09, KEY_O)
+                                        MATRIX_KEY(0x07, 0x0b, KEY_UP)
+                                        MATRIX_KEY(0x07, 0x0c, KEY_LEFT)>;
+                       };
+               };
+       };
+
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt;
+               nvidia,suspend-mode = <0>; /* LP0 */
+               #wake-cells = <3>;
+               nvidia,cpu-pwr-good-time = <500>;
+               nvidia,cpu-pwr-off-time = <300>;
+               nvidia,core-pwr-good-time = <641 3845>;
+               nvidia,core-pwr-off-time = <61036>;
+               nvidia,core-power-req-active-high;
+               nvidia,sys-clock-req-active-high;
+               nvidia,reset-gpio = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>;
+       };
+
+       /* WIFI/BT module */
+       sdhci@0,700b0000 {
+               status = "disabled";
+       };
+
+       /* external SD/MMC */
+       sdhci@0,700b0400 {
+               cd-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>;
+               power-gpios = <&gpio TEGRA_GPIO(R, 0) GPIO_ACTIVE_HIGH>;
+               wp-gpios = <&gpio TEGRA_GPIO(Q, 4) GPIO_ACTIVE_HIGH>;
+               status = "okay";
+               bus-width = <4>;
+               vqmmc-supply = <&vddio_sdmmc3>;
+       };
+
+       /* EMMC 4.51 */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       usb@0,7d000000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d000000 {
+               status = "okay";
+               vbus-supply = <&vdd_usb1_vbus>;
+       };
+
+       usb@0,7d004000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d004000 {
+               status = "okay";
+               vbus-supply = <&vdd_run_cam>;
+       };
+
+       usb@0,7d008000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d008000 {
+               status = "okay";
+               vbus-supply = <&vdd_usb3_vbus>;
+       };
+
+       backlight: backlight {
+               compatible = "pwm-backlight";
+
+               enable-gpios = <&gpio TEGRA_GPIO(H, 2) GPIO_ACTIVE_HIGH>;
+               power-supply = <&vdd_led>;
+               pwms = <&pwm 1 1000000>;
+
+               brightness-levels = <0 4 8 16 32 64 128 255>;
+               default-brightness-level = <6>;
+
+               backlight-boot-off;
+       };
+
+       clocks {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 {
+                       compatible = "fixed-clock";
+                       reg = <0>;
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+               };
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               lid {
+                       label = "Lid";
+                       gpios = <&gpio TEGRA_GPIO(R, 4) GPIO_ACTIVE_LOW>;
+                       linux,input-type = <5>;         /* EV_SW */
+                       linux,code = <0>;               /* SW_LID */
+                       debounce-interval = <1>;
+                       gpio-key,wakeup;
+               };
+
+               power {
+                       label = "Power";
+                       gpios = <&gpio TEGRA_GPIO(Q, 0) GPIO_ACTIVE_LOW>;
+                       linux,code = <KEY_POWER>;
+                       debounce-interval = <10>;
+                       gpio-key,wakeup;
+               };
+       };
+
+       panel: panel {
+               compatible = "innolux,n116bge", "simple-panel";
+               backlight = <&backlight>;
+               ddc-i2c-bus = <&dpaux>;
+       };
+
+       regulators {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               vdd_mux: regulator@0 {
+                       compatible = "regulator-fixed";
+                       reg = <0>;
+                       regulator-name = "+VDD_MUX";
+                       regulator-min-microvolt = <19000000>;
+                       regulator-max-microvolt = <19000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+               };
+
+               vdd_5v0_sys: regulator@1 {
+                       compatible = "regulator-fixed";
+                       reg = <1>;
+                       regulator-name = "+5V_SYS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_3v3_sys: regulator@2 {
+                       compatible = "regulator-fixed";
+                       reg = <2>;
+                       regulator-name = "+3.3V_SYS";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_3v3_run: regulator@3 {
+                       compatible = "regulator-fixed";
+                       reg = <3>;
+                       regulator-name = "+3.3V_RUN";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       gpio = <&as3722 1 GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_3v3_sys>;
+               };
+
+               vdd_3v3_hdmi: regulator@4 {
+                       compatible = "regulator-fixed";
+                       reg = <4>;
+                       regulator-name = "+3.3V_AVDD_HDMI_AP_GATED";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       vin-supply = <&vdd_3v3_run>;
+               };
+
+               vdd_led: regulator@5 {
+                       compatible = "regulator-fixed";
+                       reg = <5>;
+                       regulator-name = "+VDD_LED";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       gpio = <&gpio TEGRA_GPIO(P, 2) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_usb1_vbus: regulator@6 {
+                       compatible = "regulator-fixed";
+                       reg = <6>;
+                       regulator-name = "+5V_USB_HS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(N, 4) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       gpio-open-drain;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_usb3_vbus: regulator@7 {
+                       compatible = "regulator-fixed";
+                       reg = <7>;
+                       regulator-name = "+5V_USB_SS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(N, 5) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       gpio-open-drain;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_3v3_panel: regulator@8 {
+                       compatible = "regulator-fixed";
+                       reg = <8>;
+                       regulator-name = "+3.3V_PANEL";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       gpio = <&as3722 4 GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_3v3_sys>;
+               };
+
+               vdd_hdmi_pll: regulator@9 {
+                       compatible = "regulator-fixed";
+                       reg = <9>;
+                       regulator-name = "+1.05V_RUN_AVDD_HDMI_PLL_AP_GATE";
+                       regulator-min-microvolt = <1050000>;
+                       regulator-max-microvolt = <1050000>;
+                       gpio = <&gpio TEGRA_GPIO(H, 7) GPIO_ACTIVE_LOW>;
+                       vin-supply = <&vdd_1v05_run>;
+               };
+
+               vdd_5v0_hdmi: regulator@10 {
+                       compatible = "regulator-fixed";
+                       reg = <10>;
+                       regulator-name = "+5V_HDMI_CON";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(K, 6) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_5v0_ts: regulator@11 {
+                       compatible = "regulator-fixed";
+                       reg = <11>;
+                       regulator-name = "+5V_VDD_TS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       gpio = <&gpio TEGRA_GPIO(K, 1) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi
new file mode 100644
index 0000000..e8bb460
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra132.dtsi
@@ -0,0 +1,990 @@
+#include <dt-bindings/clock/tegra124-car.h>
+#include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra124-mc.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+       compatible = "nvidia,tegra132", "nvidia,tegra124";
+       interrupt-parent = <&lic>;
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       pcie-controller@0,01003000 {
+               compatible = "nvidia,tegra124-pcie";
+               device_type = "pci";
+               reg = <0x0 0x01003000 0x0 0x00000800   /* PADS registers */
+                      0x0 0x01003800 0x0 0x00000800   /* AFI registers */
+                      0x0 0x02000000 0x0 0x10000000>; /* configuration space */
+               reg-names = "pads", "afi", "cs";
+               interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>, /* controller interrupt */
+                            <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>; /* MSI interrupt */
+               interrupt-names = "intr", "msi";
+
+               #interrupt-cells = <1>;
+               interrupt-map-mask = <0 0 0 0>;
+               interrupt-map = <0 0 0 0 &gic GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+
+               bus-range = <0x00 0xff>;
+               #address-cells = <3>;
+               #size-cells = <2>;
+
+               ranges = <0x82000000 0 0x01000000 0x0 0x01000000 0 0x00001000   /* port 0 configuration space */
+                         0x82000000 0 0x01001000 0x0 0x01001000 0 0x00001000   /* port 1 configuration space */
+                         0x81000000 0 0x0        0x0 0x12000000 0 0x00010000   /* downstream I/O (64 KiB) */
+                         0x82000000 0 0x13000000 0x0 0x13000000 0 0x0d000000   /* non-prefetchable memory (208 MiB) */
+                         0xc2000000 0 0x20000000 0x0 0x20000000 0 0x20000000>; /* prefetchable memory (512 MiB) */
+
+               clocks = <&tegra_car TEGRA124_CLK_PCIE>,
+                        <&tegra_car TEGRA124_CLK_AFI>,
+                        <&tegra_car TEGRA124_CLK_PLL_E>,
+                        <&tegra_car TEGRA124_CLK_CML0>;
+               clock-names = "pex", "afi", "pll_e", "cml";
+               resets = <&tegra_car 70>,
+                        <&tegra_car 72>,
+                        <&tegra_car 74>;
+               reset-names = "pex", "afi", "pcie_x";
+               status = "disabled";
+
+               phys = <&padctl TEGRA_XUSB_PADCTL_PCIE>;
+               phy-names = "pcie";
+
+               pci@1,0 {
+                       device_type = "pci";
+                       assigned-addresses = <0x82000800 0 0x01000000 0 0x1000>;
+                       reg = <0x000800 0 0 0 0>;
+                       status = "disabled";
+
+                       #address-cells = <3>;
+                       #size-cells = <2>;
+                       ranges;
+
+                       nvidia,num-lanes = <2>;
+               };
+
+               pci@2,0 {
+                       device_type = "pci";
+                       assigned-addresses = <0x82001000 0 0x01001000 0 0x1000>;
+                       reg = <0x001000 0 0 0 0>;
+                       status = "disabled";
+
+                       #address-cells = <3>;
+                       #size-cells = <2>;
+                       ranges;
+
+                       nvidia,num-lanes = <1>;
+               };
+       };
+
+       host1x@0,50000000 {
+               compatible = "nvidia,tegra124-host1x", "simple-bus";
+               reg = <0x0 0x50000000 0x0 0x00034000>;
+               interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, /* syncpt */
+                            <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; /* general */
+               clocks = <&tegra_car TEGRA124_CLK_HOST1X>;
+               clock-names = "host1x";
+               resets = <&tegra_car 28>;
+               reset-names = "host1x";
+
+               #address-cells = <2>;
+               #size-cells = <2>;
+
+               ranges = <0 0x54000000 0 0x54000000 0 0x01000000>;
+
+               dc@0,54200000 {
+                       compatible = "nvidia,tegra124-dc";
+                       reg = <0x0 0x54200000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DISP1>,
+                                <&tegra_car TEGRA124_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 27>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DC>;
+
+                       nvidia,head = <0>;
+               };
+
+               dc@0,54240000 {
+                       compatible = "nvidia,tegra124-dc";
+                       reg = <0x0 0x54240000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DISP2>,
+                                <&tegra_car TEGRA124_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 26>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DCB>;
+
+                       nvidia,head = <1>;
+               };
+
+               hdmi@0,54280000 {
+                       compatible = "nvidia,tegra124-hdmi";
+                       reg = <0x0 0x54280000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_HDMI>,
+                                <&tegra_car TEGRA124_CLK_PLL_D2_OUT0>;
+                       clock-names = "hdmi", "parent";
+                       resets = <&tegra_car 51>;
+                       reset-names = "hdmi";
+                       status = "disabled";
+               };
+
+               sor@0,54540000 {
+                       compatible = "nvidia,tegra124-sor";
+                       reg = <0x0 0x54540000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_SOR0>,
+                                <&tegra_car TEGRA124_CLK_PLL_D_OUT0>,
+                                <&tegra_car TEGRA124_CLK_PLL_DP>,
+                                <&tegra_car TEGRA124_CLK_CLK_M>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 182>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       compatible = "nvidia,tegra124-dpaux";
+                       reg = <0x0 0x545c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DPAUX>,
+                                <&tegra_car TEGRA124_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 181>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+       };
+
+       gic: interrupt-controller@0,50041000 {
+               compatible = "arm,cortex-a15-gic";
+               #interrupt-cells = <3>;
+               interrupt-controller;
+               reg = <0x0 0x50041000 0x0 0x1000>,
+                     <0x0 0x50042000 0x0 0x2000>,
+                     <0x0 0x50044000 0x0 0x2000>,
+                     <0x0 0x50046000 0x0 0x2000>;
+               interrupts = <GIC_PPI 9
+                       (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+               interrupt-parent = <&gic>;
+       };
+
+       gpu@0,57000000 {
+               compatible = "nvidia,gk20a";
+               reg = <0x0 0x57000000 0x0 0x01000000>,
+                     <0x0 0x58000000 0x0 0x01000000>;
+               interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "stall", "nonstall";
+               clocks = <&tegra_car TEGRA124_CLK_GPU>,
+                        <&tegra_car TEGRA124_CLK_PLL_P_OUT5>;
+               clock-names = "gpu", "pwr";
+               resets = <&tegra_car 184>;
+               reset-names = "gpu";
+               status = "disabled";
+       };
+
+       lic: interrupt-controller@0,60004000 {
+               compatible = "nvidia,tegra124-ictlr", "nvidia,tegra30-ictlr";
+               reg = <0x0 0x60004000 0x0 0x100>,
+                     <0x0 0x60004100 0x0 0x100>,
+                     <0x0 0x60004200 0x0 0x100>,
+                     <0x0 0x60004300 0x0 0x100>,
+                     <0x0 0x60004400 0x0 0x100>;
+               interrupt-controller;
+               #interrupt-cells = <3>;
+               interrupt-parent = <&gic>;
+       };
+
+       timer@0,60005000 {
+               compatible = "nvidia,tegra124-timer", "nvidia,tegra20-timer";
+               reg = <0x0 0x60005000 0x0 0x400>;
+               interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_TIMER>;
+               clock-names = "timer";
+       };
+
+       tegra_car: clock@0,60006000 {
+               compatible = "nvidia,tegra132-car";
+               reg = <0x0 0x60006000 0x0 0x1000>;
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+               nvidia,external-memory-controller = <&emc>;
+       };
+
+       flow-controller@0,60007000 {
+               compatible = "nvidia,tegra124-flowctrl";
+               reg = <0x0 0x60007000 0x0 0x1000>;
+       };
+
+       actmon@0,6000c800 {
+               compatible = "nvidia,tegra124-actmon";
+               reg = <0x0 0x6000c800 0x0 0x400>;
+               interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
+                        <&tegra_car TEGRA124_CLK_EMC>;
+               clock-names = "actmon", "emc";
+               resets = <&tegra_car 119>;
+               reset-names = "actmon";
+       };
+
+       gpio: gpio@0,6000d000 {
+               compatible = "nvidia,tegra124-gpio", "nvidia,tegra30-gpio";
+               reg = <0x0 0x6000d000 0x0 0x1000>;
+               interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               #interrupt-cells = <2>;
+               interrupt-controller;
+       };
+
+       apbdma: dma@0,60020000 {
+               compatible = "nvidia,tegra124-apbdma", "nvidia,tegra148-apbdma";
+               reg = <0x0 0x60020000 0x0 0x1400>;
+               interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_APBDMA>;
+               clock-names = "dma";
+               resets = <&tegra_car 34>;
+               reset-names = "dma";
+               #dma-cells = <1>;
+       };
+
+       apbmisc@0,70000800 {
+               compatible = "nvidia,tegra124-apbmisc", "nvidia,tegra20-apbmisc";
+               reg = <0x0 0x70000800 0x0 0x64>,   /* Chip revision */
+                     <0x0 0x7000e864 0x0 0x04>;   /* Strapping options */
+       };
+
+       pinmux: pinmux@0,70000868 {
+               compatible = "nvidia,tegra124-pinmux";
+               reg = <0x0 0x70000868 0x0 0x164>, /* Pad control registers */
+                     <0x0 0x70003000 0x0 0x434>, /* Mux registers */
+                     <0x0 0x70000820 0x0 0x008>; /* MIPI pad control */
+       };
+
+       /*
+        * There are two serial drivers, i.e. the 8250-based simple serial
+        * driver and the APB DMA based serial driver for higher baudrate
+        * and performance. To enable the 8250-based driver, the compatible
+        * is "nvidia,tegra124-uart", "nvidia,tegra20-uart"; to enable
+        * the APB DMA based serial driver, the compatible is
+        * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
+        */
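+       /*
+        * For example, a hypothetical board .dts that includes this file
+        * could select the high-speed driver for UART A with an override
+        * like (a sketch, not part of this file):
+        *
+        *      &uarta {
+        *              compatible = "nvidia,tegra124-hsuart",
+        *                           "nvidia,tegra30-hsuart";
+        *              status = "okay";
+        *      };
+        */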
+       uarta: serial@0,70006000 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006000 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTA>;
+               clock-names = "serial";
+               resets = <&tegra_car 6>;
+               reset-names = "serial";
+               dmas = <&apbdma 8>, <&apbdma 8>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartb: serial@0,70006040 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006040 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTB>;
+               clock-names = "serial";
+               resets = <&tegra_car 7>;
+               reset-names = "serial";
+               dmas = <&apbdma 9>, <&apbdma 9>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartc: serial@0,70006200 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006200 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTC>;
+               clock-names = "serial";
+               resets = <&tegra_car 55>;
+               reset-names = "serial";
+               dmas = <&apbdma 10>, <&apbdma 10>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartd: serial@0,70006300 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006300 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTD>;
+               clock-names = "serial";
+               resets = <&tegra_car 65>;
+               reset-names = "serial";
+               dmas = <&apbdma 19>, <&apbdma 19>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       pwm: pwm@0,7000a000 {
+               compatible = "nvidia,tegra124-pwm", "nvidia,tegra20-pwm";
+               reg = <0x0 0x7000a000 0x0 0x100>;
+               #pwm-cells = <2>;
+               clocks = <&tegra_car TEGRA124_CLK_PWM>;
+               clock-names = "pwm";
+               resets = <&tegra_car 17>;
+               reset-names = "pwm";
+               status = "disabled";
+       };
+
+       i2c@0,7000c000 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c000 0x0 0x100>;
+               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C1>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 12>;
+               reset-names = "i2c";
+               dmas = <&apbdma 21>, <&apbdma 21>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c400 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c400 0x0 0x100>;
+               interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C2>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 54>;
+               reset-names = "i2c";
+               dmas = <&apbdma 22>, <&apbdma 22>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c500 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c500 0x0 0x100>;
+               interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C3>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 67>;
+               reset-names = "i2c";
+               dmas = <&apbdma 23>, <&apbdma 23>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c700 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c700 0x0 0x100>;
+               interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C4>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 103>;
+               reset-names = "i2c";
+               dmas = <&apbdma 26>, <&apbdma 26>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d000 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d000 0x0 0x100>;
+               interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C5>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 47>;
+               reset-names = "i2c";
+               dmas = <&apbdma 24>, <&apbdma 24>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d100 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d100 0x0 0x100>;
+               interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C6>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 166>;
+               reset-names = "i2c";
+               dmas = <&apbdma 30>, <&apbdma 30>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d400 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d400 0x0 0x200>;
+               interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC1>;
+               clock-names = "spi";
+               resets = <&tegra_car 41>;
+               reset-names = "spi";
+               dmas = <&apbdma 15>, <&apbdma 15>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d600 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d600 0x0 0x200>;
+               interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC2>;
+               clock-names = "spi";
+               resets = <&tegra_car 44>;
+               reset-names = "spi";
+               dmas = <&apbdma 16>, <&apbdma 16>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d800 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d800 0x0 0x200>;
+               interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC3>;
+               clock-names = "spi";
+               resets = <&tegra_car 46>;
+               reset-names = "spi";
+               dmas = <&apbdma 17>, <&apbdma 17>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000da00 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000da00 0x0 0x200>;
+               interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC4>;
+               clock-names = "spi";
+               resets = <&tegra_car 68>;
+               reset-names = "spi";
+               dmas = <&apbdma 18>, <&apbdma 18>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000dc00 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000dc00 0x0 0x200>;
+               interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC5>;
+               clock-names = "spi";
+               resets = <&tegra_car 104>;
+               reset-names = "spi";
+               dmas = <&apbdma 27>, <&apbdma 27>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000de00 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000de00 0x0 0x200>;
+               interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC6>;
+               clock-names = "spi";
+               resets = <&tegra_car 105>;
+               reset-names = "spi";
+               dmas = <&apbdma 28>, <&apbdma 28>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       rtc@0,7000e000 {
+               compatible = "nvidia,tegra124-rtc", "nvidia,tegra20-rtc";
+               reg = <0x0 0x7000e000 0x0 0x100>;
+               interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_RTC>;
+               clock-names = "rtc";
+       };
+
+       pmc@0,7000e400 {
+               compatible = "nvidia,tegra124-pmc";
+               reg = <0x0 0x7000e400 0x0 0x400>;
+               clocks = <&tegra_car TEGRA124_CLK_PCLK>, <&clk32k_in>;
+               clock-names = "pclk", "clk32k_in";
+       };
+
+       fuse@0,7000f800 {
+               compatible = "nvidia,tegra124-efuse";
+               reg = <0x0 0x7000f800 0x0 0x400>;
+               clocks = <&tegra_car TEGRA124_CLK_FUSE>;
+               clock-names = "fuse";
+               resets = <&tegra_car 39>;
+               reset-names = "fuse";
+       };
+
+       mc: memory-controller@0,70019000 {
+               compatible = "nvidia,tegra132-mc";
+               reg = <0x0 0x70019000 0x0 0x1000>;
+               clocks = <&tegra_car TEGRA124_CLK_MC>;
+               clock-names = "mc";
+
+               interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+               #iommu-cells = <1>;
+       };
+
+       emc: emc@0,7001b000 {
+               compatible = "nvidia,tegra132-emc", "nvidia,tegra124-emc";
+               reg = <0x0 0x7001b000 0x0 0x1000>;
+
+               nvidia,memory-controller = <&mc>;
+       };
+
+       sata@0,70020000 {
+               compatible = "nvidia,tegra124-ahci";
+               reg = <0x0 0x70027000 0x0 0x2000>, /* AHCI */
+                     <0x0 0x70020000 0x0 0x7000>; /* SATA */
+               interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SATA>,
+                        <&tegra_car TEGRA124_CLK_SATA_OOB>,
+                        <&tegra_car TEGRA124_CLK_CML1>,
+                        <&tegra_car TEGRA124_CLK_PLL_E>;
+               clock-names = "sata", "sata-oob", "cml1", "pll_e";
+               resets = <&tegra_car 124>,
+                        <&tegra_car 123>,
+                        <&tegra_car 129>;
+               reset-names = "sata", "sata-oob", "sata-cold";
+               phys = <&padctl TEGRA_XUSB_PADCTL_SATA>;
+               phy-names = "sata-phy";
+               status = "disabled";
+       };
+
+       hda@0,70030000 {
+               compatible = "nvidia,tegra132-hda", "nvidia,tegra124-hda",
+                            "nvidia,tegra30-hda";
+               reg = <0x0 0x70030000 0x0 0x10000>;
+               interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_HDA>,
+                        <&tegra_car TEGRA124_CLK_HDA2HDMI>,
+                        <&tegra_car TEGRA124_CLK_HDA2CODEC_2X>;
+               clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+               resets = <&tegra_car 125>, /* hda */
+                        <&tegra_car 128>, /* hda2hdmi */
+                        <&tegra_car 111>; /* hda2codec_2x */
+               reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+               status = "disabled";
+       };
+
+       padctl: padctl@0,7009f000 {
+               compatible = "nvidia,tegra132-xusb-padctl",
+                            "nvidia,tegra124-xusb-padctl";
+               reg = <0x0 0x7009f000 0x0 0x1000>;
+               resets = <&tegra_car 142>;
+               reset-names = "padctl";
+
+               #phy-cells = <1>;
+
+               phys {
+                       pcie-0 {
+                               status = "disabled";
+                       };
+
+                       sata-0 {
+                               status = "disabled";
+                       };
+
+                       usb3-0 {
+                               status = "disabled";
+                       };
+
+                       usb3-1 {
+                               status = "disabled";
+                       };
+
+                       utmi-0 {
+                               status = "disabled";
+                       };
+
+                       utmi-1 {
+                               status = "disabled";
+                       };
+
+                       utmi-2 {
+                               status = "disabled";
+                       };
+               };
+       };
+
+       sdhci@0,700b0000 {
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0000 0x0 0x200>;
+               interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC1>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 14>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0200 {
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0200 0x0 0x200>;
+               interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC2>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 9>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0400 {
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0400 0x0 0x200>;
+               interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC3>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 69>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0600 {
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0600 0x0 0x200>;
+               interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC4>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 15>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       soctherm: thermal-sensor@0,700e2000 {
+               compatible = "nvidia,tegra124-soctherm";
+               reg = <0x0 0x700e2000 0x0 0x1000>;
+               interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+                       <&tegra_car TEGRA124_CLK_SOC_THERM>;
+               clock-names = "tsensor", "soctherm";
+               resets = <&tegra_car 78>;
+               reset-names = "soctherm";
+               #thermal-sensor-cells = <1>;
+       };
+
+       ahub@0,70300000 {
+               compatible = "nvidia,tegra124-ahub";
+               reg = <0x0 0x70300000 0x0 0x200>,
+                     <0x0 0x70300800 0x0 0x800>,
+                     <0x0 0x70300200 0x0 0x600>;
+               interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_D_AUDIO>,
+                        <&tegra_car TEGRA124_CLK_APBIF>;
+               clock-names = "d_audio", "apbif";
+               resets = <&tegra_car 106>, /* d_audio */
+                        <&tegra_car 107>, /* apbif */
+                        <&tegra_car 30>,  /* i2s0 */
+                        <&tegra_car 11>,  /* i2s1 */
+                        <&tegra_car 18>,  /* i2s2 */
+                        <&tegra_car 101>, /* i2s3 */
+                        <&tegra_car 102>, /* i2s4 */
+                        <&tegra_car 108>, /* dam0 */
+                        <&tegra_car 109>, /* dam1 */
+                        <&tegra_car 110>, /* dam2 */
+                        <&tegra_car 10>,  /* spdif */
+                        <&tegra_car 153>, /* amx */
+                        <&tegra_car 185>, /* amx1 */
+                        <&tegra_car 154>, /* adx */
+                        <&tegra_car 180>, /* adx1 */
+                        <&tegra_car 186>, /* afc0 */
+                        <&tegra_car 187>, /* afc1 */
+                        <&tegra_car 188>, /* afc2 */
+                        <&tegra_car 189>, /* afc3 */
+                        <&tegra_car 190>, /* afc4 */
+                        <&tegra_car 191>; /* afc5 */
+               reset-names = "d_audio", "apbif", "i2s0", "i2s1", "i2s2",
+                             "i2s3", "i2s4", "dam0", "dam1", "dam2",
+                             "spdif", "amx", "amx1", "adx", "adx1",
+                             "afc0", "afc1", "afc2", "afc3", "afc4", "afc5";
+               dmas = <&apbdma 1>, <&apbdma 1>,
+                      <&apbdma 2>, <&apbdma 2>,
+                      <&apbdma 3>, <&apbdma 3>,
+                      <&apbdma 4>, <&apbdma 4>,
+                      <&apbdma 6>, <&apbdma 6>,
+                      <&apbdma 7>, <&apbdma 7>,
+                      <&apbdma 12>, <&apbdma 12>,
+                      <&apbdma 13>, <&apbdma 13>,
+                      <&apbdma 14>, <&apbdma 14>,
+                      <&apbdma 29>, <&apbdma 29>;
+               dma-names = "rx0", "tx0", "rx1", "tx1", "rx2", "tx2",
+                           "rx3", "tx3", "rx4", "tx4", "rx5", "tx5",
+                           "rx6", "tx6", "rx7", "tx7", "rx8", "tx8",
+                           "rx9", "tx9";
+               ranges;
+               #address-cells = <2>;
+               #size-cells = <2>;
+
+               tegra_i2s0: i2s@0,70301000 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301000 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <4 4>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S0>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 30>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s1: i2s@0,70301100 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301100 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <5 5>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S1>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 11>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s2: i2s@0,70301200 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301200 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <6 6>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S2>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 18>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s3: i2s@0,70301300 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301300 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <7 7>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S3>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 101>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s4: i2s@0,70301400 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301400 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <8 8>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S4>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 102>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+       };
+
+       usb@0,7d000000 {
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d000000 0x0 0x4000>;
+               interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "usb";
+               resets = <&tegra_car 22>;
+               reset-names = "usb";
+               nvidia,phy = <&phy1>;
+               status = "disabled";
+       };
+
+       phy1: usb-phy@0,7d000000 {
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d000000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USBD>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 22>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               nvidia,has-utmi-pad-registers;
+               status = "disabled";
+       };
+
+       usb@0,7d004000 {
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d004000 0x0 0x4000>;
+               interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB2>;
+               clock-names = "usb";
+               resets = <&tegra_car 58>;
+               reset-names = "usb";
+               nvidia,phy = <&phy2>;
+               status = "disabled";
+       };
+
+       phy2: usb-phy@0,7d004000 {
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d004000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB2>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 58>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled";
+       };
+
+       usb@0,7d008000 {
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d008000 0x0 0x4000>;
+               interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB3>;
+               clock-names = "usb";
+               resets = <&tegra_car 59>;
+               reset-names = "usb";
+               nvidia,phy = <&phy3>;
+               status = "disabled";
+       };
+
+       phy3: usb-phy@0,7d008000 {
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d008000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB3>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 59>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled";
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       device_type = "cpu";
+                       compatible = "nvidia,denver", "arm,armv8";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       device_type = "cpu";
+                       compatible = "nvidia,denver", "arm,armv8";
+                       reg = <1>;
+               };
+       };
+
+       timer {
+               compatible = "arm,armv7-timer";
+               interrupts = <GIC_PPI 13
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 14
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 11
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 10
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+               interrupt-parent = <&gic>;
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
new file mode 100644
index 0000000..2b7f889
--- /dev/null
@@ -0,0 +1,45 @@
+#include "tegra210.dtsi"
+
+/ {
+       model = "NVIDIA Jetson TX1";
+       compatible = "nvidia,p2180", "nvidia,tegra210";
+
+       aliases {
+               rtc1 = "/rtc@0,7000e000";
+               serial0 = &uarta;
+       };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x1 0x0>;
+       };
+
+       /* debug port */
+       serial@0,70006000 {
+               status = "okay";
+       };
+
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt;
+       };
+
+       /* eMMC */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       clocks {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 {
+                       compatible = "fixed-clock";
+                       reg = <0>;
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts
new file mode 100644
index 0000000..1ddd851
--- /dev/null
@@ -0,0 +1,9 @@
+/dts-v1/;
+
+#include "tegra210-p2530.dtsi"
+#include "tegra210-p2595.dtsi"
+
+/ {
+       model = "NVIDIA Tegra210 P2371 (P2530/P2595) reference design";
+       compatible = "nvidia,p2371-0000", "nvidia,tegra210";
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts
new file mode 100644
index 0000000..683b339
--- /dev/null
@@ -0,0 +1,9 @@
+/dts-v1/;
+
+#include "tegra210-p2180.dtsi"
+#include "tegra210-p2597.dtsi"
+
+/ {
+       model = "NVIDIA Jetson TX1 Developer Kit";
+       compatible = "nvidia,p2371-2180", "nvidia,tegra210";
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi
new file mode 100644
index 0000000..ece0dec
--- /dev/null
@@ -0,0 +1,50 @@
+#include "tegra210.dtsi"
+
+/ {
+       model = "NVIDIA Tegra210 P2530 main board";
+       compatible = "nvidia,p2530", "nvidia,tegra210";
+
+       aliases {
+               rtc1 = "/rtc@0,7000e000";
+               serial0 = &uarta;
+       };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x0 0xc0000000>;
+       };
+
+       /* debug port */
+       serial@0,70006000 {
+               status = "okay";
+       };
+
+       i2c@0,7000d000 {
+               status = "okay";
+               clock-frequency = <400000>;
+       };
+
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt;
+       };
+
+       /* eMMC */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       clocks {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 {
+                       compatible = "fixed-clock";
+                       reg = <0>;
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2571.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2571.dts
new file mode 100644
index 0000000..58d27dd
--- /dev/null
@@ -0,0 +1,1302 @@
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "tegra210-p2530.dtsi"
+
+/ {
+       model = "NVIDIA Tegra210 P2571 reference design";
+       compatible = "nvidia,p2571", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,function = "aud";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "sys";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+};
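Each entry in these pinmux state nodes follows the same Tegra210 pinctrl binding: nvidia,pins names the pad being configured, nvidia,function selects the mux option (the "rsvdN" functions park unused pads in a reserved state), and the pull/tristate/enable-input/open-drain cells (plus nvidia,io-hv on high-voltage-capable pads) take the TEGRA_PIN_* macros from <dt-bindings/pinctrl/pinctrl-tegra.h>. As a reading aid, here is a minimal sketch of one such entry; the pad name is hypothetical and the snippet is not part of the commit itself:

        example_pad_px0 {                                  /* hypothetical pad name */
                nvidia,pins = "example_pad_px0";           /* pad to configure */
                nvidia,function = "rsvd1";                 /* mux option; rsvdN = reserved/unused */
                nvidia,pull = <TEGRA_PIN_PULL_DOWN>;       /* PULL_NONE, PULL_UP or PULL_DOWN */
                nvidia,tristate = <TEGRA_PIN_ENABLE>;      /* ENABLE = output driver tristated */
                nvidia,enable-input = <TEGRA_PIN_DISABLE>; /* DISABLE = input buffer off */
                nvidia,open-drain = <TEGRA_PIN_DISABLE>;   /* push-pull rather than open-drain */
        };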
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi
new file mode 100644
index 0000000..f3f9139
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi
@@ -0,0 +1,1272 @@
+/ {
+       model = "NVIDIA Tegra210 P2595 I/O board";
+       compatible = "nvidia,p2595", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "pe";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "dmic1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "dmic1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "dmic2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "dmic2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "displaya";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,function = "aud";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "sys";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
new file mode 100644
index 0000000..be3eccb
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
@@ -0,0 +1,1270 @@
+/ {
+       model = "NVIDIA Tegra210 P2597 I/O board";
+       compatible = "nvidia,p2597", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "pe";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "sata";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,function = "touch";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "displaya";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,function = "dp";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+
+       /* MMC/SD */
+       sdhci@0,700b0000 {
+               status = "okay";
+               bus-width = <4>;
+               no-1-8-v;
+
+               cd-gpios = <&gpio TEGRA_GPIO(Z, 1) GPIO_ACTIVE_LOW>;
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
new file mode 100644 (file)
index 0000000..bc23f4d
--- /dev/null
@@ -0,0 +1,805 @@
+#include <dt-bindings/clock/tegra210-car.h>
+#include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra210-mc.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+       compatible = "nvidia,tegra210";
+       interrupt-parent = <&lic>;
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       host1x@0,50000000 {
+               compatible = "nvidia,tegra210-host1x", "simple-bus";
+               reg = <0x0 0x50000000 0x0 0x00034000>;
+               interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, /* syncpt */
+                            <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; /* general */
+               clocks = <&tegra_car TEGRA210_CLK_HOST1X>;
+               clock-names = "host1x";
+               resets = <&tegra_car 28>;
+               reset-names = "host1x";
+
+               #address-cells = <2>;
+               #size-cells = <2>;
+
+               ranges = <0x0 0x54000000 0x0 0x54000000 0x0 0x01000000>;
+
+               dpaux1: dpaux@0,54040000 {
+                       compatible = "nvidia,tegra210-dpaux";
+                       reg = <0x0 0x54040000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DPAUX1>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 207>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+
+               vi@0,54080000 {
+                       compatible = "nvidia,tegra210-vi";
+                       reg = <0x0 0x54080000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               tsec@0,54100000 {
+                       compatible = "nvidia,tegra210-tsec";
+                       reg = <0x0 0x54100000 0x0 0x00040000>;
+               };
+
+               dc@0,54200000 {
+                       compatible = "nvidia,tegra210-dc";
+                       reg = <0x0 0x54200000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DISP1>,
+                                <&tegra_car TEGRA210_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 27>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DC>;
+
+                       nvidia,head = <0>;
+               };
+
+               dc@0,54240000 {
+                       compatible = "nvidia,tegra210-dc";
+                       reg = <0x0 0x54240000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DISP2>,
+                                <&tegra_car TEGRA210_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 26>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DCB>;
+
+                       nvidia,head = <1>;
+               };
+
+               dsi@0,54300000 {
+                       compatible = "nvidia,tegra210-dsi";
+                       reg = <0x0 0x54300000 0x0 0x00040000>;
+                       clocks = <&tegra_car TEGRA210_CLK_DSIA>,
+                                <&tegra_car TEGRA210_CLK_DSIALP>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>;
+                       clock-names = "dsi", "lp", "parent";
+                       resets = <&tegra_car 48>;
+                       reset-names = "dsi";
+                       nvidia,mipi-calibrate = <&mipi 0x0c0>; /* DSIA & DSIB pads */
+
+                       status = "disabled";
+
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+               };
+
+               vic@0,54340000 {
+                       compatible = "nvidia,tegra210-vic";
+                       reg = <0x0 0x54340000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               nvjpg@0,54380000 {
+                       compatible = "nvidia,tegra210-nvjpg";
+                       reg = <0x0 0x54380000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               dsi@0,54400000 {
+                       compatible = "nvidia,tegra210-dsi";
+                       reg = <0x0 0x54400000 0x0 0x00040000>;
+                       clocks = <&tegra_car TEGRA210_CLK_DSIB>,
+                                <&tegra_car TEGRA210_CLK_DSIBLP>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>;
+                       clock-names = "dsi", "lp", "parent";
+                       resets = <&tegra_car 82>;
+                       reset-names = "dsi";
+                       nvidia,mipi-calibrate = <&mipi 0x300>; /* DSIC & DSID pads */
+
+                       status = "disabled";
+
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+               };
+
+               nvdec@0,54480000 {
+                       compatible = "nvidia,tegra210-nvdec";
+                       reg = <0x0 0x54480000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               nvenc@0,544c0000 {
+                       compatible = "nvidia,tegra210-nvenc";
+                       reg = <0x0 0x544c0000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               tsec@0,54500000 {
+                       compatible = "nvidia,tegra210-tsec";
+                       reg = <0x0 0x54500000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               sor@0,54540000 {
+                       compatible = "nvidia,tegra210-sor";
+                       reg = <0x0 0x54540000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_SOR0>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>,
+                                <&tegra_car TEGRA210_CLK_SOR_SAFE>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 182>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               sor@0,54580000 {
+                       compatible = "nvidia,tegra210-sor1";
+                       reg = <0x0 0x54580000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_SOR1>,
+                                <&tegra_car TEGRA210_CLK_PLL_D2_OUT0>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>,
+                                <&tegra_car TEGRA210_CLK_SOR_SAFE>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 183>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       compatible = "nvidia,tegra124-dpaux";
+                       reg = <0x0 0x545c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DPAUX>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 181>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+
+               isp@0,54600000 {
+                       compatible = "nvidia,tegra210-isp";
+                       reg = <0x0 0x54600000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               isp@0,54680000 {
+                       compatible = "nvidia,tegra210-isp";
+                       reg = <0x0 0x54680000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               i2c@0,546c0000 {
+                       compatible = "nvidia,tegra210-i2c-vi";
+                       reg = <0x0 0x546c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+       };
+
+       gic: interrupt-controller@0,50041000 {
+               compatible = "arm,gic-400";
+               #interrupt-cells = <3>;
+               interrupt-controller;
+               reg = <0x0 0x50041000 0x0 0x1000>,
+                     <0x0 0x50042000 0x0 0x2000>,
+                     <0x0 0x50044000 0x0 0x2000>,
+                     <0x0 0x50046000 0x0 0x2000>;
+               interrupts = <GIC_PPI 9
+                       (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+               interrupt-parent = <&gic>;
+       };
+
+       gpu@0,57000000 {
+               compatible = "nvidia,gm20b";
+               reg = <0x0 0x57000000 0x0 0x01000000>,
+                     <0x0 0x58000000 0x0 0x01000000>;
+               interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "stall", "nonstall";
+               clocks = <&tegra_car TEGRA210_CLK_GPU>,
+                        <&tegra_car TEGRA210_CLK_PLL_P_OUT5>;
+               clock-names = "gpu", "pwr";
+               resets = <&tegra_car 184>;
+               reset-names = "gpu";
+               status = "disabled";
+       };
+
+       lic: interrupt-controller@0,60004000 {
+               compatible = "nvidia,tegra210-ictlr";
+               reg = <0x0 0x60004000 0x0 0x40>, /* primary controller */
+                     <0x0 0x60004100 0x0 0x40>, /* secondary controller */
+                     <0x0 0x60004200 0x0 0x40>, /* tertiary controller */
+                     <0x0 0x60004300 0x0 0x40>, /* quaternary controller */
+                     <0x0 0x60004400 0x0 0x40>, /* quinary controller */
+                     <0x0 0x60004500 0x0 0x40>; /* senary controller */
+               interrupt-controller;
+               #interrupt-cells = <3>;
+               interrupt-parent = <&gic>;
+       };
+
+       timer@0,60005000 {
+               compatible = "nvidia,tegra210-timer", "nvidia,tegra20-timer";
+               reg = <0x0 0x60005000 0x0 0x400>;
+               interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_TIMER>;
+               clock-names = "timer";
+       };
+
+       tegra_car: clock@0,60006000 {
+               compatible = "nvidia,tegra210-car";
+               reg = <0x0 0x60006000 0x0 0x1000>;
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+       };
+
+       flow-controller@0,60007000 {
+               compatible = "nvidia,tegra210-flowctrl";
+               reg = <0x0 0x60007000 0x0 0x1000>;
+       };
+
+       gpio: gpio@0,6000d000 {
+               compatible = "nvidia,tegra210-gpio", "nvidia,tegra124-gpio", "nvidia,tegra30-gpio";
+               reg = <0x0 0x6000d000 0x0 0x1000>;
+               interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               #interrupt-cells = <2>;
+               interrupt-controller;
+       };
+
+       apbdma: dma@0,60020000 {
+               compatible = "nvidia,tegra210-apbdma", "nvidia,tegra148-apbdma";
+               reg = <0x0 0x60020000 0x0 0x1400>;
+               interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_APBDMA>;
+               clock-names = "dma";
+               resets = <&tegra_car 34>;
+               reset-names = "dma";
+               #dma-cells = <1>;
+       };
+
+       apbmisc@0,70000800 {
+               compatible = "nvidia,tegra210-apbmisc", "nvidia,tegra20-apbmisc";
+               reg = <0x0 0x70000800 0x0 0x64>,   /* Chip revision */
+                     <0x0 0x7000e864 0x0 0x04>;   /* Strapping options */
+       };
+
+       pinmux: pinmux@0,700008d4 {
+               compatible = "nvidia,tegra210-pinmux";
+               reg = <0x0 0x700008d4 0x0 0x29c>, /* Pad control registers */
+                     <0x0 0x70003000 0x0 0x294>; /* Mux registers */
+       };
+
+       /*
+        * There are two serial drivers: an 8250-based simple serial
+        * driver, and an APB DMA based serial driver for higher baud
+        * rates and better performance. To enable the 8250-based
+        * driver, the compatible is "nvidia,tegra124-uart",
+        * "nvidia,tegra20-uart"; to enable the APB DMA based serial
+        * driver, the compatible is "nvidia,tegra124-hsuart",
+        * "nvidia,tegra30-hsuart".
+        */
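+       /*
+        * A board-level sketch (illustration only, not from the binding
+        * docs): a board dts that wants the DMA-based driver for UART-A
+        * would override the compatible via the node label, e.g.:
+        *
+        *      &uarta {
+        *              compatible = "nvidia,tegra124-hsuart",
+        *                           "nvidia,tegra30-hsuart";
+        *              status = "okay";
+        *      };
+        */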
+       uarta: serial@0,70006000 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006000 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTA>;
+               clock-names = "serial";
+               resets = <&tegra_car 6>;
+               reset-names = "serial";
+               dmas = <&apbdma 8>, <&apbdma 8>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartb: serial@0,70006040 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006040 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTB>;
+               clock-names = "serial";
+               resets = <&tegra_car 7>;
+               reset-names = "serial";
+               dmas = <&apbdma 9>, <&apbdma 9>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartc: serial@0,70006200 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006200 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTC>;
+               clock-names = "serial";
+               resets = <&tegra_car 55>;
+               reset-names = "serial";
+               dmas = <&apbdma 10>, <&apbdma 10>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartd: serial@0,70006300 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006300 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTD>;
+               clock-names = "serial";
+               resets = <&tegra_car 65>;
+               reset-names = "serial";
+               dmas = <&apbdma 19>, <&apbdma 19>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       pwm: pwm@0,7000a000 {
+               compatible = "nvidia,tegra210-pwm", "nvidia,tegra20-pwm";
+               reg = <0x0 0x7000a000 0x0 0x100>;
+               #pwm-cells = <2>;
+               clocks = <&tegra_car TEGRA210_CLK_PWM>;
+               clock-names = "pwm";
+               resets = <&tegra_car 17>;
+               reset-names = "pwm";
+               status = "disabled";
+       };
+
+       i2c@0,7000c000 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c000 0x0 0x100>;
+               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C1>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 12>;
+               reset-names = "i2c";
+               dmas = <&apbdma 21>, <&apbdma 21>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c400 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c400 0x0 0x100>;
+               interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C2>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 54>;
+               reset-names = "i2c";
+               dmas = <&apbdma 22>, <&apbdma 22>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c500 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c500 0x0 0x100>;
+               interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C3>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 67>;
+               reset-names = "i2c";
+               dmas = <&apbdma 23>, <&apbdma 23>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c700 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c700 0x0 0x100>;
+               interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C4>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 103>;
+               reset-names = "i2c";
+               dmas = <&apbdma 26>, <&apbdma 26>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d000 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d000 0x0 0x100>;
+               interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C5>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 47>;
+               reset-names = "i2c";
+               dmas = <&apbdma 24>, <&apbdma 24>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d100 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d100 0x0 0x100>;
+               interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C6>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 166>;
+               reset-names = "i2c";
+               dmas = <&apbdma 30>, <&apbdma 30>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d400 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d400 0x0 0x200>;
+               interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC1>;
+               clock-names = "spi";
+               resets = <&tegra_car 41>;
+               reset-names = "spi";
+               dmas = <&apbdma 15>, <&apbdma 15>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d600 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d600 0x0 0x200>;
+               interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC2>;
+               clock-names = "spi";
+               resets = <&tegra_car 44>;
+               reset-names = "spi";
+               dmas = <&apbdma 16>, <&apbdma 16>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d800 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d800 0x0 0x200>;
+               interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC3>;
+               clock-names = "spi";
+               resets = <&tegra_car 46>;
+               reset-names = "spi";
+               dmas = <&apbdma 17>, <&apbdma 17>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000da00 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000da00 0x0 0x200>;
+               interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC4>;
+               clock-names = "spi";
+               resets = <&tegra_car 68>;
+               reset-names = "spi";
+               dmas = <&apbdma 18>, <&apbdma 18>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       rtc@0,7000e000 {
+               compatible = "nvidia,tegra210-rtc", "nvidia,tegra20-rtc";
+               reg = <0x0 0x7000e000 0x0 0x100>;
+               interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_RTC>;
+               clock-names = "rtc";
+       };
+
+       pmc: pmc@0,7000e400 {
+               compatible = "nvidia,tegra210-pmc";
+               reg = <0x0 0x7000e400 0x0 0x400>;
+               clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>;
+               clock-names = "pclk", "clk32k_in";
+
+               #power-domain-cells = <1>;
+       };
+
+       fuse@0,7000f800 {
+               compatible = "nvidia,tegra210-efuse";
+               reg = <0x0 0x7000f800 0x0 0x400>;
+               clocks = <&tegra_car TEGRA210_CLK_FUSE>;
+               clock-names = "fuse";
+               resets = <&tegra_car 39>;
+               reset-names = "fuse";
+       };
+
+       mc: memory-controller@0,70019000 {
+               compatible = "nvidia,tegra210-mc";
+               reg = <0x0 0x70019000 0x0 0x1000>;
+               clocks = <&tegra_car TEGRA210_CLK_MC>;
+               clock-names = "mc";
+
+               interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+               #iommu-cells = <1>;
+       };
+
+       hda@0,70030000 {
+               compatible = "nvidia,tegra210-hda", "nvidia,tegra30-hda";
+               reg = <0x0 0x70030000 0x0 0x10000>;
+               interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_HDA>,
+                        <&tegra_car TEGRA210_CLK_HDA2HDMI>,
+                        <&tegra_car TEGRA210_CLK_HDA2CODEC_2X>;
+               clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+               resets = <&tegra_car 125>, /* hda */
+                        <&tegra_car 128>, /* hda2hdmi */
+                        <&tegra_car 111>; /* hda2codec_2x */
+               reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0000 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0000 0x0 0x200>;
+               interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC1>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 14>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0200 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0200 0x0 0x200>;
+               interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC2>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 9>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0400 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0400 0x0 0x200>;
+               interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC3>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 69>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0600 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0600 0x0 0x200>;
+               interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC4>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 15>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       mipi: mipi@0,700e3000 {
+               compatible = "nvidia,tegra210-mipi";
+               reg = <0x0 0x700e3000 0x0 0x100>;
+               clocks = <&tegra_car TEGRA210_CLK_MIPI_CAL>;
+               clock-names = "mipi-cal";
+               #nvidia,mipi-calibrate-cells = <1>;
+       };
+
+       spi@0,70410000 {
+               compatible = "nvidia,tegra210-qspi";
+               reg = <0x0 0x70410000 0x0 0x1000>;
+               interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_QSPI>;
+               clock-names = "qspi";
+               resets = <&tegra_car 211>;
+               reset-names = "qspi";
+               dmas = <&apbdma 5>, <&apbdma 5>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       usb@0,7d000000 {
+               compatible = "nvidia,tegra210-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d000000 0x0 0x4000>;
+               interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "usb";
+               resets = <&tegra_car 22>;
+               reset-names = "usb";
+               nvidia,phy = <&phy1>;
+               status = "disabled";
+       };
+
+       phy1: usb-phy@0,7d000000 {
+               compatible = "nvidia,tegra210-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d000000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USBD>,
+                        <&tegra_car TEGRA210_CLK_PLL_U>,
+                        <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 22>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               nvidia,has-utmi-pad-registers;
+               status = "disabled";
+       };
+
+       usb@0,7d004000 {
+               compatible = "nvidia,tegra210-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d004000 0x0 0x4000>;
+               interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USB2>;
+               clock-names = "usb";
+               resets = <&tegra_car 58>;
+               reset-names = "usb";
+               nvidia,phy = <&phy2>;
+               status = "disabled";
+       };
+
+       phy2: usb-phy@0,7d004000 {
+               compatible = "nvidia,tegra210-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d004000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USB2>,
+                        <&tegra_car TEGRA210_CLK_PLL_U>,
+                        <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 58>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled";
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <1>;
+               };
+
+               cpu@2 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <2>;
+               };
+
+               cpu@3 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <3>;
+               };
+       };
+
+       timer {
+               compatible = "arm,armv8-timer";
+               interrupts = <GIC_PPI 13
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 14
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 11
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 10
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+               interrupt-parent = <&gic>;
+       };
+};
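
Most peripherals in this dtsi are declared with status = "disabled"; a board
dts includes the file and enables only what the board actually wires up. A
minimal board-side sketch (node path, GPIO and polarity taken from the board
file earlier in this patch):

	/ {
		sdhci@0,700b0000 {
			status = "okay";
			bus-width = <4>;
			no-1-8-v;
			cd-gpios = <&gpio TEGRA_GPIO(Z, 1) GPIO_ACTIVE_LOW>;
		};
	};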
index 12ed78aa6f0c27d4a76066b8064d8a3654db9ed0..d91e1f0225733750097f4533c7a2a9045c9ab175 100644 (file)
 #   $4 - default install path (blank if root directory)
 #
 
+verify () {
+       if [ ! -f "$1" ]; then
+               echo ""                                                   1>&2
+               echo " *** Missing file: $1"                              1>&2
+               echo ' *** You need to run "make" before "make install".' 1>&2
+               echo ""                                                   1>&2
+               exit 1
+       fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
 # User may have a custom install script
 if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
 if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
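
With the new checks, a sketch of the failure mode (the invocation shape and
image path are assumptions here; the real caller is the arch Makefile's
install target, passing the arguments documented in the header above):

	$ sh install.sh "$KERNELRELEASE" arch/arm64/boot/Image System.map ""
	 *** Missing file: arch/arm64/boot/Image
	 *** You need to run "make" before "make install".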
index 18ca9fb9e65f667c0679b3dbdb80166442723bb7..86581f793e398ba29eb1e12e77498405cdeb2e6d 100644 (file)
@@ -16,7 +16,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@ CONFIG_ARCH_EXYNOS7=y
 CONFIG_ARCH_LAYERSCAPE=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
 CONFIG_ARCH_SEATTLE=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_ARCH_R8A7795=y
 CONFIG_ARCH_STRATIX10=y
 CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_SPRD=y
 CONFIG_ARCH_THUNDER=y
 CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@ CONFIG_ARCH_XGENE=y
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
 CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
 CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_XEN=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
@@ -100,7 +102,11 @@ CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
 CONFIG_TUN=y
 CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
@@ -117,25 +123,23 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_XILINX_PS_UART=y
 CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
 CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
 CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_CPU=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
 CONFIG_PHY_XGENE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -239,6 +246,7 @@ CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_FTRACE is not set
 CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
index 05d9e16c0dfdd81a8a6ffe141e5c6f5cc96b3e7e..7a3d22a46faf502224fa397c527ab6338bac8933 100644 (file)
@@ -294,7 +294,7 @@ static struct crypto_alg aes_algs[] = { {
        .cra_blkcipher = {
                .min_keysize    = AES_MIN_KEY_SIZE,
                .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
+               .ivsize         = 0,
                .setkey         = aes_setkey,
                .encrypt        = ecb_encrypt,
                .decrypt        = ecb_decrypt,
@@ -371,7 +371,7 @@ static struct crypto_alg aes_algs[] = { {
        .cra_ablkcipher = {
                .min_keysize    = AES_MIN_KEY_SIZE,
                .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
+               .ivsize         = 0,
                .setkey         = ablk_set_key,
                .encrypt        = ablk_encrypt,
                .decrypt        = ablk_decrypt,
index 2731d3b25ed2e338e76966b5efd86658b7e22263..8ec88e5b290f9bc12a416841e73ef56414972b59 100644 (file)
@@ -103,6 +103,7 @@ static inline u64 gic_read_iar_common(void)
        u64 irqstat;
 
        asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+       dsb(sy);
        return irqstat;
 }
 
index 007a69fc4f408d5f2f7e58f4070b6cb354a5e022..5f3ab8c1db55cca8dbf4c9e1fc315e90335b6d60 100644 (file)
@@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                return -EFAULT;
 
        asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
 "      sub     %w3, %w1, %w4\n"
@@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 "      .align  3\n"
 "      .quad   1b, 4b, 2b, 4b\n"
 "      .popsection\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
        : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
        : "r" (oldval), "r" (newval), "Ir" (-EFAULT)
        : "memory");
index 738a95f93e493e3002ac8749857f8599c5ae3404..d201d4b396d15fee10129494b6d274bbf77a2320 100644 (file)
 #define TCR_EL2_MASK   (TCR_EL2_TG0 | TCR_EL2_SH0 | \
                         TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
 
-#define TCR_EL2_FLAGS  (TCR_EL2_RES1 | TCR_EL2_PS_40B)
-
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_RES1          (1 << 31)
 #define VTCR_EL2_PS_MASK       (7 << 16)
 #define CPTR_EL2_TCPAC (1 << 31)
 #define CPTR_EL2_TTA   (1 << 20)
 #define CPTR_EL2_TFP   (1 << CPTR_EL2_TFP_SHIFT)
+#define CPTR_EL2_DEFAULT       0x000033ff
 
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_TDRA          (1 << 11)
index 3066328cd86b69a91274e0cb841059b428666140..779a5872a2c5fb5f9aa9b49af6f77391aefc2336 100644 (file)
@@ -127,10 +127,14 @@ static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
 
 static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 {
-       u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+       u32 mode;
 
-       if (vcpu_mode_is_32bit(vcpu))
+       if (vcpu_mode_is_32bit(vcpu)) {
+               mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
                return mode > COMPAT_PSR_MODE_USR;
+       }
+
+       mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
 
        return mode != PSR_MODE_EL0t;
 }
index 9b2f5a9d019df493fa6021ee3ca6b4779401d8c4..ae615b9d9a551bab47bb4900e867c8d15db02bb1 100644 (file)
@@ -39,6 +39,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
 #include <asm/pgtable-types.h>
 
 extern void __cpu_clear_user_page(void *p, unsigned long user);
index 2d545d7aa80ba715f65cc82b671213fa5505b495..bf464de33f52f77d2520db0a12e53b3749ced6cc 100644 (file)
@@ -67,11 +67,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PROT_DEFAULT           (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT      (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL       (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL            __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO         __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX        __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX                __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -153,6 +153,7 @@ extern struct page *empty_zero_page;
 #define pte_write(pte)         (!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)          (!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)          (!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)          (!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)      (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +164,6 @@ extern struct page *empty_zero_page;
 #define pte_dirty(pte)         (pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)         (!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-       ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
        ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 #define pte_valid_young(pte) \
@@ -278,13 +277,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
 {
-       if (pte_valid_user(pte)) {
-               if (!pte_special(pte) && pte_exec(pte))
-                       __sync_icache_dcache(pte, addr);
+       if (pte_valid(pte)) {
                if (pte_sw_dirty(pte) && pte_write(pte))
                        pte_val(pte) &= ~PTE_RDONLY;
                else
                        pte_val(pte) |= PTE_RDONLY;
+               if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+                       __sync_icache_dcache(pte, addr);
        }
 
        /*
index 8aee3aeec3e687edde6f5be67233299e7a4f7d4f..c536c9e307b9a484dca6d03572f28cbf9f8c0667 100644 (file)
@@ -226,11 +226,28 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr)
        return retval;
 }
 
+static void send_user_sigtrap(int si_code)
+{
+       struct pt_regs *regs = current_pt_regs();
+       siginfo_t info = {
+               .si_signo       = SIGTRAP,
+               .si_errno       = 0,
+               .si_code        = si_code,
+               .si_addr        = (void __user *)instruction_pointer(regs),
+       };
+
+       if (WARN_ON(!user_mode(regs)))
+               return;
+
+       if (interrupts_enabled(regs))
+               local_irq_enable();
+
+       force_sig_info(SIGTRAP, &info, current);
+}
+
 static int single_step_handler(unsigned long addr, unsigned int esr,
                               struct pt_regs *regs)
 {
-       siginfo_t info;
-
        /*
         * If we are stepping a pending breakpoint, call the hw_breakpoint
         * handler first.
@@ -239,11 +256,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
                return 0;
 
        if (user_mode(regs)) {
-               info.si_signo = SIGTRAP;
-               info.si_errno = 0;
-               info.si_code  = TRAP_HWBKPT;
-               info.si_addr  = (void __user *)instruction_pointer(regs);
-               force_sig_info(SIGTRAP, &info, current);
+               send_user_sigtrap(TRAP_HWBKPT);
 
                /*
                 * ptrace will disable single step unless explicitly
@@ -307,17 +320,8 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
 static int brk_handler(unsigned long addr, unsigned int esr,
                       struct pt_regs *regs)
 {
-       siginfo_t info;
-
        if (user_mode(regs)) {
-               info = (siginfo_t) {
-                       .si_signo = SIGTRAP,
-                       .si_errno = 0,
-                       .si_code  = TRAP_BRKPT,
-                       .si_addr  = (void __user *)instruction_pointer(regs),
-               };
-
-               force_sig_info(SIGTRAP, &info, current);
+               send_user_sigtrap(TRAP_BRKPT);
        } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
                pr_warning("Unexpected kernel BRK exception at EL1\n");
                return -EFAULT;
@@ -328,7 +332,6 @@ static int brk_handler(unsigned long addr, unsigned int esr,
 
 int aarch32_break_handler(struct pt_regs *regs)
 {
-       siginfo_t info;
        u32 arm_instr;
        u16 thumb_instr;
        bool bp = false;
@@ -359,14 +362,7 @@ int aarch32_break_handler(struct pt_regs *regs)
        if (!bp)
                return -EFAULT;
 
-       info = (siginfo_t) {
-               .si_signo = SIGTRAP,
-               .si_errno = 0,
-               .si_code  = TRAP_BRKPT,
-               .si_addr  = pc,
-       };
-
-       force_sig_info(SIGTRAP, &info, current);
+       send_user_sigtrap(TRAP_BRKPT);
        return 0;
 }
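This hunk folds three identical siginfo setups into one send_user_sigtrap() helper. A reduced, standalone sketch of that consolidation; the struct and TRAP code here are simplified stand-ins, not the kernel's types:

#include <signal.h>
#include <stdio.h>

/* One helper owns the common fields; callers pass only what differs. */
struct trapinfo {
	int si_signo;
	int si_errno;
	int si_code;
	void *si_addr;
};

static struct trapinfo make_sigtrap(int si_code, void *pc)
{
	return (struct trapinfo){
		.si_signo = SIGTRAP,
		.si_errno = 0,
		.si_code  = si_code,
		.si_addr  = pc,
	};
}

int main(void)
{
	struct trapinfo t = make_sigtrap(1 /* e.g. TRAP_BRKPT */, (void *)0x1000);

	printf("signo=%d code=%d addr=%p\n", t.si_signo, t.si_code, t.si_addr);
	return 0;
}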
 
index ffe9c2b6431bd5c0cb1e9982afe84e13a82e2043..917d98108b3f05d9b1013020f9f576a3db776bc6 100644 (file)
@@ -514,9 +514,14 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )       // Clear EE and E0E on LE systems
 #endif
 
        /* EL2 debug */
+       mrs     x0, id_aa64dfr0_el1             // Check ID_AA64DFR0_EL1 PMUVer
+       sbfx    x0, x0, #8, #4
+       cmp     x0, #1
+       b.lt    4f                              // Skip if no PMU present
        mrs     x0, pmcr_el0                    // Disable debug access traps
        ubfx    x0, x0, #11, #5                 // to EL2 and allow access to
        msr     mdcr_el2, x0                    // all PMU counters from EL1
+4:
 
        /* Stage-2 translation */
        msr     vttbr_el2, xzr
index bc2abb8b1599576ae2dec02bce0c46c48fc707dd..352f7abd91c998f297b229d00e5cbb2f4882e81c 100644 (file)
 
 #ifdef CONFIG_EFI
 
+/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)     ABSOLUTE(sym)
+
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp               = __pi_memcmp;
-__efistub_memchr               = __pi_memchr;
-__efistub_memcpy               = __pi_memcpy;
-__efistub_memmove              = __pi_memmove;
-__efistub_memset               = __pi_memset;
-__efistub_strlen               = __pi_strlen;
-__efistub_strcmp               = __pi_strcmp;
-__efistub_strncmp              = __pi_strncmp;
-__efistub___flush_dcache_area  = __pi___flush_dcache_area;
+__efistub_memcmp               = KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr               = KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy               = KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove              = KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset               = KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen               = KALLSYMS_HIDE(__pi_strlen);
+__efistub_strnlen              = KALLSYMS_HIDE(__pi_strnlen);
+__efistub_strcmp               = KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp              = KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area  = KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy             = __pi_memcpy;
-__efistub___memmove            = __pi_memmove;
-__efistub___memset             = __pi_memset;
+__efistub___memcpy             = KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove            = KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset             = KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text                        = _text;
-__efistub__end                 = _end;
-__efistub__edata               = _edata;
+__efistub__text                        = KALLSYMS_HIDE(_text);
+__efistub__end                 = KALLSYMS_HIDE(_end);
+__efistub__edata               = KALLSYMS_HIDE(_edata);
 
 #endif
 
index 4fad9787ab46ed04bf36873e8f7f7eb724beb262..d9751a4769e758b220181d6a400bd5a238c2cedb 100644 (file)
@@ -44,14 +44,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
        unsigned long irq_stack_ptr;
 
        /*
-        * Use raw_smp_processor_id() to avoid false-positives from
-        * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping
-        * task stacks, we can be pre-empted in this case, so
-        * {raw_,}smp_processor_id() may give us the wrong value. Sleeping
-        * tasks can't ever be on an interrupt stack, so regardless of cpu,
-        * the checks will always fail.
+        * Switching between stacks is valid when tracing current and in
+        * non-preemptible context.
         */
-       irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
+       if (tsk == current && !preemptible())
+               irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+       else
+               irq_stack_ptr = 0;
 
        low  = frame->sp;
        /* irq stacks are not THREAD_SIZE aligned */
@@ -64,8 +63,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
                return -EINVAL;
 
        frame->sp = fp + 0x10;
-       frame->fp = *(unsigned long *)(fp);
-       frame->pc = *(unsigned long *)(fp + 8);
+       frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
+       frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        if (tsk && tsk->ret_stack &&
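The switch to READ_ONCE_NOCHECK() forces a single untorn load and bypasses KASAN instrumentation when reading a possibly racy frame record. A minimal sketch of the READ_ONCE idea, using the common volatile-cast reduction; the kernel's real macro in <linux/compiler.h> is more elaborate:

#include <stdio.h>

/* Force exactly one load from memory; the compiler may not refetch or tear it. */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

static unsigned long frame[2] = { 0xdeadbeef, 0xcafef00d };

int main(void)
{
	/* Analogous to reading frame->fp and frame->pc in unwind_frame(). */
	unsigned long fp = READ_ONCE(frame[0]);
	unsigned long pc = READ_ONCE(frame[1]);

	printf("fp=%#lx pc=%#lx\n", fp, pc);
	return 0;
}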
index cbedd724f48efcf487e443fa3fc21516d8d898a6..c5392081b49ba4ac4f48d9a0782442ac888ed222 100644 (file)
@@ -146,9 +146,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
 static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
        struct stackframe frame;
-       unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+       unsigned long irq_stack_ptr;
        int skip;
 
+       /*
+        * Switching between stacks is valid when tracing current and in
+        * non-preemptible context.
+        */
+       if (tsk == current && !preemptible())
+               irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+       else
+               irq_stack_ptr = 0;
+
        pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
        if (!tsk)
index 3e568dcd907b8c82806ee4c2bf4f355e64cf7d51..d073b5a216f72b1db65fbef79efd942b28df6f6e 100644 (file)
@@ -64,7 +64,7 @@ __do_hyp_init:
        mrs     x4, tcr_el1
        ldr     x5, =TCR_EL2_MASK
        and     x4, x4, x5
-       ldr     x5, =TCR_EL2_FLAGS
+       mov     x5, #TCR_EL2_RES1
        orr     x4, x4, x5
 
 #ifndef CONFIG_ARM64_VA_BITS_48
@@ -85,15 +85,17 @@ __do_hyp_init:
        ldr_l   x5, idmap_t0sz
        bfi     x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
 #endif
-       msr     tcr_el2, x4
-
-       ldr     x4, =VTCR_EL2_FLAGS
        /*
         * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
-        * VTCR_EL2.
+        * TCR_EL2 and VTCR_EL2.
         */
        mrs     x5, ID_AA64MMFR0_EL1
        bfi     x4, x5, #16, #3
+
+       msr     tcr_el2, x4
+
+       ldr     x4, =VTCR_EL2_FLAGS
+       bfi     x4, x5, #16, #3
        /*
         * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
         * VTCR_EL2.
index ca8f5a5e2f965748fca28ced482c9c195270e5ab..f0e7bdfae134a727ec7c0ac76466020fdb65e0fb 100644 (file)
@@ -36,7 +36,11 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
        write_sysreg(val, hcr_el2);
        /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
        write_sysreg(1 << 15, hstr_el2);
-       write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2);
+
+       val = CPTR_EL2_DEFAULT;
+       val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
+       write_sysreg(val, cptr_el2);
+
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 }
 
@@ -45,7 +49,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
        write_sysreg(HCR_RW, hcr_el2);
        write_sysreg(0, hstr_el2);
        write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
-       write_sysreg(0, cptr_el2);
+       write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
 }
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
index 9142e082f5f3996df9428670f2e107cfc948d2e3..5dd2a26444ec116cd46d51f118c62ddcd78752ae 100644 (file)
@@ -147,16 +147,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
        max_lr_idx = vtr_to_max_lr_idx(val);
        nr_pri_bits = vtr_to_nr_pri_bits(val);
 
-       switch (nr_pri_bits) {
-       case 7:
-                write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
-                write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
-       case 6:
-                write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
-       default:
-                write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
-       }
-
        switch (nr_pri_bits) {
        case 7:
                 write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
@@ -167,6 +157,16 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
                 write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
        }
 
+       switch (nr_pri_bits) {
+       case 7:
+                write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
+                write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+       case 6:
+                write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+       default:
+                write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+       }
+
        switch (max_lr_idx) {
        case 15:
                write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
index 648112e90ed546d2d052ccf7d9f66866d2390d06..4d1ac81870d27e6f272abde088e0f5e8290c80d1 100644 (file)
 
 #define PSTATE_FAULT_BITS_64   (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
                                 PSR_I_BIT | PSR_D_BIT)
-#define EL1_EXCEPT_SYNC_OFFSET 0x200
+
+#define CURRENT_EL_SP_EL0_VECTOR       0x0
+#define CURRENT_EL_SP_ELx_VECTOR       0x200
+#define LOWER_EL_AArch64_VECTOR                0x400
+#define LOWER_EL_AArch32_VECTOR                0x600
 
 static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 {
@@ -97,6 +101,34 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
                *fsr = 0x14;
 }
 
+enum exception_type {
+       except_type_sync        = 0,
+       except_type_irq         = 0x80,
+       except_type_fiq         = 0x100,
+       except_type_serror      = 0x180,
+};
+
+static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
+{
+       u64 exc_offset;
+
+       switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
+       case PSR_MODE_EL1t:
+               exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+               break;
+       case PSR_MODE_EL1h:
+               exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+               break;
+       case PSR_MODE_EL0t:
+               exc_offset = LOWER_EL_AArch64_VECTOR;
+               break;
+       default:
+               exc_offset = LOWER_EL_AArch32_VECTOR;
+       }
+
+       return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+}
+
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
        unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -108,8 +140,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
        *vcpu_spsr(vcpu) = cpsr;
        *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 
+       *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
        *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-       *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
 
        vcpu_sys_reg(vcpu, FAR_EL1) = addr;
 
@@ -143,8 +175,8 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
        *vcpu_spsr(vcpu) = cpsr;
        *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 
+       *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
        *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-       *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
 
        /*
         * Build an unknown exception, depending on the instruction
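get_except_vector() composes the new PC from three parts: VBAR_EL1, a vector bank selected by the saved PSTATE mode, and the exception type offset. The arithmetic, reduced to standalone C with the offsets copied from the hunk and a made-up VBAR value:

#include <stdio.h>
#include <stdint.h>

#define CURRENT_EL_SP_EL0_VECTOR 0x0
#define CURRENT_EL_SP_ELx_VECTOR 0x200
#define LOWER_EL_AARCH64_VECTOR  0x400

enum exception_type {
	except_type_sync   = 0,
	except_type_irq    = 0x80,
	except_type_fiq    = 0x100,
	except_type_serror = 0x180,
};

/* Target PC = vector base + mode-selected bank + exception type. */
static uint64_t vector_pc(uint64_t vbar, uint64_t bank, enum exception_type t)
{
	return vbar + bank + t;
}

int main(void)
{
	uint64_t vbar = 0xffff000008081000ULL;	/* sample VBAR_EL1 */

	/* A sync exception taken from EL1h lands at VBAR + 0x200 + 0. */
	printf("%#llx\n", (unsigned long long)
	       vector_pc(vbar, CURRENT_EL_SP_ELx_VECTOR, except_type_sync));
	return 0;
}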
index eec3598b4184077b83b5a1f24321891cb110f5bb..2e90371cfb378b0e064667506a2b74c9275cacfb 100644 (file)
@@ -1007,10 +1007,9 @@ static int emulate_cp(struct kvm_vcpu *vcpu,
                if (likely(r->access(vcpu, params, r))) {
                        /* Skip instruction, since it was emulated */
                        kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+                       /* Handled */
+                       return 0;
                }
-
-               /* Handled */
-               return 0;
        }
 
        /* Not handled */
@@ -1043,7 +1042,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
 }
 
 /**
- * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
@@ -1095,7 +1094,7 @@ out:
 }
 
 /**
- * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
index 2ca665711bf21e590b79f89578dbe4cbba0ca9b3..eae38da6e0bb3911a5cad1fb2f6da46b4bb0090a 100644 (file)
@@ -168,4 +168,4 @@ CPU_LE( lsr tmp2, tmp2, tmp4 )      /* Shift (tmp1 & 63).  */
 .Lhit_limit:
        mov     len, limit
        ret
-ENDPROC(strnlen)
+ENDPIPROC(strnlen)
index 331c4ca6205c4e7211d4bc7bc4832891d26080cc..a6e757cbab7785ed411e919b95c6d13caaf21726 100644 (file)
@@ -933,6 +933,10 @@ static int __init __iommu_dma_init(void)
                ret = register_iommu_dma_ops_notifier(&platform_bus_type);
        if (!ret)
                ret = register_iommu_dma_ops_notifier(&amba_bustype);
+
+       /* handle devices queued before this arch_initcall */
+       if (!ret)
+               __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL);
        return ret;
 }
 arch_initcall(__iommu_dma_init);
index 5a22a119a74c87b4b5b54e114701b3c6eed233e6..0adbebbc28037110afd911a5cfe14693101e19ea 100644 (file)
@@ -46,7 +46,7 @@ enum address_markers_idx {
        PCI_START_NR,
        PCI_END_NR,
        MODULES_START_NR,
-       MODUELS_END_NR,
+       MODULES_END_NR,
        KERNEL_SPACE_NR,
 };
 
index 92ddac1e8ca2f35b8f24028514bb761aed3591a4..abe2a9542b3a367c778a96051dcf3618881d6fa1 100644 (file)
@@ -371,6 +371,13 @@ static int __kprobes do_translation_fault(unsigned long addr,
        return 0;
 }
 
+static int do_alignment_fault(unsigned long addr, unsigned int esr,
+                             struct pt_regs *regs)
+{
+       do_bad_area(addr, esr, regs);
+       return 0;
+}
+
 /*
  * This abort handler always returns "fault".
  */
@@ -418,7 +425,7 @@ static struct fault_info {
        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk)" },
        { do_bad,               SIGBUS,  0,             "synchronous parity error (translation table walk)" },
        { do_bad,               SIGBUS,  0,             "unknown 32"                    },
-       { do_bad,               SIGBUS,  BUS_ADRALN,    "alignment fault"               },
+       { do_alignment_fault,   SIGBUS,  BUS_ADRALN,    "alignment fault"               },
        { do_bad,               SIGBUS,  0,             "unknown 34"                    },
        { do_bad,               SIGBUS,  0,             "unknown 35"                    },
        { do_bad,               SIGBUS,  0,             "unknown 36"                    },
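Hooking the alignment fault is a one-row change because the handlers are table-driven: the decoded ESR fault code indexes into fault_info. A reduced, runnable sketch of the same dispatch; the rows and values here are illustrative, not the kernel table:

#include <stdio.h>

static int do_bad(unsigned long addr)
{
	printf("do_bad(%#lx)\n", addr);
	return 1;
}

static int do_alignment_fault(unsigned long addr)
{
	printf("alignment fault at %#lx -> SIGBUS/BUS_ADRALN\n", addr);
	return 0;
}

static const struct fault_info {
	int (*fn)(unsigned long addr);
	const char *name;
} fault_info[] = {
	[0] = { do_bad,             "unknown"         },
	[1] = { do_alignment_fault, "alignment fault" },
};

int main(void)
{
	unsigned int esr = 1;	/* pretend the ESR decoded to row 1 */

	return fault_info[esr].fn(0xdead0003);
}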
index cf038c7d9fa994c7d86e05920ffa8961aecc4ad8..cab7a5be40aa85cbd933635d48208d2af54bcf1c 100644 (file)
@@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
 void __init kasan_init(void)
 {
        struct memblock_region *reg;
+       int i;
 
        /*
         * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@ void __init kasan_init(void)
                                pfn_to_nid(virt_to_pfn(start)));
        }
 
+       /*
+        * KAsan may reuse the contents of kasan_zero_pte directly, so we
+        * should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte(&kasan_zero_pte[i],
+                       pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
        memset(kasan_zero_page, 0, PAGE_SIZE);
        cpu_set_ttbr1(__pa(swapper_pg_dir));
        flush_tlb_all();
index 4c893b5189ddd027537511c5daaca70f158577c4..232f787a088ae8e992c52cba450159a43a900174 100644 (file)
@@ -53,10 +53,10 @@ unsigned long arch_mmap_rnd(void)
 
 #ifdef CONFIG_COMPAT
        if (test_thread_flag(TIF_32BIT))
-               rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
        else
 #endif
-               rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
        return rnd << PAGE_SHIFT;
 }
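arch_mmap_rnd() had two width problems: get_random_int() supplies at most 32 random bits, and "1 << mmap_rnd_bits" is evaluated in plain int, which is undefined behaviour once the bit count reaches 31. A small demonstration of the corrected 64-bit form; the bit count below is a hypothetical value above 31:

#include <stdio.h>

int main(void)
{
	unsigned int bits = 33;	/* hypothetical mmap_rnd_bits value above 31 */

	/*
	 * The old code computed (1 << bits) in int, undefined for bits >= 31,
	 * and masked a 32-bit random value, so no address bit above 31 could
	 * ever be randomised.  Doing the shift in unsigned long fixes both:
	 */
	unsigned long mask = (1UL << bits) - 1;

	printf("mask for %u bits: %#lx\n", bits, mask);
	return 0;
}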
 
index 3571c7309c5e79f0d2d3e20986a581ffb6454dae..0795c3a36d8f0d140cf4952bd91b14962e7f81b4 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -44,6 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
        unsigned long end = start + size;
        int ret;
        struct page_change_data data;
+       struct vm_struct *area;
 
        if (!PAGE_ALIGNED(addr)) {
                start &= PAGE_MASK;
@@ -51,11 +53,27 @@ static int change_memory_common(unsigned long addr, int numpages,
                WARN_ON_ONCE(1);
        }
 
-       if (start < MODULES_VADDR || start >= MODULES_END)
+       /*
+        * Kernel VA mappings are always live, and splitting live section
+        * mappings into page mappings may cause TLB conflicts. This means
+        * we have to ensure that changing the permission bits of the range
+        * we are operating on does not result in such splitting.
+        *
+        * Let's restrict ourselves to mappings created by vmalloc (or vmap).
+        * Those are guaranteed to consist entirely of page mappings, and
+        * splitting is never needed.
+        *
+        * So check whether the [addr, addr + size) interval is entirely
+        * covered by precisely one VM area that has the VM_ALLOC flag set.
+        */
+       area = find_vm_area((void *)addr);
+       if (!area ||
+           end > (unsigned long)area->addr + area->size ||
+           !(area->flags & VM_ALLOC))
                return -EINVAL;
 
-       if (end < MODULES_VADDR || end >= MODULES_END)
-               return -EINVAL;
+       if (!numpages)
+               return 0;
 
        data.set_mask = set_mask;
        data.clear_mask = clear_mask;
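change_memory_common() now refuses to operate unless the whole [addr, addr + size) interval lies inside exactly one VM_ALLOC vmalloc area, since only those are guaranteed to consist of page mappings. The containment test in isolation; the vm_struct fields mirror the kernel's, the values are made up:

#include <stdbool.h>
#include <stdio.h>

#define VM_ALLOC 0x2	/* flag set on areas created by vmalloc() */

struct vm_struct {
	void *addr;
	unsigned long size;
	unsigned long flags;
};

/* True when [addr, addr + size) is fully inside one VM_ALLOC area. */
static bool range_ok(const struct vm_struct *area,
		     unsigned long addr, unsigned long size)
{
	unsigned long end = addr + size;

	if (!area || !(area->flags & VM_ALLOC))
		return false;
	return addr >= (unsigned long)area->addr &&
	       end <= (unsigned long)area->addr + area->size;
}

int main(void)
{
	struct vm_struct area = { .addr = (void *)0x10000, .size = 0x4000,
				  .flags = VM_ALLOC };

	printf("inside:   %d\n", range_ok(&area, 0x11000, 0x1000));	/* 1 */
	printf("overruns: %d\n", range_ok(&area, 0x13000, 0x2000));	/* 0 */
	return 0;
}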
index 146bd99a7532bcc6f53ad509a839561640eeaf3e..e6a30e1268a8c7570c8298797d7cf27dcb0d649f 100644 (file)
        b.lo    9998b
        dsb     \domain
        .endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+       .macro  reset_pmuserenr_el0, tmpreg
+       mrs     \tmpreg, id_aa64dfr0_el1        // Check ID_AA64DFR0_EL1 PMUVer
+       sbfx    \tmpreg, \tmpreg, #8, #4
+       cmp     \tmpreg, #1                     // Skip if no PMU present
+       b.lt    9000f
+       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+9000:
+       .endm
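reset_pmuserenr_el0 sign-extends ID_AA64DFR0_EL1 bits [11:8] (the PMUVer field) with sbfx, so both 0 (no PMU) and 0b1111 (which sign-extends to -1) fail the "cmp #1 / b.lt" test and skip the write. The same signed bit-field extract in C, with a sample register value:

#include <stdio.h>
#include <stdint.h>

/* Signed extract of <width> bits at <lsb>, like the AArch64 sbfx instruction. */
static int64_t sbfx(uint64_t reg, unsigned lsb, unsigned width)
{
	return (int64_t)(reg << (64 - lsb - width)) >> (64 - width);
}

int main(void)
{
	uint64_t dfr0 = 0xf00;	/* sample ID_AA64DFR0_EL1: PMUVer = 0b1111 */
	int64_t pmuver = sbfx(dfr0, 8, 4);

	/* Mirrors "cmp #1; b.lt": skip the PMU register writes if < 1. */
	if (pmuver < 1)
		printf("PMUVer=%lld: skip pmuserenr_el0 write\n", (long long)pmuver);
	else
		printf("PMUVer=%lld: PMUv3 present\n", (long long)pmuver);
	return 0;
}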
index a3d867e723b4f0c5c23283c89264d4a2f7f51561..c164d2cb35c05f12e7266592ee52a3b425483670 100644 (file)
@@ -117,7 +117,7 @@ ENTRY(cpu_do_resume)
         */
        ubfx    x11, x11, #1, #1
        msr     oslar_el1, x11
-       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+       reset_pmuserenr_el0 x0                  // Disable PMU access from EL0
        mov     x0, x12
        dsb     nsh             // Make sure local tlb invalidation completed
        isb
@@ -154,7 +154,7 @@ ENTRY(__cpu_setup)
        msr     cpacr_el1, x0                   // Enable FP/ASIMD
        mov     x0, #1 << 12                    // Reset mdscr_el1 and disable
        msr     mdscr_el1, x0                   // access to the DCC from EL0
-       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+       reset_pmuserenr_el0 x0                  // Disable PMU access from EL0
        /*
         * Memory region attributes for LPAE:
         *
index 74c132d901bd290bcc803b7c0a0266428a2ec1cb..6a8685051b676196780cab79a495fc6d906ccce7 100644 (file)
@@ -11,7 +11,7 @@
 
 
 
-#define NR_syscalls                    323 /* length of syscall table */
+#define NR_syscalls                    324 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
index 762edce7572eb33f03a3402e4357de69626764c9..41369a103838a6e3d8776f550885399d9006e60e 100644 (file)
 #define __NR_membarrier                        1344
 #define __NR_kcmp                      1345
 #define __NR_mlock2                    1346
+#define __NR_copy_file_range           1347
 
 #endif /* _UAPI_ASM_IA64_UNISTD_H */
index 534a74acb849d736726b5f066675f0a3a4c87ed4..477c55e244ecd45ce879dea27779967a16e0d13c 100644 (file)
@@ -1772,5 +1772,6 @@ sys_call_table:
        data8 sys_membarrier
        data8 sys_kcmp                          // 1345
        data8 sys_mlock2
+       data8 sys_copy_file_range
 
        .org sys_call_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
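The ".org" directive after the table makes the ia64 build fail if an entry is added without bumping NR_syscalls. The C-side equivalent of that guard is a static assertion on the table length; the names below are illustrative:

#include <stdio.h>

#define NR_SYSCALLS 3

static void *sys_call_table[] = {
	(void *)1,	/* sys_membarrier       */
	(void *)2,	/* sys_mlock2           */
	(void *)3,	/* sys_copy_file_range  */
};

/* Fails to compile if an entry is added without bumping NR_SYSCALLS. */
_Static_assert(sizeof(sys_call_table) / sizeof(sys_call_table[0]) == NR_SYSCALLS,
	       "sys_call_table length must match NR_SYSCALLS");

int main(void)
{
	printf("%zu syscalls\n",
	       sizeof(sys_call_table) / sizeof(sys_call_table[0]));
	return 0;
}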
index 836ac5a963c83140d18dcd762459d19e1fd0c101..2841c0a3fd3bb201a4bed53d823858b32a0a009b 100644 (file)
@@ -276,6 +276,7 @@ source "kernel/Kconfig.preempt"
 
 config SMP
        bool "Symmetric multi-processing support"
+       depends on MMU
        ---help---
          This enables support for systems with more than one CPU. If you have
          a system with only one CPU, say N. If you have a system with more
index fc96e814188e57aa9dee8ed516c62200c543ad56..d1fc4796025edb8769ee01195e64b91821f3625a 100644 (file)
@@ -108,6 +108,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -266,6 +268,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -366,6 +374,7 @@ CONFIG_ARIADNE=y
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_HYDRA=y
 CONFIG_APNE=y
 CONFIG_ZORRO8390=y
index 05c904f08d9d496fb7455df0d1506fb8d8c9bc23..9bfe8be3658c18231ca4473d8c075a1fb2ac4d5e 100644 (file)
@@ -106,6 +106,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -264,6 +266,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -344,6 +352,7 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
index d572b731c510fdb2dc60616ee1ff0ce7e10516ce..ebdcfae555801cd1c7367d4ca40974b3d39099cb 100644 (file)
@@ -106,6 +106,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -264,6 +266,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -353,6 +361,7 @@ CONFIG_ATARILANCE=y
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
index 11a30c65ad44cb52347929d51f80301c8aca648b..8acc65e54995388614666716febdab1dda706376 100644 (file)
@@ -104,6 +104,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -262,6 +264,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -343,6 +351,7 @@ CONFIG_BVME6000_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
index 6630a5154b9d797ebea93cdbe70886bc673f60cd..0c6a3d52b26e2b2559f24963040d3ba65a91ed47 100644 (file)
@@ -106,6 +106,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -264,6 +266,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -345,6 +353,7 @@ CONFIG_HPLANCE=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
index 1d90b71d09038da90cfad0cab267d60dae0fd752..12a8a6cb32f4914f06c1f5d4c8e5dd6ba31381ef 100644 (file)
@@ -105,6 +105,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -266,6 +268,12 @@ CONFIG_DEV_APPLETALK=m
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -362,6 +370,7 @@ CONFIG_MAC89x0=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MACSONIC=y
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
index 1fd21c1ca87fd8da85ace8c64930cfeed4383370..64ff2dcb34c89a3e60e26f9c468d654ed943e97f 100644 (file)
@@ -115,6 +115,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -276,6 +278,12 @@ CONFIG_DEV_APPLETALK=m
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -404,6 +412,7 @@ CONFIG_MVME16x_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MACSONIC=y
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_HYDRA=y
 CONFIG_MAC8390=y
 CONFIG_NE2000=y
index 74e10f79d7b1f9475574d422451314b7bcc1af64..07fc6abcfe0c50e4b656a63a9da36c728b13cec4 100644 (file)
@@ -103,6 +103,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -261,6 +263,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -343,6 +351,7 @@ CONFIG_MVME147_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
index 7034e716f166be8f869f872b12f9cbf960054029..69903ded88f71d1d51ad4b430acbd2f745fdf867 100644 (file)
@@ -104,6 +104,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -262,6 +264,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -343,6 +351,7 @@ CONFIG_MVME16x_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
index f7deb5f702a6484dda646577f48ade90b902bf18..bd8401686ddef143bf036159cb3f4ea650772f32 100644 (file)
@@ -104,6 +104,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -262,6 +264,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -352,6 +360,7 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
index 0ce79eb0d80503140d928c3c6c77061a2ead34d9..5f9fb3ab9636808d46b75f3696f477ab91e6dd79 100644 (file)
@@ -101,6 +101,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -259,6 +261,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -340,6 +348,7 @@ CONFIG_SUN3_82586=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
index 4cb787e4991fcfd02646b1144999419b40454f83..5d1c674530e2ba73ca43ffc4940f139772962152 100644 (file)
@@ -101,6 +101,8 @@ CONFIG_NFT_NAT=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -259,6 +261,12 @@ CONFIG_L2TP=m
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -341,6 +349,7 @@ CONFIG_SUN3LANCE=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
index f9d96bf869109c028e5a9f1f12ad3e9fe8b933ba..bafaff6dcd7bda8a28101f140159f9a7a76638db 100644 (file)
@@ -4,7 +4,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls            376
+#define NR_syscalls            377
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
index 36cf129de663a7ca22f1bf1bba5a6245b7b04c03..0ca729665f29e9d67851aed2c1c52be75bbd078b 100644 (file)
 #define __NR_userfaultfd       373
 #define __NR_membarrier                374
 #define __NR_mlock2            375
+#define __NR_copy_file_range   376
 
 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */
index 282cd903f4c469197738eb9e840eaa75c77ec11a..8bb94261ff97d953fcfbe7c305e6a7ecfce27a97 100644 (file)
@@ -396,3 +396,4 @@ ENTRY(sys_call_table)
        .long sys_userfaultfd
        .long sys_membarrier
        .long sys_mlock2                /* 375 */
+       .long sys_copy_file_range
index a96c81d1d22e6afce0973161639024e93346fd0b..c5cd63a4b6d53b399e91e8ebad7358a3f45fce8c 100644 (file)
@@ -21,6 +21,7 @@ platforms += mti-malta
 platforms += mti-sead3
 platforms += netlogic
 platforms += paravirt
+platforms += pic32
 platforms += pistachio
 platforms += pmcs-msp71xx
 platforms += pnx833x
index fbf3f6670b69a58f23bf3878eb0d5d2df3edeaec..74a3db92da1b52edc15165ac06d3b1558b8b78c0 100644 (file)
@@ -169,6 +169,7 @@ config BMIPS_GENERIC
        select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
        select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
        select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+       select ARCH_WANT_OPTIONAL_GPIOLIB
        help
          Build a generic DT-based kernel image that boots on select
          BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top
@@ -480,6 +481,14 @@ config MIPS_MALTA
          This enables support for the MIPS Technologies Malta evaluation
          board.
 
+config MACH_PIC32
+       bool "Microchip PIC32 Family"
+       help
+         This enables support for the Microchip PIC32 family of platforms.
+
+         Microchip PIC32 is a family of general-purpose 32-bit MIPS core
+         microcontrollers.
+
 config MIPS_SEAD3
        bool "MIPS SEAD3 board"
        select BOOT_ELF32
@@ -979,6 +988,7 @@ source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
 source "arch/mips/lantiq/Kconfig"
 source "arch/mips/lasat/Kconfig"
+source "arch/mips/pic32/Kconfig"
 source "arch/mips/pistachio/Kconfig"
 source "arch/mips/pmcs-msp71xx/Kconfig"
 source "arch/mips/ralink/Kconfig"
@@ -1755,6 +1765,10 @@ config SYS_SUPPORTS_ZBOOT_UART16550
        bool
        select SYS_SUPPORTS_ZBOOT
 
+config SYS_SUPPORTS_ZBOOT_UART_PROM
+       bool
+       select SYS_SUPPORTS_ZBOOT
+
 config CPU_LOONGSON2
        bool
        select CPU_SUPPORTS_32BIT_KERNEL
@@ -2017,7 +2031,8 @@ config KVM_GUEST
        bool "KVM Guest Kernel"
        depends on BROKEN_ON_SMP
        help
-         Select this option if building a guest kernel for KVM (Trap & Emulate) mode
+         Select this option if building a guest kernel for KVM (Trap & Emulate)
+         mode.
 
 config KVM_GUEST_TIMER_FREQ
        int "Count/Compare Timer Frequency (MHz)"
@@ -2070,7 +2085,7 @@ config PAGE_SIZE_32KB
 
 config PAGE_SIZE_64KB
        bool "64kB"
-       depends on !CPU_R3000 && !CPU_TX39XX
+       depends on !CPU_R3000 && !CPU_TX39XX && !CPU_R6000
        help
          Using 64kB page size will result in higher performance kernel at
          the price of higher memory consumption.  This option is available on
index 3f70ba54ae21e909973ad9913eb71151fcc50492..e78d60dbdffdad93994421e684a87e2f34f8b802 100644 (file)
@@ -166,16 +166,6 @@ cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += -Wa,-march=octeon
 endif
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 cflags-$(CONFIG_CPU_BMIPS)     += -march=mips32 -Wa,-mips32 -Wa,--trap
-#
-# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
-# as MIPS64 R1; older versions as just R1.  This leaves the possibility open
-# that GCC might generate R2 code for -march=loongson3a which then is rejected
-# by GAS.  The cc-option can't probe for this behaviour so -march=loongson3a
-# can't easily be used safely within the kbuild framework.
-#
-cflags-$(CONFIG_CPU_LOONGSON3)  +=                                     \
-       $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
-       -Wa,-mips64r2 -Wa,--trap
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,)
 cflags-$(CONFIG_CPU_R4400_WORKAROUNDS) += $(call cc-option,-mfix-r4400,)
index f9bc4f520440142b2be4b033c8166f684bd1fc75..84548f704035391d9715d2aeee0a8845735f8dd1 100644 (file)
@@ -40,7 +40,7 @@
 
 static int gpio2_get(struct gpio_chip *chip, unsigned offset)
 {
-       return alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
+       return !!alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
 }
 
 static void gpio2_set(struct gpio_chip *chip, unsigned offset, int value)
@@ -68,7 +68,7 @@ static int gpio2_to_irq(struct gpio_chip *chip, unsigned offset)
 
 static int gpio1_get(struct gpio_chip *chip, unsigned offset)
 {
-       return alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
+       return !!alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
 }
 
 static void gpio1_set(struct gpio_chip *chip,
@@ -119,7 +119,7 @@ struct gpio_chip alchemy_gpio_chip[] = {
 
 static int alchemy_gpic_get(struct gpio_chip *chip, unsigned int off)
 {
-       return au1300_gpio_get_value(off + AU1300_GPIO_BASE);
+       return !!au1300_gpio_get_value(off + AU1300_GPIO_BASE);
 }
 
 static void alchemy_gpic_set(struct gpio_chip *chip, unsigned int off, int v)
index f4930456eb8e8181098c6035dccdd1203d72ea18..f969f583c68c66c0512d7c648dc16d22c18158f6 100644 (file)
@@ -37,7 +37,7 @@ static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
                                container_of(chip, struct ar7_gpio_chip, chip);
        void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT;
 
-       return readl(gpio_in) & (1 << gpio);
+       return !!(readl(gpio_in) & (1 << gpio));
 }
 
 static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
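The "!!" additions normalise the return value: readl() hands back the raw masked bit, which for any GPIO above bit 0 is a large nonzero number, while the gpiochip ->get() contract is 0 or 1. The effect in isolation, with a stubbed register read:

#include <stdio.h>
#include <stdint.h>

/* Pretend register read: bit 5 of the GPIO input register is set. */
static uint32_t readl_stub(void) { return 1u << 5; }

static int gpio_get(unsigned gpio)
{
	/* Without !!, this would return 32 for gpio 5; callers expect 0 or 1. */
	return !!(readl_stub() & (1u << gpio));
}

int main(void)
{
	printf("gpio 5 = %d\n", gpio_get(5));	/* 1, not 32 */
	printf("gpio 4 = %d\n", gpio_get(4));	/* 0 */
	return 0;
}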
index ca7cc19adfea7f01154c55f44e47b5f863499843..870c6b2e97e835127dc88c6ca4bc4219faa44d92 100644 (file)
@@ -23,7 +23,6 @@ void ath79_clocks_init(void);
 unsigned long ath79_get_sys_clk_rate(const char *id);
 
 void ath79_ddr_ctrl_init(void);
-void ath79_ddr_wb_flush(unsigned int reg);
 
 void ath79_gpio_init(void);
 
index eeb3953ed8ac8d8051ccbe3df9af8270915329b3..511c06560dc1f5a95866f832ec9ae2180048b9d5 100644 (file)
 #include "common.h"
 #include "machtypes.h"
 
+static void __init ath79_misc_intc_domain_init(
+       struct device_node *node, int irq);
+
 static void ath79_misc_irq_handler(struct irq_desc *desc)
 {
-       void __iomem *base = ath79_reset_base;
+       struct irq_domain *domain = irq_desc_get_handler_data(desc);
+       void __iomem *base = domain->host_data;
        u32 pending;
 
        pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
@@ -42,15 +46,15 @@ static void ath79_misc_irq_handler(struct irq_desc *desc)
        while (pending) {
                int bit = __ffs(pending);
 
-               generic_handle_irq(ATH79_MISC_IRQ(bit));
+               generic_handle_irq(irq_linear_revmap(domain, bit));
                pending &= ~BIT(bit);
        }
 }
 
 static void ar71xx_misc_irq_unmask(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
@@ -62,8 +66,8 @@ static void ar71xx_misc_irq_unmask(struct irq_data *d)
 
 static void ar71xx_misc_irq_mask(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
@@ -75,8 +79,8 @@ static void ar71xx_misc_irq_mask(struct irq_data *d)
 
 static void ar724x_misc_irq_ack(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
@@ -94,12 +98,6 @@ static struct irq_chip ath79_misc_irq_chip = {
 
 static void __init ath79_misc_irq_init(void)
 {
-       void __iomem *base = ath79_reset_base;
-       int i;
-
-       __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-       __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
        if (soc_is_ar71xx() || soc_is_ar913x())
                ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
        else if (soc_is_ar724x() ||
@@ -110,13 +108,7 @@ static void __init ath79_misc_irq_init(void)
        else
                BUG();
 
-       for (i = ATH79_MISC_IRQ_BASE;
-            i < ATH79_MISC_IRQ_BASE + ATH79_MISC_IRQ_COUNT; i++) {
-               irq_set_chip_and_handler(i, &ath79_misc_irq_chip,
-                                        handle_level_irq);
-       }
-
-       irq_set_chained_handler(ATH79_CPU_IRQ(6), ath79_misc_irq_handler);
+       ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
 }
 
 static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
@@ -256,10 +248,10 @@ asmlinkage void plat_irq_dispatch(void)
        }
 }
 
-#ifdef CONFIG_IRQCHIP
 static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
 {
        irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
+       irq_set_chip_data(irq, d->host_data);
        return 0;
 }
 
@@ -268,19 +260,14 @@ static const struct irq_domain_ops misc_irq_domain_ops = {
        .map = misc_map,
 };
 
-static int __init ath79_misc_intc_of_init(
-       struct device_node *node, struct device_node *parent)
+static void __init ath79_misc_intc_domain_init(
+       struct device_node *node, int irq)
 {
        void __iomem *base = ath79_reset_base;
        struct irq_domain *domain;
-       int irq;
-
-       irq = irq_of_parse_and_map(node, 0);
-       if (!irq)
-               panic("Failed to get MISC IRQ");
 
        domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
-                       ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, NULL);
+                       ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
        if (!domain)
                panic("Failed to add MISC irqdomain");
 
@@ -288,9 +275,19 @@ static int __init ath79_misc_intc_of_init(
        __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
        __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
 
+       irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
+}
 
-       irq_set_chained_handler(irq, ath79_misc_irq_handler);
+static int __init ath79_misc_intc_of_init(
+       struct device_node *node, struct device_node *parent)
+{
+       int irq;
 
+       irq = irq_of_parse_and_map(node, 0);
+       if (!irq)
+               panic("Failed to get MISC IRQ");
+
+       ath79_misc_intc_domain_init(node, irq);
        return 0;
 }
 
@@ -349,8 +346,6 @@ static int __init ar79_cpu_intc_of_init(
 IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
                ar79_cpu_intc_of_init);
 
-#endif
-
 void __init arch_init_irq(void)
 {
        if (mips_machtype == ATH79_MACH_GENERIC_OF) {
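The conversion above hinges on one pattern: misc_map() stores the reset-register base (passed to irq_domain_add_legacy() as host_data) in each mapped IRQ's chip data, and the irq_chip callbacks recover it via irq_data_get_irq_chip_data() instead of reaching for the ath79_reset_base global. A minimal sketch using only names visible in this hunk; the register write-back, elided above, is assumed:

/* Sketch: per-IRQ chip data carries the MMIO base stored at map time. */
static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
	irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
	irq_set_chip_data(irq, d->host_data);	/* host_data == reset base */
	return 0;
}

static void ar71xx_misc_irq_mask(struct irq_data *d)
{
	void __iomem *base = irq_data_get_irq_chip_data(d);
	u32 t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);

	/* assumed write-back (elided in the hunk above) */
	__raw_writel(t & ~BIT(d->hwirq),
		     base + AR71XX_RESET_REG_MISC_INT_ENABLE);
}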
index 8755d618e116ec32fd6fff90d0081b0bb2e4bb9c..be451ee4a5eaf5b8ba6de0d11fc8c76f6d6db7fa 100644 (file)
 
 #define ATH79_SYS_TYPE_LEN     64
 
-#define AR71XX_BASE_FREQ       40000000
-#define AR724X_BASE_FREQ       5000000
-#define AR913X_BASE_FREQ       5000000
-
 static char ath79_sys_type[ATH79_SYS_TYPE_LEN];
 
 static void ath79_restart(char *command)
@@ -272,15 +268,10 @@ void __init device_tree_init(void)
        unflatten_and_copy_device_tree();
 }
 
-static void __init ath79_generic_init(void)
-{
-       /* Nothing to do */
-}
-
 MIPS_MACHINE(ATH79_MACH_GENERIC,
             "Generic",
             "Generic AR71XX/AR724X/AR913X based board",
-            ath79_generic_init);
+            NULL);
 
 MIPS_MACHINE(ATH79_MACH_GENERIC_OF,
             "DTB",
index a7e569c7968ea7e8bc1a1ce3e629fc54cc2710ee..959c145a0a2c1654d1ac3b58a98db92ef435a713 100644 (file)
@@ -666,9 +666,15 @@ static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out)
        switch (bus->hosttype) {
        case BCMA_HOSTTYPE_PCI:
                memset(out, 0, sizeof(struct ssb_sprom));
-               snprintf(buf, sizeof(buf), "pci/%u/%u/",
-                        bus->host_pci->bus->number + 1,
-                        PCI_SLOT(bus->host_pci->devfn));
+               /* On BCM47XX all PCI buses share the same domain */
+               if (config_enabled(CONFIG_BCM47XX))
+                       snprintf(buf, sizeof(buf), "pci/%u/%u/",
+                                bus->host_pci->bus->number + 1,
+                                PCI_SLOT(bus->host_pci->devfn));
+               else
+                       snprintf(buf, sizeof(buf), "pci/%u/%u/",
+                                pci_domain_nr(bus->host_pci->bus) + 1,
+                                bus->host_pci->bus->number);
                bcm47xx_sprom_apply_prefix_alias(buf, sizeof(buf));
                prefix = buf;
                break;
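The two snprintf() branches differ only in which numbers build the SPROM variable prefix: on BCM47XX, where the comment notes every PCI bus shares one domain, the prefix stays "pci/<bus+1>/<slot>/"; other hosts encode "pci/<domain+1>/<bus>/". A standalone illustration with hypothetical bus/slot/domain values:

#include <stdio.h>

/* Illustration only: the prefix strings the code above produces. */
int main(void)
{
	char buf[32];

	/* BCM47XX host: bus 0, PCI slot 1 -> "pci/1/1/" */
	snprintf(buf, sizeof(buf), "pci/%u/%u/", 0u + 1, 1u);
	puts(buf);

	/* Other host: domain 1, bus 0 -> "pci/2/0/" */
	snprintf(buf, sizeof(buf), "pci/%u/%u/", 1u + 1, 0u);
	puts(buf);
	return 0;
}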
index 4b50d40f7451ad86eba859b8a02ea792ff060b35..05757aed016cd6f72bedbd0ac31f379e8f0cd884 100644 (file)
@@ -10,6 +10,7 @@
 
 #define pr_fmt(fmt) "bcm63xx_nvram: " fmt
 
+#include <linux/bcm963xx_nvram.h>
 #include <linux/init.h>
 #include <linux/crc32.h>
 #include <linux/export.h>
 
 #include <bcm63xx_nvram.h>
 
-/*
- * nvram structure
- */
-struct bcm963xx_nvram {
-       u32     version;
-       u8      reserved1[256];
-       u8      name[16];
-       u32     main_tp_number;
-       u32     psi_size;
-       u32     mac_addr_count;
-       u8      mac_addr_base[ETH_ALEN];
-       u8      reserved2[2];
-       u32     checksum_old;
-       u8      reserved3[720];
-       u32     checksum_high;
-};
-
 #define BCM63XX_DEFAULT_PSI_SIZE       64
 
 static struct bcm963xx_nvram nvram;
@@ -42,27 +26,14 @@ static int mac_addr_used;
 
 void __init bcm63xx_nvram_init(void *addr)
 {
-       unsigned int check_len;
        u32 crc, expected_crc;
        u8 hcs_mac_addr[ETH_ALEN] = { 0x00, 0x10, 0x18, 0xff, 0xff, 0xff };
 
        /* extract nvram data */
-       memcpy(&nvram, addr, sizeof(nvram));
+       memcpy(&nvram, addr, BCM963XX_NVRAM_V5_SIZE);
 
        /* check checksum before using data */
-       if (nvram.version <= 4) {
-               check_len = offsetof(struct bcm963xx_nvram, reserved3);
-               expected_crc = nvram.checksum_old;
-               nvram.checksum_old = 0;
-       } else {
-               check_len = sizeof(nvram);
-               expected_crc = nvram.checksum_high;
-               nvram.checksum_high = 0;
-       }
-
-       crc = crc32_le(~0, (u8 *)&nvram, check_len);
-
-       if (crc != expected_crc)
+       if (bcm963xx_nvram_checksum(&nvram, &expected_crc, &crc))
                pr_warn("nvram checksum failed, contents may be invalid (expected %08x, got %08x)\n",
                        expected_crc, crc);
 
index 5b16d2955fbb864afae0103753bd8d0d115eba0e..35535284b39efb323cb63b4f8ec37dcdfbdb7f60 100644 (file)
@@ -105,6 +105,7 @@ static const struct bmips_quirk bmips_quirk_list[] = {
        { "brcm,bcm33843-viper",        &bcm3384_viper_quirks           },
        { "brcm,bcm6328",               &bcm6328_quirks                 },
        { "brcm,bcm6368",               &bcm6368_quirks                 },
+       { "brcm,bcm63168",              &bcm6368_quirks                 },
        { },
 };
 
index d5bdee115f22f73058d464e5cc88aee8abc7cf46..4eff1ef02eff9abec4ae03d4248f144ef8fa0058 100644 (file)
@@ -29,20 +29,23 @@ KBUILD_AFLAGS := $(LINUXINCLUDE) $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
        -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
        -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
 
-targets := head.o decompress.o string.o dbg.o uart-16550.o uart-alchemy.o
-
 # decompressor objects (linked with vmlinuz)
 vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
 
 ifdef CONFIG_DEBUG_ZBOOT
 vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT)                 += $(obj)/dbg.o
 vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART16550) += $(obj)/uart-16550.o
+vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART_PROM) += $(obj)/uart-prom.o
 vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY)                += $(obj)/uart-alchemy.o
 endif
 
-ifdef CONFIG_KERNEL_XZ
-vmlinuzobjs-y += $(obj)/../../lib/ashldi3.o
-endif
+vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
+
+$(obj)/ashldi3.o: KBUILD_CFLAGS += -I$(srctree)/arch/mips/lib
+$(obj)/ashldi3.c: $(srctree)/arch/mips/lib/ashldi3.c
+       $(call cmd,shipped)
+
+targets := $(notdir $(vmlinuzobjs-y))
 
 targets += vmlinux.bin
 OBJCOPYFLAGS_vmlinux.bin := $(OBJCOPYFLAGS) -O binary -R .comment -S
@@ -60,7 +63,7 @@ targets += vmlinux.bin.z
 $(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE
        $(call if_changed,$(tool_y))
 
-targets += piggy.o
+targets += piggy.o dummy.o
 OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \
                        --set-section-flags=.image=contents,alloc,load,readonly,data
 $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
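The ashldi3 change copies arch/mips/lib/ashldi3.c into the decompressor via cmd,shipped, so the helper is compiled with the decompressor's own flags instead of linking the kernel-proper object. __ashldi3 is the libgcc-style 64-bit left-shift routine that XZ decompression needs on 32-bit targets; a host-side sketch of its contract (the in-tree version works on a two-word union):

/* Sketch: 64-bit shift-left composed from 32-bit halves, 0 <= b < 64. */
unsigned long long ashldi3_sketch(unsigned long long u, int b)
{
	unsigned int hi = (unsigned int)(u >> 32);
	unsigned int lo = (unsigned int)u;

	if (b >= 32) {
		hi = lo << (b - 32);
		lo = 0;
	} else if (b > 0) {
		hi = (hi << b) | (lo >> (32 - b));
		lo <<= b;
	}
	return ((unsigned long long)hi << 32) | lo;
}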
diff --git a/arch/mips/boot/compressed/uart-prom.c b/arch/mips/boot/compressed/uart-prom.c
new file mode 100644 (file)
index 0000000..1c3d51b
--- /dev/null
@@ -0,0 +1,7 @@
+/* Forward the zboot debug output to the firmware's prom_putchar(). */
+extern void prom_putchar(unsigned char ch);
+
+void putc(char c)
+{
+       prom_putchar(c);
+}
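putc() is the only hook the zboot debug code needs; everything else layers on top of it. A trivial caller sketch:

/* Sketch of a caller: push a string through the PROM UART. */
static void puts_sketch(const char *s)
{
	while (*s)
		putc(*s++);
}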
index a0bf516ec3948827acbc526c6ebac5a7827e1a22..fc7a0a98e9bfc159ef3ba75ba2c791aa76e5c9a2 100644 (file)
@@ -4,6 +4,7 @@ dts-dirs        += ingenic
 dts-dirs       += lantiq
 dts-dirs       += mti
 dts-dirs       += netlogic
+dts-dirs       += pic32
 dts-dirs       += qca
 dts-dirs       += ralink
 dts-dirs       += xilfpga
index d52ce3d07f16dee81c5982212fe69f8cddc015d0..d61b1616b604552be9a30f89eb42539de6838b14 100644 (file)
@@ -31,6 +31,7 @@
        };
 
        aliases {
+               leds0 = &leds0;
                uart0 = &uart0;
        };
 
@@ -73,6 +74,7 @@
                timer: timer@10000040 {
                        compatible = "syscon";
                        reg = <0x10000040 0x2c>;
+                       little-endian;
                };
 
                reboot {
                        offset = <0x28>;
                        mask = <0x1>;
                };
+
+               leds0: led-controller@10000800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,bcm6328-leds";
+                       reg = <0x10000800 0x24>;
+                       status = "disabled";
+               };
        };
 };
index 45152bc22117f081d15a8b6dd2740305b35efbf6..9c8d3fe28b3114e51aaf128a640bde630d31a65b 100644 (file)
@@ -32,6 +32,7 @@
        };
 
        aliases {
+               leds0 = &leds0;
                uart0 = &uart0;
        };
 
                compatible = "simple-bus";
                ranges;
 
+               periph_cntl: syscon@10000000 {
+                       compatible = "syscon";
+                       reg = <0x10000000 0x14>;
+                       little-endian;
+               };
+
+               reboot: syscon-reboot@10000008 {
+                       compatible = "syscon-reboot";
+                       regmap = <&periph_cntl>;
+                       offset = <0x8>;
+                       mask = <0x1>;
+               };
+
                periph_intc: periph_intc@10000020 {
                        compatible = "brcm,bcm3380-l2-intc";
                        reg = <0x10000024 0x4 0x1000002c 0x4>,
                        interrupts = <2>;
                };
 
+               leds0: led-controller@100000d0 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,bcm6358-leds";
+                       reg = <0x100000d0 0x8>;
+                       status = "disabled";
+               };
+
                uart0: serial@10000100 {
                        compatible = "brcm,bcm6345-uart";
                        reg = <0x10000100 0x18>;
index 4fc7ecee273c105027ff20cea02f2bd5cf86c9fe..1a7efa883c5e3fd2e046b554485270036193b382 100644 (file)
@@ -98,6 +98,7 @@
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7125-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x60c>;
+                       little-endian;
                };
 
                reboot {
index a3039bb53477d40a426fec1d1318f3156ede8be3..d4bf52cfcf170ee8ac84daa874495e0a6420e542 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7346-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 4274ff41ec2122ac0bfd52d814aee8432c26553e..8e2501694d03fbd93827aeda79ef22f7cfd5d094 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7358-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 0dcc9163c27bdd0022e5b66f3ffd65da7fd6ce31..7e5f76040fb898b19a4bbc301c8a20f3b9368aa4 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7360-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 2f3f9fc2c478df36ef57c5990dd81f6757240e3a..c739ea77acb0dfe17363ec52cf390cace407e54c 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7362-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index bee221b3b56857c8d84dac3e2fa9bfe8b3c54a85..5f55d0a50a28622614ec6142eb0ff19746dfaade 100644 (file)
@@ -99,6 +99,7 @@
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7420-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x60c>;
+                       little-endian;
                };
 
                reboot {
index 571f30f52e3ff5780ec4fd72e18e72737e51537d..e24d41ab4e30f9163605180d78605fc02a477db6 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 614ee211f71a89356dd1eb814a38ec3071885747..8b9432cc062bc7e89898f2f1f2213926193389f8 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 9fcb9e7d1f5752902d08ccdb174b9deeca5afad0..1652d8d60b1e4b8673acaa7d2ec182ce7eb7ff98 100644 (file)
 &uart4 {
        status = "okay";
 };
+
+&nemc {
+       status = "okay";
+
+       nandc: nand-controller@1 {
+               compatible = "ingenic,jz4780-nand";
+               reg = <1 0 0x1000000>;
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ingenic,bch-controller = <&bch>;
+
+               ingenic,nemc-tAS = <10>;
+               ingenic,nemc-tAH = <5>;
+               ingenic,nemc-tBP = <10>;
+               ingenic,nemc-tAW = <15>;
+               ingenic,nemc-tSTRV = <100>;
+
+               nand@1 {
+                       reg = <1>;
+
+                       nand-ecc-step-size = <1024>;
+                       nand-ecc-strength = <24>;
+                       nand-ecc-mode = "hw";
+                       nand-on-flash-bbt;
+
+                       partitions {
+                               compatible = "fixed-partitions";
+                               #address-cells = <2>;
+                               #size-cells = <2>;
+
+                               partition@0 {
+                                       label = "u-boot-spl";
+                                       reg = <0x0 0x0 0x0 0x800000>;
+                               };
+
+                               partition@0x800000 {
+                                       label = "u-boot";
+                                       reg = <0x0 0x800000 0x0 0x200000>;
+                               };
+
+                               partition@0xa00000 {
+                                       label = "u-boot-env";
+                                       reg = <0x0 0xa00000 0x0 0x200000>;
+                               };
+
+                               partition@0xc00000 {
+                                       label = "boot";
+                                       reg = <0x0 0xc00000 0x0 0x4000000>;
+                               };
+
+                               partition@0x8c00000 {
+                                       label = "system";
+                                       reg = <0x0 0x4c00000 0x1 0xfb400000>;
+                               };
+                       };
+               };
+       };
+};
+
+&bch {
+       status = "okay";
+};
index 65389f6027333c8c4bed688fb38ed11a31d4bd2e..b868b429add23af4c14f42a366b96760e49eee66 100644 (file)
 
                status = "disabled";
        };
+
+       nemc: nemc@13410000 {
+               compatible = "ingenic,jz4780-nemc";
+               reg = <0x13410000 0x10000>;
+               #address-cells = <2>;
+               #size-cells = <1>;
+               ranges = <1 0 0x1b000000 0x1000000
+                         2 0 0x1a000000 0x1000000
+                         3 0 0x19000000 0x1000000
+                         4 0 0x18000000 0x1000000
+                         5 0 0x17000000 0x1000000
+                         6 0 0x16000000 0x1000000>;
+
+               clocks = <&cgu JZ4780_CLK_NEMC>;
+
+               status = "disabled";
+       };
+
+       bch: bch@134d0000 {
+               compatible = "ingenic,jz4780-bch";
+               reg = <0x134d0000 0x10000>;
+
+               clocks = <&cgu JZ4780_CLK_BCH>;
+
+               status = "disabled";
+       };
 };
diff --git a/arch/mips/boot/dts/pic32/Makefile b/arch/mips/boot/dts/pic32/Makefile
new file mode 100644 (file)
index 0000000..7ac7905
--- /dev/null
@@ -0,0 +1,12 @@
+dtb-$(CONFIG_DTB_PIC32_MZDA_SK)                += pic32mzda_sk.dtb
+
+dtb-$(CONFIG_DTB_PIC32_NONE)           += \
+                                       pic32mzda_sk.dtb
+
+obj-y                          += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-                           += dummy.o
+
+always                         := $(dtb-y)
+clean-files                    := *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi b/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi
new file mode 100644 (file)
index 0000000..ef13350
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Device Tree Source for PIC32MZDA clock data
+ *
+ * Purna Chandra Mandal <purna.mandal@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+/* all fixed rate clocks */
+
+/ {
+       POSC:posc_clk { /* On-chip primary oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+       };
+
+       FRC:frc_clk { /* internal FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <8000000>;
+       };
+
+       BFRC:bfrc_clk { /* internal backup FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <8000000>;
+       };
+
+       LPRC:lprc_clk { /* internal low-power FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <32000>;
+       };
+
+       /* UPLL provides clock to USBCORE */
+       UPLL:usb_phy_clk {
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+               clock-output-names = "usbphy_clk";
+       };
+
+       TxCKI:txcki_clk { /* external clock input on TxCLKI pin */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <4000000>;
+               status = "disabled";
+       };
+
+       /* external clock input on REFCLKIx pin */
+       REFIx:refix_clk {
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+               status = "disabled";
+       };
+
+       /* PIC32 specific clks */
+       pic32_clktree {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               reg = <0x1f801200 0x200>;
+               compatible = "microchip,pic32mzda-clk";
+               ranges = <0 0x1f801200 0x200>;
+
+               /* secondary oscillator; external input on SOSCI pin */
+               SOSC:sosc_clk@0 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-sosc";
+                       clock-frequency = <32768>;
+                       reg = <0x000 0x10>,   /* enable reg */
+                             <0x1d0 0x10>; /* status reg */
+                       microchip,bit-mask = <0x02>; /* enable mask */
+                       microchip,status-bit-mask = <0x10>; /* status mask */
+               };
+
+               FRCDIV:frcdiv_clk {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-frcdivclk";
+                       clocks = <&FRC>;
+                       clock-output-names = "frcdiv_clk";
+               };
+
+               /* System PLL clock */
+               SYSPLL:spll_clk@020 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-syspll";
+                       reg = <0x020 0x10>, /* SPLL register */
+                             <0x1d0 0x10>; /* CLKSTAT register */
+                       clocks = <&POSC>, <&FRC>;
+                       clock-output-names = "sys_pll";
+                       microchip,status-bit-mask = <0x80>; /* SPLLRDY */
+               };
+
+               /* system clock; mux with postdiv & slew */
+               SYSCLK:sys_clk@1c0 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-sysclk-v2";
+                       reg = <0x1c0 0x04>; /* SLEWCON */
+                       clocks = <&FRCDIV>, <&SYSPLL>, <&POSC>, <&SOSC>,
+                                <&LPRC>, <&FRCDIV>;
+                       microchip,clock-indices = <0>, <1>, <2>, <4>,
+                                                 <5>, <7>;
+                       clock-output-names = "sys_clk";
+               };
+
+               /* Peripheral bus1 clock */
+               PBCLK1:pb1_clk@140 {
+                       reg = <0x140 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb1_clk";
+                       /* used by system modules, not gateable */
+                       microchip,ignore-unused;
+               };
+
+               /* Peripheral bus2 clock */
+               PBCLK2:pb2_clk@150 {
+                       reg = <0x150 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb2_clk";
+                       /* avoid gating even if unused */
+                       microchip,ignore-unused;
+               };
+
+               /* Peripheral bus3 clock */
+               PBCLK3:pb3_clk@160 {
+                       reg = <0x160 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb3_clk";
+               };
+
+               /* Peripheral bus4 clock(I/O ports, GPIO) */
+               PBCLK4:pb4_clk@170 {
+                       reg = <0x170 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb4_clk";
+               };
+
+               /* Peripheral bus clock */
+               PBCLK5:pb5_clk@180 {
+                       reg = <0x180 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb5_clk";
+               };
+
+               /* Peripheral Bus6 clock; */
+               PBCLK6:pb6_clk@190 {
+                       reg = <0x190 0x10>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       #clock-cells = <0>;
+               };
+
+               /* Peripheral bus7 clock */
+               PBCLK7:pb7_clk@1a0 {
+                       reg = <0x1a0 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       /* CPU is driven by this clock; so named */
+                       clock-output-names = "cpu_clk";
+                       clocks = <&SYSCLK>;
+               };
+
+               /* Reference Oscillator clock for SPI/I2S */
+               REFCLKO1:refo1_clk@80 {
+                       reg = <0x080 0x20>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       clock-output-names = "refo1_clk";
+               };
+
+               /* Reference Oscillator clock for SQI */
+               REFCLKO2:refo2_clk@a0 {
+                       reg = <0x0a0 0x20>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       clock-output-names = "refo2_clk";
+               };
+
+               /* Reference Oscillator clock, ADC */
+               REFCLKO3:refo3_clk@c0 {
+                       reg = <0x0c0 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo3_clk";
+               };
+
+               /* Reference Oscillator clock */
+               REFCLKO4:refo4_clk@e0 {
+                       reg = <0x0e0 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo4_clk";
+               };
+
+               /* Reference Oscillator clock, LCD */
+               REFCLKO5:refo5_clk@100 {
+                       reg = <0x100 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo5_clk";
+               };
+       };
+};
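Drivers consume these providers through the common clock framework: a peripheral node lists one of the outputs above in its clocks property (the UARTs in pic32mzda.dtsi, next, reference <&PBCLK2>) and the driver resolves it by handle. A hedged consumer sketch with a hypothetical probe function:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

/* Hypothetical consumer: resolve, enable and report the node's clock. */
static int pic32_demo_probe(struct platform_device *pdev)
{
	struct clk *clk = devm_clk_get(&pdev->dev, NULL);
	int ret;

	if (IS_ERR(clk))
		return PTR_ERR(clk);

	ret = clk_prepare_enable(clk);
	if (ret)
		return ret;

	dev_info(&pdev->dev, "clock rate: %lu Hz\n", clk_get_rate(clk));
	return 0;
}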
diff --git a/arch/mips/boot/dts/pic32/pic32mzda.dtsi b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
new file mode 100644 (file)
index 0000000..ad9e331
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#include "pic32mzda-clk.dtsi"
+
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&evic>;
+
+       aliases {
+               gpio0 = &gpio0;
+               gpio1 = &gpio1;
+               gpio2 = &gpio2;
+               gpio3 = &gpio3;
+               gpio4 = &gpio4;
+               gpio5 = &gpio5;
+               gpio6 = &gpio6;
+               gpio7 = &gpio7;
+               gpio8 = &gpio8;
+               gpio9 = &gpio9;
+               serial0 = &uart1;
+               serial1 = &uart2;
+               serial2 = &uart3;
+               serial3 = &uart4;
+               serial4 = &uart5;
+               serial5 = &uart6;
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       compatible = "mti,mips14KEc";
+                       device_type = "cpu";
+               };
+       };
+
+       soc {
+               compatible = "microchip,pic32mzda-infra";
+               interrupts = <0 IRQ_TYPE_EDGE_RISING>;
+       };
+
+       evic: interrupt-controller@1f810000 {
+               compatible = "microchip,pic32mzda-evic";
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               reg = <0x1f810000 0x1000>;
+               microchip,external-irqs = <3 8 13 18 23>;
+       };
+
+       pic32_pinctrl: pinctrl@1f801400 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "microchip,pic32mzda-pinctrl";
+               reg = <0x1f801400 0x400>;
+               clocks = <&PBCLK1>;
+       };
+
+       /* PORTA */
+       gpio0: gpio0@1f860000 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860000 0x100>;
+               interrupts = <118 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <0>;
+               gpio-ranges = <&pic32_pinctrl 0 0 16>;
+       };
+
+       /* PORTB */
+       gpio1: gpio1@1f860100 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860100 0x100>;
+               interrupts = <119 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <1>;
+               gpio-ranges = <&pic32_pinctrl 0 16 16>;
+       };
+
+       /* PORTC */
+       gpio2: gpio2@1f860200 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860200 0x100>;
+               interrupts = <120 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <2>;
+               gpio-ranges = <&pic32_pinctrl 0 32 16>;
+       };
+
+       /* PORTD */
+       gpio3: gpio3@1f860300 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860300 0x100>;
+               interrupts = <121 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <3>;
+               gpio-ranges = <&pic32_pinctrl 0 48 16>;
+       };
+
+       /* PORTE */
+       gpio4: gpio4@1f860400 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860400 0x100>;
+               interrupts = <122 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <4>;
+               gpio-ranges = <&pic32_pinctrl 0 64 16>;
+       };
+
+       /* PORTF */
+       gpio5: gpio5@1f860500 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860500 0x100>;
+               interrupts = <123 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <5>;
+               gpio-ranges = <&pic32_pinctrl 0 80 16>;
+       };
+
+       /* PORTG */
+       gpio6: gpio6@1f860600 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860600 0x100>;
+               interrupts = <124 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <6>;
+               gpio-ranges = <&pic32_pinctrl 0 96 16>;
+       };
+
+       /* PORTH */
+       gpio7: gpio7@1f860700 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860700 0x100>;
+               interrupts = <125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <7>;
+               gpio-ranges = <&pic32_pinctrl 0 112 16>;
+       };
+
+       /* PORTI does not exist */
+
+       /* PORTJ */
+       gpio8: gpio8@1f860800 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860800 0x100>;
+               interrupts = <126 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <8>;
+               gpio-ranges = <&pic32_pinctrl 0 128 16>;
+       };
+
+       /* PORTK */
+       gpio9: gpio9@1f860900 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860900 0x100>;
+               interrupts = <127 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <9>;
+               gpio-ranges = <&pic32_pinctrl 0 144 16>;
+       };
+
+       sdhci: sdhci@1f8ec000 {
+               compatible = "microchip,pic32mzda-sdhci";
+               reg = <0x1f8ec000 0x100>;
+               interrupts = <191 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&REFCLKO4>, <&PBCLK5>;
+               clock-names = "base_clk", "sys_clk";
+               bus-width = <4>;
+               cap-sd-highspeed;
+               status = "disabled";
+       };
+
+       uart1: serial@1f822000 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822000 0x50>;
+               interrupts = <112 IRQ_TYPE_LEVEL_HIGH>,
+                       <113 IRQ_TYPE_LEVEL_HIGH>,
+                       <114 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart2: serial@1f822200 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822200 0x50>;
+               interrupts = <145 IRQ_TYPE_LEVEL_HIGH>,
+                       <146 IRQ_TYPE_LEVEL_HIGH>,
+                       <147 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart3: serial@1f822400 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822400 0x50>;
+               interrupts = <157 IRQ_TYPE_LEVEL_HIGH>,
+                       <158 IRQ_TYPE_LEVEL_HIGH>,
+                       <159 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart4: serial@1f822600 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822600 0x50>;
+               interrupts = <170 IRQ_TYPE_LEVEL_HIGH>,
+                       <171 IRQ_TYPE_LEVEL_HIGH>,
+                       <172 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart5: serial@1f822800 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822800 0x50>;
+               interrupts = <179 IRQ_TYPE_LEVEL_HIGH>,
+                       <180 IRQ_TYPE_LEVEL_HIGH>,
+                       <181 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart6: serial@1f822A00 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822A00 0x50>;
+               interrupts = <188 IRQ_TYPE_LEVEL_HIGH>,
+                       <189 IRQ_TYPE_LEVEL_HIGH>,
+                       <190 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+};
diff --git a/arch/mips/boot/dts/pic32/pic32mzda_sk.dts b/arch/mips/boot/dts/pic32/pic32mzda_sk.dts
new file mode 100644 (file)
index 0000000..5d434a5
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#include "pic32mzda.dtsi"
+
+/ {
+       compatible = "microchip,pic32mzda-sk", "microchip,pic32mzda";
+       model = "Microchip PIC32MZDA Starter Kit";
+
+       memory {
+               device_type = "memory";
+               reg = <0x08000000 0x08000000>;
+       };
+
+       chosen {
+               bootargs = "earlyprintk=ttyPIC1,115200n8r console=ttyPIC1,115200n8";
+       };
+
+       leds0 {
+               compatible = "gpio-leds";
+               pinctrl-names = "default";
+               pinctrl-0 = <&user_leds_s0>;
+
+               led@1 {
+                       label = "pic32mzda_sk:red:led1";
+                       gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
+                       linux,default-trigger = "heartbeat";
+               };
+
+               led@2 {
+                       label = "pic32mzda_sk:yellow:led2";
+                       gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
+                       linux,default-trigger = "mmc0";
+               };
+
+               led@3 {
+                       label = "pic32mzda_sk:green:led3";
+                       gpios = <&gpio7 2 GPIO_ACTIVE_HIGH>;
+                       default-state = "on";
+               };
+       };
+
+       keys0 {
+               compatible = "gpio-keys";
+               pinctrl-0 = <&user_buttons_s0>;
+               pinctrl-names = "default";
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               button@sw1 {
+                       label = "ESC";
+                       linux,code = <1>;
+                       gpios = <&gpio1 12 0>;
+               };
+
+               button@sw2 {
+                       label = "Home";
+                       linux,code = <102>;
+                       gpios = <&gpio1 13 0>;
+               };
+
+               button@sw3 {
+                       label = "Menu";
+                       linux,code = <139>;
+                       gpios = <&gpio1 14 0>;
+               };
+       };
+};
+
+&uart2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart2>;
+       status = "okay";
+};
+
+&uart4 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart4>;
+       status = "okay";
+};
+
+&sdhci {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_sdhc1>;
+       status = "okay";
+       assigned-clocks = <&REFCLKO2>, <&REFCLKO4>, <&REFCLKO5>;
+       assigned-clock-rates = <50000000>, <25000000>, <40000000>;
+};
+
+&pic32_pinctrl {
+
+       pinctrl_sdhc1: sdhc1_pins0 {
+               pins = "A6", "D4", "G13", "G12", "G14", "A7", "A0";
+               microchip,digital;
+       };
+
+       user_leds_s0: user_leds_s0 {
+               pins = "H0", "H1", "H2";
+               output-low;
+               microchip,digital;
+       };
+
+       user_buttons_s0: user_buttons_s0 {
+               pins = "B12", "B13", "B14";
+               microchip,digital;
+               input-enable;
+               bias-pull-up;
+       };
+
+       pinctrl_uart2: pinctrl_uart2 {
+               uart2-tx {
+                       pins = "G9";
+                       function = "U2TX";
+                       microchip,digital;
+                       output-high;
+               };
+               uart2-rx {
+                       pins = "B0";
+                       function = "U2RX";
+                       microchip,digital;
+                       input-enable;
+               };
+       };
+
+       pinctrl_uart4: uart4-0 {
+               uart4-tx {
+                       pins = "C3";
+                       function = "U4TX";
+                       microchip,digital;
+                       output-high;
+               };
+               uart4-rx {
+                       pins = "E8";
+                       function = "U4RX";
+                       microchip,digital;
+                       input-enable;
+               };
+       };
+};
index 13d0439496a9d28d500b2d3e37b585e3f4541335..3ad4ba9b12fd6a649eb07b201414f18ba60b83a7 100644 (file)
                        };
                };
 
+               usb@1b000100 {
+                       compatible = "qca,ar7100-ehci", "generic-ehci";
+                       reg = <0x1b000100 0x100>;
+
+                       interrupts = <3>;
+                       resets = <&rst 5>;
+
+                       has-transaction-translator;
+
+                       phy-names = "usb";
+                       phys = <&usb_phy>;
+
+                       status = "disabled";
+               };
+
                spi@1f000000 {
                        compatible = "qca,ar9132-spi", "qca,ar7100-spi";
                        reg = <0x1f000000 0x10>;
                        #size-cells = <0>;
                };
        };
+
+       usb_phy: usb-phy {
+               compatible = "qca,ar7100-usb-phy";
+
+               reset-names = "usb-phy", "usb-suspend-override";
+               resets = <&rst 4>, <&rst 3>;
+
+               #phy-cells = <0>;
+
+               status = "disabled";
+       };
 };
index 003015ab34e7cffcde16231b6c9b871862a697a7..e535ee3c26a4e8402a5da1dcc8c0f748205dacca 100644 (file)
                        };
                };
 
+               usb@1b000100 {
+                       status = "okay";
+               };
+
                spi@1f000000 {
                        status = "okay";
                        num-cs = <1>;
                };
        };
 
+       usb-phy {
+               status = "okay";
+       };
+
        gpio-keys {
                compatible = "gpio-keys-polled";
                #address-cells = <1>;
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
new file mode 100644 (file)
index 0000000..52192c6
--- /dev/null
@@ -0,0 +1,89 @@
+CONFIG_MACH_PIC32=y
+CONFIG_DTB_PIC32_MZDA_SK=y
+CONFIG_HZ_100=y
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_SECCOMP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_RELAY=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_BINFMT_MISC=m
+# CONFIG_SUSPEND is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_INPUT_LEDS=m
+CONFIG_INPUT_POLLDEV=y
+CONFIG_INPUT_MOUSEDEV=m
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_EVBUG=m
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=m
+CONFIG_KEYBOARD_GPIO_POLLED=m
+# CONFIG_MOUSE_PS2 is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_PIC32=y
+CONFIG_SERIAL_PIC32_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_RAW_DRIVER=m
+CONFIG_GPIO_SYSFS=y
+# CONFIG_HWMON is not set
+CONFIG_HIDRAW=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_MICROCHIP_PIC32=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_ONESHOT=m
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_GPIO=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+# CONFIG_MIPS_PLATFORM_DEVICES is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_FSCACHE=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZ4=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
index 06b9bc7ea14b1da802dc29090174230cc2e1d613..c3212ff2672343fcb52315403c73d16759be77b1 100644 (file)
 #ifndef __ASM_CACHEOPS_H
 #define __ASM_CACHEOPS_H
 
+/*
+ * Most cache ops are split into a 2 bit field identifying the cache, and a 3
+ * bit field identifying the cache operation.
+ */
+#define CacheOp_Cache                  0x03
+#define CacheOp_Op                     0x1c
+
+#define Cache_I                                0x00
+#define Cache_D                                0x01
+#define Cache_T                                0x02
+#define Cache_S                                0x03
+
+#define Index_Writeback_Inv            0x00
+#define Index_Load_Tag                 0x04
+#define Index_Store_Tag                        0x08
+#define Hit_Invalidate                 0x10
+#define Hit_Writeback_Inv              0x14    /* not with Cache_I though */
+#define Hit_Writeback                  0x18
+
 /*
  * Cache Operations available on all MIPS processors with R4000-style caches
  */
-#define Index_Invalidate_I             0x00
-#define Index_Writeback_Inv_D          0x01
-#define Index_Load_Tag_I               0x04
-#define Index_Load_Tag_D               0x05
-#define Index_Store_Tag_I              0x08
-#define Index_Store_Tag_D              0x09
-#define Hit_Invalidate_I               0x10
-#define Hit_Invalidate_D               0x11
-#define Hit_Writeback_Inv_D            0x15
+#define Index_Invalidate_I             (Cache_I | Index_Writeback_Inv)
+#define Index_Writeback_Inv_D          (Cache_D | Index_Writeback_Inv)
+#define Index_Load_Tag_I               (Cache_I | Index_Load_Tag)
+#define Index_Load_Tag_D               (Cache_D | Index_Load_Tag)
+#define Index_Store_Tag_I              (Cache_I | Index_Store_Tag)
+#define Index_Store_Tag_D              (Cache_D | Index_Store_Tag)
+#define Hit_Invalidate_I               (Cache_I | Hit_Invalidate)
+#define Hit_Invalidate_D               (Cache_D | Hit_Invalidate)
+#define Hit_Writeback_Inv_D            (Cache_D | Hit_Writeback_Inv)
 
 /*
  * R4000-specific cacheops
  */
-#define Create_Dirty_Excl_D            0x0d
-#define Fill                           0x14
-#define Hit_Writeback_I                        0x18
-#define Hit_Writeback_D                        0x19
+#define Create_Dirty_Excl_D            (Cache_D | 0x0c)
+#define Fill                           (Cache_I | 0x14)
+#define Hit_Writeback_I                        (Cache_I | Hit_Writeback)
+#define Hit_Writeback_D                        (Cache_D | Hit_Writeback)
 
 /*
  * R4000SC and R4400SC-specific cacheops
  */
-#define Index_Invalidate_SI            0x02
-#define Index_Writeback_Inv_SD         0x03
-#define Index_Load_Tag_SI              0x06
-#define Index_Load_Tag_SD              0x07
-#define Index_Store_Tag_SI             0x0A
-#define Index_Store_Tag_SD             0x0B
-#define Create_Dirty_Excl_SD           0x0f
-#define Hit_Invalidate_SI              0x12
-#define Hit_Invalidate_SD              0x13
-#define Hit_Writeback_Inv_SD           0x17
-#define Hit_Writeback_SD               0x1b
-#define Hit_Set_Virtual_SI             0x1e
-#define Hit_Set_Virtual_SD             0x1f
+#define Cache_SI                       0x02
+#define Cache_SD                       0x03
+
+#define Index_Invalidate_SI            (Cache_SI | Index_Writeback_Inv)
+#define Index_Writeback_Inv_SD         (Cache_SD | Index_Writeback_Inv)
+#define Index_Load_Tag_SI              (Cache_SI | Index_Load_Tag)
+#define Index_Load_Tag_SD              (Cache_SD | Index_Load_Tag)
+#define Index_Store_Tag_SI             (Cache_SI | Index_Store_Tag)
+#define Index_Store_Tag_SD             (Cache_SD | Index_Store_Tag)
+#define Create_Dirty_Excl_SD           (Cache_SD | 0x0c)
+#define Hit_Invalidate_SI              (Cache_SI | Hit_Invalidate)
+#define Hit_Invalidate_SD              (Cache_SD | Hit_Invalidate)
+#define Hit_Writeback_Inv_SD           (Cache_SD | Hit_Writeback_Inv)
+#define Hit_Writeback_SD               (Cache_SD | Hit_Writeback)
+#define Hit_Set_Virtual_SI             (Cache_SI | 0x1c)
+#define Hit_Set_Virtual_SD             (Cache_SD | 0x1c)
 
 /*
  * R5000-specific cacheops
  */
-#define R5K_Page_Invalidate_S          0x17
+#define R5K_Page_Invalidate_S          (Cache_S | 0x14)
 
 /*
  * RM7000-specific cacheops
  */
-#define Page_Invalidate_T              0x16
-#define Index_Store_Tag_T              0x0a
-#define Index_Load_Tag_T               0x06
+#define Page_Invalidate_T              (Cache_T | 0x14)
+#define Index_Store_Tag_T              (Cache_T | Index_Store_Tag)
+#define Index_Load_Tag_T               (Cache_T | Index_Load_Tag)
 
 /*
  * R10000-specific cacheops
  * Cacheops 0x02, 0x06, 0x0a, 0x0c-0x0e, 0x16, 0x1a and 0x1e are unused.
  * Most of the _S cacheops are identical to the R4000SC _SD cacheops.
  */
-#define Index_Writeback_Inv_S          0x03
-#define Index_Load_Tag_S               0x07
-#define Index_Store_Tag_S              0x0B
-#define Hit_Invalidate_S               0x13
+#define Index_Writeback_Inv_S          (Cache_S | Index_Writeback_Inv)
+#define Index_Load_Tag_S               (Cache_S | Index_Load_Tag)
+#define Index_Store_Tag_S              (Cache_S | Index_Store_Tag)
+#define Hit_Invalidate_S               (Cache_S | Hit_Invalidate)
 #define Cache_Barrier                  0x14
-#define Hit_Writeback_Inv_S            0x17
-#define Index_Load_Data_I              0x18
-#define Index_Load_Data_D              0x19
-#define Index_Load_Data_S              0x1b
-#define Index_Store_Data_I             0x1c
-#define Index_Store_Data_D             0x1d
-#define Index_Store_Data_S             0x1f
+#define Hit_Writeback_Inv_S            (Cache_S | Hit_Writeback_Inv)
+#define Index_Load_Data_I              (Cache_I | 0x18)
+#define Index_Load_Data_D              (Cache_D | 0x18)
+#define Index_Load_Data_S              (Cache_S | 0x18)
+#define Index_Store_Data_I             (Cache_I | 0x1c)
+#define Index_Store_Data_D             (Cache_D | 0x1c)
+#define Index_Store_Data_S             (Cache_S | 0x1c)
 
 /*
  * Loongson2-specific cacheops
  */
-#define Hit_Invalidate_I_Loongson2     0x00
+#define Hit_Invalidate_I_Loongson2     (Cache_I | 0x00)
 
 #endif /* __ASM_CACHEOPS_H */
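Since the cache selector sits in bits [1:0] and the operation in bits [4:2], each composed definition reduces to the literal it replaces; for example Hit_Writeback_Inv_D = Cache_D | Hit_Writeback_Inv = 0x01 | 0x14 = 0x15, matching the old 0x15. A compile-time spot check in the same spirit (sketch):

#include <linux/bug.h>

/* Sketch: the composed opcodes equal the historical literals. */
static inline void cacheops_sanity_check(void)
{
	BUILD_BUG_ON(Index_Invalidate_I    != 0x00);
	BUILD_BUG_ON(Index_Writeback_Inv_D != 0x01);
	BUILD_BUG_ON(Hit_Invalidate_I      != 0x10);
	BUILD_BUG_ON(Hit_Invalidate_D      != 0x11);
	BUILD_BUG_ON(Hit_Writeback_Inv_D   != 0x15);
}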
index d1e04c943f5f7c7d9ec232851e14d9b5c4215496..eeec8c8e2da2ed59e3d89d099721209203e9c243 100644 (file)
 # define cpu_has_small_pages   (cpu_data[0].options & MIPS_CPU_SP)
 #endif
 
+#ifndef cpu_has_nan_legacy
+#define cpu_has_nan_legacy     (cpu_data[0].options & MIPS_CPU_NAN_LEGACY)
+#endif
+#ifndef cpu_has_nan_2008
+#define cpu_has_nan_2008       (cpu_data[0].options & MIPS_CPU_NAN_2008)
+#endif
+
 #endif /* __ASM_CPU_FEATURES_H */
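As with the other cpu_has_* tests in this header, a platform that knows its cores can pin the new NaN bits in its cpu-feature-overrides.h so the checks constant-fold away; the values below are hypothetical:

/* Hypothetical cpu-feature-overrides.h fragment: 2008-NaN only. */
#define cpu_has_nan_legacy	0
#define cpu_has_nan_2008	1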
index 82ad15f11049284c2f347fc66b68ea5091fb810d..a97ca97285ecf1dd897c9c7a9610209e15ad70c7 100644 (file)
@@ -386,6 +386,8 @@ enum cpu_type_enum {
 #define MIPS_CPU_BP_GHIST      0x8000000000ull /* R12K+ Branch Prediction Global History */
 #define MIPS_CPU_SP            0x10000000000ull /* Small (1KB) page support */
 #define MIPS_CPU_FTLB          0x20000000000ull /* CPU has Fixed-page-size TLB */
+#define MIPS_CPU_NAN_LEGACY    0x40000000000ull /* Legacy NaN implemented */
+#define MIPS_CPU_NAN_2008      0x80000000000ull /* 2008 NaN implemented */
 
 /*
  * CPU ASE encodings
index b01a6ff468e00aab5d185a9dd53023101b34f2b7..e090fc388e02483faf1291e2f4844f823aec5bc2 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/fs.h>
 #include <uapi/linux/elf.h>
 
-#include <asm/cpu-info.h>
 #include <asm/current.h>
 
 /* ELF header e_flags defines. */
@@ -44,6 +43,7 @@
 #define EF_MIPS_OPTIONS_FIRST  0x00000080
 #define EF_MIPS_32BITMODE      0x00000100
 #define EF_MIPS_FP64           0x00000200
+#define EF_MIPS_NAN2008                0x00000400
 #define EF_MIPS_ABI            0x0000f000
 #define EF_MIPS_ARCH           0xf0000000
 
@@ -227,7 +227,7 @@ struct mips_elf_abiflags_v0 {
        int __res = 1;                                                  \
        struct elfhdr *__h = (hdr);                                     \
                                                                        \
-       if (__h->e_machine != EM_MIPS)                                  \
+       if (!mips_elf_check_machine(__h))                               \
                __res = 0;                                              \
        if (__h->e_ident[EI_CLASS] != ELFCLASS32)                       \
                __res = 0;                                              \
@@ -258,7 +258,7 @@ struct mips_elf_abiflags_v0 {
        int __res = 1;                                                  \
        struct elfhdr *__h = (hdr);                                     \
                                                                        \
-       if (__h->e_machine != EM_MIPS)                                  \
+       if (!mips_elf_check_machine(__h))                               \
                __res = 0;                                              \
        if (__h->e_ident[EI_CLASS] != ELFCLASS64)                       \
                __res = 0;                                              \
@@ -285,6 +285,11 @@ struct mips_elf_abiflags_v0 {
 
 #endif /* !defined(ELF_ARCH) */
 
+#define mips_elf_check_machine(x) ((x)->e_machine == EM_MIPS)
+
+#define vmcore_elf32_check_arch mips_elf_check_machine
+#define vmcore_elf64_check_arch mips_elf_check_machine
+
 struct mips_abi;
 
 extern struct mips_abi mips_abi;
@@ -305,7 +310,7 @@ do {                                                                        \
                                                                        \
        current->thread.abi = &mips_abi;                                \
                                                                        \
-       current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31;            \
+       mips_set_personality_nan(state);                                \
 } while (0)
 
 #endif /* CONFIG_32BIT */
@@ -367,7 +372,7 @@ do {                                                                        \
        else                                                            \
                current->thread.abi = &mips_abi;                        \
                                                                        \
-       current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31;            \
+       mips_set_personality_nan(state);                                \
                                                                        \
        p = personality(current->personality);                          \
        if (p != PER_LINUX32 && p != PER_LINUX)                         \
@@ -432,6 +437,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
                                       int uses_interp);
 
 struct arch_elf_state {
+       int nan_2008;
        int fp_abi;
        int interp_fp_abi;
        int overall_fp_mode;
@@ -440,17 +446,23 @@ struct arch_elf_state {
 #define MIPS_ABI_FP_UNKNOWN    (-1)    /* Unknown FP ABI (kernel internal) */
 
 #define INIT_ARCH_ELF_STATE {                  \
+       .nan_2008 = -1,                         \
        .fp_abi = MIPS_ABI_FP_UNKNOWN,          \
        .interp_fp_abi = MIPS_ABI_FP_UNKNOWN,   \
        .overall_fp_mode = -1,                  \
 }
 
+/* Whether to accept legacy-NaN and 2008-NaN user binaries.  */
+extern bool mips_use_nan_legacy;
+extern bool mips_use_nan_2008;
+
 extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
                            bool is_interp, struct arch_elf_state *state);
 
-extern int arch_check_elf(void *ehdr, bool has_interpreter,
+extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
                          struct arch_elf_state *state);
 
+extern void mips_set_personality_nan(struct arch_elf_state *state);
 extern void mips_set_personality_fp(struct arch_elf_state *state);
 
 #endif /* _ASM_ELF_H */
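The new plumbing records the binary's NaN encoding in arch_elf_state (arch_check_elf now also receives the interpreter's ELF header so both can be reconciled), and SET_PERSONALITY2 applies the result through mips_set_personality_nan() instead of copying boot_cpu_data.fpu_csr31 unconditionally. The flag driving the choice is EF_MIPS_NAN2008 in e_flags; a sketch of the shape of that check (the real logic lives in arch/mips/kernel/elf.c, outside this hunk):

/* Sketch only: derive the requested NaN ABI from an ELF header. */
static void record_nan_abi_sketch(struct elfhdr *ehdr,
				  struct arch_elf_state *state)
{
	state->nan_2008 = !!(ehdr->e_flags & EF_MIPS_NAN2008);
}

mips_use_nan_legacy and mips_use_nan_2008 then gate which of the two flavours the kernel actually accepts at exec time.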
index 9cbf383b8834c7551820930b0dc98a66895012fa..f06f97bd62df906bf4160594119dec4d52b028eb 100644 (file)
@@ -179,6 +179,10 @@ static inline void lose_fpu_inatomic(int save, struct task_struct *tsk)
                if (save)
                        _save_fp(tsk);
                __disable_fpu();
+       } else {
+               /* FPU should not have been left enabled with no owner */
+               WARN(read_c0_status() & ST0_CU1,
+                    "Orphaned FPU left enabled");
        }
        KSTK_STATUS(tsk) &= ~ST0_CU1;
        clear_tsk_thread_flag(tsk, TIF_USEDFPU);
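The new else branch documents an invariant rather than changing behaviour: when the outgoing task does not own the FPU, ST0_CU1 must already be clear, so a set bit means some path enabled the FPU and forgot its owner. Restated as a standalone check (sketch):

/* Sketch: CU1 may only be set while some task owns the FPU. */
static inline void warn_on_orphaned_fpu(int fpu_owned)
{
	WARN(!fpu_owned && (read_c0_status() & ST0_CU1),
	     "Orphaned FPU left enabled");
}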
index 2f021cdfba4f8fc60a5348f7822f0014d02a3eff..3225c3c0724b3642392bad7e9c395c9451afe555 100644 (file)
@@ -79,7 +79,7 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 /*
  * Break instruction with special math emu break code set
  */
-#define BREAK_MATH (0x0000000d | (BRK_MEMU << 16))
+#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))
 
 #define SIGNALLING_NAN 0x7ff800007ff80000LL
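BREAK_MATH now takes the ISA mode because the math-emu break point is encoded differently in microMIPS (low bits 0x7) than in the classic ISA (0xd). A standalone sketch of the two encodings; the BRK_MEMU value below is a stand-in for illustration, the real constant lives in asm/break.h:

#include <stdio.h>

#define BRK_MEMU 514   /* illustrative stand-in; see asm/break.h */
#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))

int main(void)
{
        printf("classic MIPS: %#010x\n", BREAK_MATH(0));
        printf("microMIPS:    %#010x\n", BREAK_MATH(1));
        return 0;
}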
 
index d10fd80dbb7e96b898d2230c2d0f5112d02c3bc1..2b4dc7ad53b8a32bc139dd33d15cb5bb533f8d85 100644 (file)
@@ -275,6 +275,7 @@ static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long si
  */
 #define ioremap_cachable(offset, size)                                 \
        __ioremap_mode((offset), (size), _page_cachable_default)
+#define ioremap_cache ioremap_cachable
 
 /*
  * These two are MIPS specific ioremap variant.         ioremap_cacheable_cow
index e7b138b4b3d37f9f038aaec933b5040f0e7c3b2a..65c351e328cc932af4aedca946ee19f9bd43f8c3 100644 (file)
@@ -84,41 +84,11 @@ static inline void arch_local_irq_restore(unsigned long flags)
        : "memory");
 }
 
-static inline void __arch_local_irq_restore(unsigned long flags)
-{
-       __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    noat                                            \n"
-#if defined(CONFIG_IRQ_MIPS_CPU)
-       /*
-        * Slow, but doesn't suffer from a relatively unlikely race
-        * condition we're having since days 1.
-        */
-       "       beqz    %[flags], 1f                                    \n"
-       "       di                                                      \n"
-       "       ei                                                      \n"
-       "1:                                                             \n"
-#else
-       /*
-        * Fast, dangerous.  Life is fun, life is good.
-        */
-       "       mfc0    $1, $12                                         \n"
-       "       ins     $1, %[flags], 0, 1                              \n"
-       "       mtc0    $1, $12                                         \n"
-#endif
-       "       " __stringify(__irq_disable_hazard) "                   \n"
-       "       .set    pop                                             \n"
-       : [flags] "=r" (flags)
-       : "0" (flags)
-       : "memory");
-}
 #else
 /* Functions that require preempt_{dis,en}able() are in mips-atomic.c */
 void arch_local_irq_disable(void);
 unsigned long arch_local_irq_save(void);
 void arch_local_irq_restore(unsigned long flags);
-void __arch_local_irq_restore(unsigned long flags);
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 
 static inline void arch_local_irq_enable(void)
index 7c191443c7ea199f876a2c495b2b6d3f41bfc1d5..f6b12790716c071db8a3ce17971dd146c30025f5 100644 (file)
@@ -58,7 +58,7 @@
 #define KVM_MAX_VCPUS          1
 #define KVM_USER_MEM_SLOTS     8
 /* memory slots that are not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS  0
+#define KVM_PRIVATE_MEM_SLOTS  0
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 #define KVM_INVALID_INST               0xdeadbeef
 #define KVM_INVALID_ADDR               0xdeadbeef
 
-#define KVM_MALTA_GUEST_RTC_ADDR       0xb8000070UL
-
-#define GUEST_TICKS_PER_JIFFY          (40000000/HZ)
-#define MS_TO_NS(x)                    (x * 1E6L)
-
-#define CAUSEB_DC                      27
-#define CAUSEF_DC                      (_ULCAST_(1) << 27)
-
 extern atomic_t kvm_mips_instance;
 extern kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
 extern void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
@@ -289,34 +281,6 @@ enum mips_mmu_types {
        MMU_TYPE_R8000
 };
 
-/*
- * Trap codes
- */
-#define T_INT                  0       /* Interrupt pending */
-#define T_TLB_MOD              1       /* TLB modified fault */
-#define T_TLB_LD_MISS          2       /* TLB miss on load or ifetch */
-#define T_TLB_ST_MISS          3       /* TLB miss on a store */
-#define T_ADDR_ERR_LD          4       /* Address error on a load or ifetch */
-#define T_ADDR_ERR_ST          5       /* Address error on a store */
-#define T_BUS_ERR_IFETCH       6       /* Bus error on an ifetch */
-#define T_BUS_ERR_LD_ST                7       /* Bus error on a load or store */
-#define T_SYSCALL              8       /* System call */
-#define T_BREAK                        9       /* Breakpoint */
-#define T_RES_INST             10      /* Reserved instruction exception */
-#define T_COP_UNUSABLE         11      /* Coprocessor unusable */
-#define T_OVFLOW               12      /* Arithmetic overflow */
-
-/*
- * Trap definitions added for r4000 port.
- */
-#define T_TRAP                 13      /* Trap instruction */
-#define T_VCEI                 14      /* Virtual coherency exception */
-#define T_MSAFPE               14      /* MSA floating point exception */
-#define T_FPE                  15      /* Floating point exception */
-#define T_MSADIS               21      /* MSA disabled exception */
-#define T_WATCH                        23      /* Watch address reference */
-#define T_VCED                 31      /* Virtual coherency data */
-
 /* Resume Flags */
 #define RESUME_FLAG_DR         (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST       (1<<1)  /* Resume host? */
@@ -686,7 +650,6 @@ extern void kvm_mips_dump_host_tlbs(void);
 extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu);
 extern void kvm_mips_flush_host_tlb(int skip_kseg0);
 extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
-extern int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index);
 
 extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
                                     unsigned long entryhi);
index 4eee221b0cf0bce93fa7adb7b634c9feebcdf21d..2b3487213d1e932247077d8b3f67ba28612f2683 100644 (file)
@@ -115,6 +115,7 @@ static inline int soc_is_qca955x(void)
        return soc_is_qca9556() || soc_is_qca9558();
 }
 
+void ath79_ddr_wb_flush(unsigned int reg);
 void ath79_ddr_set_pci_windows(void);
 
 extern void __iomem *ath79_pll_base;
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
deleted file mode 100644 (file)
index 1e6b587..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef __BCM963XX_TAG_H
-#define __BCM963XX_TAG_H
-
-#define TAGVER_LEN             4       /* Length of Tag Version */
-#define TAGLAYOUT_LEN          4       /* Length of FlashLayoutVer */
-#define SIG1_LEN               20      /* Company Signature 1 Length */
-#define SIG2_LEN               14      /* Company Signature 2 Length */
-#define BOARDID_LEN            16      /* Length of BoardId */
-#define ENDIANFLAG_LEN         2       /* Endian Flag Length */
-#define CHIPID_LEN             6       /* Chip Id Length */
-#define IMAGE_LEN              10      /* Length of Length Field */
-#define ADDRESS_LEN            12      /* Length of Address field */
-#define DUALFLAG_LEN           2       /* Dual Image flag Length */
-#define INACTIVEFLAG_LEN       2       /* Inactie Flag Length */
-#define RSASIG_LEN             20      /* Length of RSA Signature in tag */
-#define TAGINFO1_LEN           30      /* Length of vendor information field1 in tag */
-#define FLASHLAYOUTVER_LEN     4       /* Length of Flash Layout Version String tag */
-#define TAGINFO2_LEN           16      /* Length of vendor information field2 in tag */
-#define ALTTAGINFO_LEN         54      /* Alternate length for vendor information; Pirelli */
-
-#define NUM_PIRELLI            2
-#define IMAGETAG_CRC_START     0xFFFFFFFF
-
-#define PIRELLI_BOARDS { \
-       "AGPF-S0", \
-       "DWV-S0", \
-}
-
-/*
- * The broadcom firmware assumes the rootfs starts the image,
- * therefore uses the rootfs start (flash_image_address)
- * to determine where to flash the image.  Since we have the kernel first
- * we have to give it the kernel address, but the crc uses the length
- * associated with this address (root_length), which is added to the kernel
- * length (kernel_length) to determine the length of image to flash and thus
- * needs to be rootfs + deadcode (jffs2 EOF marker)
-*/
-
-struct bcm_tag {
-       /* 0-3: Version of the image tag */
-       char tag_version[TAGVER_LEN];
-       /* 4-23: Company Line 1 */
-       char sig_1[SIG1_LEN];
-       /*  24-37: Company Line 2 */
-       char sig_2[SIG2_LEN];
-       /* 38-43: Chip this image is for */
-       char chip_id[CHIPID_LEN];
-       /* 44-59: Board name */
-       char board_id[BOARDID_LEN];
-       /* 60-61: Map endianness -- 1 BE 0 LE */
-       char big_endian[ENDIANFLAG_LEN];
-       /* 62-71: Total length of image */
-       char total_length[IMAGE_LEN];
-       /* 72-83: Address in memory of CFE */
-       char cfe__address[ADDRESS_LEN];
-       /* 84-93: Size of CFE */
-       char cfe_length[IMAGE_LEN];
-       /* 94-105: Address in memory of image start
-        * (kernel for OpenWRT, rootfs for stock firmware)
-        */
-       char flash_image_start[ADDRESS_LEN];
-       /* 106-115: Size of rootfs */
-       char root_length[IMAGE_LEN];
-       /* 116-127: Address in memory of kernel */
-       char kernel_address[ADDRESS_LEN];
-       /* 128-137: Size of kernel */
-       char kernel_length[IMAGE_LEN];
-       /* 138-139: Unused at the moment */
-       char dual_image[DUALFLAG_LEN];
-       /* 140-141: Unused at the moment */
-       char inactive_flag[INACTIVEFLAG_LEN];
-       /* 142-161: RSA Signature (not used; some vendors may use this) */
-       char rsa_signature[RSASIG_LEN];
-       /* 162-191: Compilation and related information (not used in OpenWrt) */
-       char information1[TAGINFO1_LEN];
-       /* 192-195: Version flash layout */
-       char flash_layout_ver[FLASHLAYOUTVER_LEN];
-       /* 196-199: kernel+rootfs CRC32 */
-       __u32 fskernel_crc;
-       /* 200-215: Unused except on Alice Gate where is is information */
-       char information2[TAGINFO2_LEN];
-       /* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */
-       __u32 image_crc;
-       /* 220-223: CRC32 of rootfs partition */
-       __u32 rootfs_crc;
-       /* 224-227: CRC32 of kernel partition */
-       __u32 kernel_crc;
-       /* 228-235: Unused at present */
-       char reserved1[8];
-       /* 236-239: CRC32 of header excluding last 20 bytes */
-       __u32 header_crc;
-       /* 240-255: Unused at present */
-       char reserved2[16];
-};
-
-#endif /* __BCM63XX_TAG_H */
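Every field of the removed bcm_tag layout is a fixed-width char array or __u32, so the byte offsets quoted in its comments follow directly from the field sizes. A trimmed-down userspace copy (illustrative, not the kernel struct) checks the first few with offsetof():

#include <stddef.h>
#include <stdio.h>

struct bcm_tag_head {               /* first five fields of the deleted struct */
        char tag_version[4];        /* offset 0 */
        char sig_1[20];             /* offset 4 */
        char sig_2[14];             /* offset 24 */
        char chip_id[6];            /* offset 38 */
        char board_id[16];          /* offset 44 */
};

int main(void)
{
        printf("sig_2=%zu chip_id=%zu board_id=%zu\n",
               offsetof(struct bcm_tag_head, sig_2),
               offsetof(struct bcm_tag_head, chip_id),
               offsetof(struct bcm_tag_head, board_id));
        return 0;
}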
diff --git a/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h b/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h
new file mode 100644 (file)
index 0000000..4682308
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H
+
+/*
+ * CPU feature overrides for PIC32 boards
+ */
+#ifdef CONFIG_CPU_MIPS32
+#define cpu_has_vint           1
+#define cpu_has_veic           0
+#define cpu_has_tlb            1
+#define cpu_has_4kex           1
+#define cpu_has_4k_cache       1
+#define cpu_has_fpu            0
+#define cpu_has_counter                1
+#define cpu_has_llsc           1
+#define cpu_has_nofpuex                0
+#define cpu_icache_snoops_remote_store 1
+#endif
+
+#ifdef CONFIG_CPU_MIPS64
+#error This platform does not support 64bit.
+#endif
+
+#endif /* __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-pic32/irq.h b/arch/mips/include/asm/mach-pic32/irq.h
new file mode 100644 (file)
index 0000000..864330c
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef __ASM_MACH_PIC32_IRQ_H
+#define __ASM_MACH_PIC32_IRQ_H
+
+#define NR_IRQS        256
+#define MIPS_CPU_IRQ_BASE 0
+
+#include_next <irq.h>
+
+#endif /* __ASM_MACH_PIC32_IRQ_H */
diff --git a/arch/mips/include/asm/mach-pic32/pic32.h b/arch/mips/include/asm/mach-pic32/pic32.h
new file mode 100644 (file)
index 0000000..ce52e91
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef _ASM_MACH_PIC32_H
+#define _ASM_MACH_PIC32_H
+
+#include <linux/io.h>
+
+/*
+ * PIC32 register offsets for SET/CLR/INV where supported.
+ */
+#define PIC32_CLR(_reg)                ((_reg) + 0x04)
+#define PIC32_SET(_reg)                ((_reg) + 0x08)
+#define PIC32_INV(_reg)                ((_reg) + 0x0C)
+
+/*
+ * PIC32 Base Register Offsets
+ */
+#define PIC32_BASE_CONFIG      0x1f800000
+#define PIC32_BASE_OSC         0x1f801200
+#define PIC32_BASE_RESET       0x1f801240
+#define PIC32_BASE_PPS         0x1f801400
+#define PIC32_BASE_UART                0x1f822000
+#define PIC32_BASE_PORT                0x1f860000
+#define PIC32_BASE_DEVCFG2     0x1fc4ff44
+
+/*
+ * Register unlock sequence required for some register access.
+ */
+void pic32_syskey_unlock_debug(const char *fn, const ulong ln);
+#define pic32_syskey_unlock()  \
+       pic32_syskey_unlock_debug(__func__, __LINE__)
+
+#endif /* _ASM_MACH_PIC32_H */
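The SET/CLR/INV offsets encode the Microchip convention of shadowing each peripheral register at +0x04/+0x08/+0x0C, so a single store clears, sets, or toggles bits with no read-modify-write on the base register. A small sketch that only computes the derived addresses (it reuses PIC32_BASE_PORT from above and touches no hardware):

#include <stdio.h>

#define PIC32_CLR(_reg) ((_reg) + 0x04)
#define PIC32_SET(_reg) ((_reg) + 0x08)
#define PIC32_INV(_reg) ((_reg) + 0x0C)

int main(void)
{
        unsigned long port = 0x1f860000;   /* PIC32_BASE_PORT */

        printf("SET=%#lx CLR=%#lx INV=%#lx\n",
               PIC32_SET(port), PIC32_CLR(port), PIC32_INV(port));
        return 0;
}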
diff --git a/arch/mips/include/asm/mach-pic32/spaces.h b/arch/mips/include/asm/mach-pic32/spaces.h
new file mode 100644 (file)
index 0000000..046a0a9
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef _ASM_MACH_PIC32_SPACES_H
+#define _ASM_MACH_PIC32_SPACES_H
+
+#ifdef CONFIG_PIC32MZDA
+#define PHYS_OFFSET    _AC(0x08000000, UL)
+#define UNCAC_BASE     _AC(0xa8000000, UL)
+#endif
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* __ASM_MACH_PIC32_SPACES_H */
diff --git a/arch/mips/include/asm/mach-ralink/irq.h b/arch/mips/include/asm/mach-ralink/irq.h
new file mode 100644 (file)
index 0000000..4321865
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef __ASM_MACH_RALINK_IRQ_H
+#define __ASM_MACH_RALINK_IRQ_H
+
+#define GIC_NUM_INTRS  64
+#define NR_IRQS 256
+
+#include_next <irq.h>
+
+#endif
diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h
new file mode 100644 (file)
index 0000000..610b61e
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _MT7621_REGS_H_
+#define _MT7621_REGS_H_
+
+#define MT7621_PALMBUS_BASE            0x1C000000
+#define MT7621_PALMBUS_SIZE            0x03FFFFFF
+
+#define MT7621_SYSC_BASE               0x1E000000
+
+#define SYSC_REG_CHIP_NAME0            0x00
+#define SYSC_REG_CHIP_NAME1            0x04
+#define SYSC_REG_CHIP_REV              0x0c
+#define SYSC_REG_SYSTEM_CONFIG0                0x10
+#define SYSC_REG_SYSTEM_CONFIG1                0x14
+
+#define CHIP_REV_PKG_MASK              0x1
+#define CHIP_REV_PKG_SHIFT             16
+#define CHIP_REV_VER_MASK              0xf
+#define CHIP_REV_VER_SHIFT             8
+#define CHIP_REV_ECO_MASK              0xf
+
+#define MT7621_DRAM_BASE                0x0
+#define MT7621_DDR2_SIZE_MIN           32
+#define MT7621_DDR2_SIZE_MAX           256
+
+#define MT7621_CHIP_NAME0              0x3637544D
+#define MT7621_CHIP_NAME1              0x20203132
+
+#define MIPS_GIC_IRQ_BASE           (MIPS_CPU_IRQ_BASE + 8)
+
+#endif
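MT7621_CHIP_NAME0/1 are the ASCII identification words the SoC returns through SYSC_REG_CHIP_NAME0/1; read back on a little-endian system the eight bytes spell the product name, which is presumably how the platform code matches the chip. A sketch (assumes a little-endian build host):

#include <stdio.h>
#include <string.h>

int main(void)
{
        unsigned int name[2] = { 0x3637544D, 0x20203132 };
        char s[9];

        memcpy(s, name, 8);          /* byte order is the point: LE host assumed */
        s[8] = '\0';
        printf("chip id: \"%s\"\n", s);   /* prints "MT7621  " */
        return 0;
}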
diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h
new file mode 100644 (file)
index 0000000..15db1b3
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Ralink MT7621 specific CPU feature overrides
+ *
+ * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
+ * Copyright (C) 2015 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This file was derived from: include/asm-mips/cpu-features.h
+ *     Copyright (C) 2003, 2004 Ralf Baechle
+ *     Copyright (C) 2004 Maciej W. Rozycki
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+#ifndef _MT7621_CPU_FEATURE_OVERRIDES_H
+#define _MT7621_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_tlb            1
+#define cpu_has_4kex           1
+#define cpu_has_3k_cache       0
+#define cpu_has_4k_cache       1
+#define cpu_has_tx39_cache     0
+#define cpu_has_sb1_cache      0
+#define cpu_has_fpu            0
+#define cpu_has_32fpr          0
+#define cpu_has_counter                1
+#define cpu_has_watch          1
+#define cpu_has_divec          1
+
+#define cpu_has_prefetch       1
+#define cpu_has_ejtag          1
+#define cpu_has_llsc           1
+
+#define cpu_has_mips16         1
+#define cpu_has_mdmx           0
+#define cpu_has_mips3d         0
+#define cpu_has_smartmips      0
+
+#define cpu_has_mips32r1       1
+#define cpu_has_mips32r2       1
+#define cpu_has_mips64r1       0
+#define cpu_has_mips64r2       0
+
+#define cpu_has_dsp            1
+#define cpu_has_dsp2           0
+#define cpu_has_mipsmt         1
+
+#define cpu_has_64bits         0
+#define cpu_has_64bit_zero_reg 0
+#define cpu_has_64bit_gp_regs  0
+#define cpu_has_64bit_addresses        0
+
+#define cpu_dcache_line_size() 32
+#define cpu_icache_line_size() 32
+
+#define cpu_has_dc_aliases     0
+#define cpu_has_vtag_icache    0
+
+#define cpu_has_rixi           0
+#define cpu_has_tlbinv         0
+#define cpu_has_userlocal      1
+
+#endif /* _MT7621_CPU_FEATURE_OVERRIDES_H */
index 6516e9da51334916b04b6f8689a0857faad5f0ca..b196825a1de9ca1f3b60df1059978569c93ad8c2 100644 (file)
@@ -243,6 +243,10 @@ BUILD_CM_Cx_R_(tcid_8_priority,    0x80)
 #define  CM_GCR_BASE_CMDEFTGT_IOCU0            2
 #define  CM_GCR_BASE_CMDEFTGT_IOCU1            3
 
+/* GCR_RESET_EXT_BASE register fields */
+#define CM_GCR_RESET_EXT_BASE_EVARESET         BIT(31)
+#define CM_GCR_RESET_EXT_BASE_UEB              BIT(30)
+
 /* GCR_ACCESS register fields */
 #define CM_GCR_ACCESS_ACCESSEN_SHF             0
 #define CM_GCR_ACCESS_ACCESSEN_MSK             (_ULCAST_(0xff) << 0)
index 4b89f28047f7118588acf2152ba43e44a5d353e2..1f6ea8352ca90408772f0a7f72f7e213bece28ba 100644 (file)
@@ -52,7 +52,7 @@ do {                                                                  \
        __this_cpu_inc(mipsr2emustats.M);                               \
        err = __get_user(nir, (u32 __user *)regs->cp0_epc);             \
        if (!err) {                                                     \
-               if (nir == BREAK_MATH)                                  \
+               if (nir == BREAK_MATH(0))                               \
                        __this_cpu_inc(mipsr2bdemustats.M);             \
        }                                                               \
        preempt_enable();                                               \
index e43aca183c9918d4138f000c4a385659f8523eae..3ad19ad04d8a282179423e9fcc5ecbb5557d26c7 100644 (file)
 #define CAUSEF_IV              (_ULCAST_(1)   << 23)
 #define CAUSEB_PCI             26
 #define CAUSEF_PCI             (_ULCAST_(1)   << 26)
+#define CAUSEB_DC              27
+#define CAUSEF_DC              (_ULCAST_(1)   << 27)
 #define CAUSEB_CE              28
 #define CAUSEF_CE              (_ULCAST_(3)   << 28)
 #define CAUSEB_TI              30
 #define CAUSEB_BD              31
 #define CAUSEF_BD              (_ULCAST_(1)   << 31)
 
+/*
+ * Cause.ExcCode trap codes.
+ */
+#define EXCCODE_INT            0       /* Interrupt pending */
+#define EXCCODE_MOD            1       /* TLB modified fault */
+#define EXCCODE_TLBL           2       /* TLB miss on load or ifetch */
+#define EXCCODE_TLBS           3       /* TLB miss on a store */
+#define EXCCODE_ADEL           4       /* Address error on a load or ifetch */
+#define EXCCODE_ADES           5       /* Address error on a store */
+#define EXCCODE_IBE            6       /* Bus error on an ifetch */
+#define EXCCODE_DBE            7       /* Bus error on a load or store */
+#define EXCCODE_SYS            8       /* System call */
+#define EXCCODE_BP             9       /* Breakpoint */
+#define EXCCODE_RI             10      /* Reserved instruction exception */
+#define EXCCODE_CPU            11      /* Coprocessor unusable */
+#define EXCCODE_OV             12      /* Arithmetic overflow */
+#define EXCCODE_TR             13      /* Trap instruction */
+#define EXCCODE_MSAFPE         14      /* MSA floating point exception */
+#define EXCCODE_FPE            15      /* Floating point exception */
+#define EXCCODE_TLBRI          19      /* TLB Read-Inhibit exception */
+#define EXCCODE_TLBXI          20      /* TLB Execution-Inhibit exception */
+#define EXCCODE_MSADIS         21      /* MSA disabled exception */
+#define EXCCODE_MDMX           22      /* MDMX unusable exception */
+#define EXCCODE_WATCH          23      /* Watch address reference */
+#define EXCCODE_MCHECK         24      /* Machine check */
+#define EXCCODE_THREAD         25      /* Thread exceptions (MT) */
+#define EXCCODE_DSPDIS         26      /* DSP disabled exception */
+#define EXCCODE_GE             27      /* Virtualized guest exception (VZ) */
+
+/* Implementation specific trap codes used by MIPS cores */
+#define MIPS_EXCCODE_TLBPAR    16      /* TLB parity error exception */
+
 /*
  * Bits in the coprocessor 0 config register.
  */
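With the trap codes named EXCCODE_*, dispatchers key off Cause.ExcCode, which occupies bits 6:2 of the Cause register. A userspace sketch of the extraction (the sample cause value is made up):

#include <stdio.h>

#define EXCCODE_TLBL 2   /* TLB miss on load or ifetch, as defined above */

int main(void)
{
        unsigned int cause = EXCCODE_TLBL << 2;      /* pretend Cause reports TLBL */
        unsigned int exccode = (cause >> 2) & 0x1f;  /* ExcCode is Cause[6:2] */

        printf("exccode=%u TLBL=%d\n", exccode, exccode == EXCCODE_TLBL);
        return 0;
}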
index 8ebd3f579b848ea5c1a1f6c3cb4d79a2bafbcfac..3ed10a8d78651373a55e4f99c722767cbbc26810 100644 (file)
@@ -128,7 +128,8 @@ static inline int octeon_has_feature(enum octeon_feature feature)
        case OCTEON_FEATURE_PCIE:
                return OCTEON_IS_MODEL(OCTEON_CN56XX)
                        || OCTEON_IS_MODEL(OCTEON_CN52XX)
-                       || OCTEON_IS_MODEL(OCTEON_CN6XXX);
+                       || OCTEON_IS_MODEL(OCTEON_CN6XXX)
+                       || OCTEON_IS_MODEL(OCTEON_CN7XXX);
 
        case OCTEON_FEATURE_SRIO:
                return OCTEON_IS_MODEL(OCTEON_CN63XX)
index 2046c023022406d8a336197db13df1a75b1e8021..21ed7150fec3f4847aef98ac45cd56241b4f50b3 100644 (file)
@@ -33,7 +33,7 @@
 #define PAGE_SHIFT     16
 #endif
 #define PAGE_SIZE      (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK      (~(PAGE_SIZE - 1))
+#define PAGE_MASK      (~((1 << PAGE_SHIFT) - 1))
 
 /*
  * This is used for calculating the real page sizes
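The PAGE_MASK change matters once the mask meets an address wider than 32 bits: a 32-bit unsigned mask zero-extends and strips the upper bits, while a plain int mask sign-extends and keeps them. A standalone demo with PAGE_SHIFT fixed at 16 to match the 64K-page case (the uint32_t cast stands in for a 32-bit kernel's unsigned long; this is one reading of the motivation, not a quote of the commit message):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 16

int main(void)
{
        uint32_t old_mask = ~((1UL << PAGE_SHIFT) - 1); /* unsigned: zero-extends */
        int      new_mask = ~((1 << PAGE_SHIFT) - 1);   /* signed: sign-extends */
        uint64_t phys = 0x1ffff2345ULL;                 /* 33-bit physical address */

        printf("old: %#llx\n", (unsigned long long)(phys & old_mask));  /* 0xffff0000 */
        printf("new: %#llx\n", (unsigned long long)(phys & new_mask));  /* 0x1ffff0000 */
        return 0;
}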
index 6995b4a02e2359bf6e2e1b8493bcae55443753f1..9a4fe0133ff1c7d82685d426dc1a91b371a972a6 100644 (file)
@@ -353,7 +353,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
 static inline pte_t pte_mkyoung(pte_t pte)
 {
        pte_val(pte) |= _PAGE_ACCESSED;
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (!(pte_val(pte) & _PAGE_NO_READ))
                pte_val(pte) |= _PAGE_SILENT_READ;
        else
@@ -542,7 +542,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
 {
        pmd_val(pmd) |= _PAGE_ACCESSED;
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (!(pmd_val(pmd) & _PAGE_NO_READ))
                pmd_val(pmd) |= _PAGE_SILENT_READ;
        else
index 3f832c3dd8f5f1cad76461b47ee00a0b7f85722d..041153f5cf93432ea1140c10cd53c3d48e91ab76 100644 (file)
@@ -45,7 +45,7 @@ extern unsigned int vced_count, vcei_count;
  * User space process size: 2GB. This is hardcoded into a few places,
  * so don't change it unless you know what you are doing.
  */
-#define TASK_SIZE      0x7fff8000UL
+#define TASK_SIZE      0x80000000UL
 #endif
 
 #define STACK_TOP_MAX  TASK_SIZE
index a71da576883c8f4b1a3d60279ebfaefb95798031..eebf3954960644daf75dd5ae0d6e7eca1c50f34a 100644 (file)
                .set    reorder
                .set    noat
                mfc0    a0, CP0_STATUS
-               li      v1, 0xff00
+               li      v1, ST0_CU1 | ST0_IM
                ori     a0, STATMASK
                xori    a0, STATMASK
                mtc0    a0, CP0_STATUS
                ori     a0, STATMASK
                xori    a0, STATMASK
                mtc0    a0, CP0_STATUS
-               li      v1, 0xff00
+               li      v1, ST0_CU1 | ST0_FR | ST0_IM
                and     a0, v1
                LONG_L  v0, PT_STATUS(sp)
                nor     v1, $0, v1
index 6499d93ae68d7096d63416a349d9afcdcc0cb3ae..47bc45a67e9ba187fb709b97e755900e9bcb4caf 100644 (file)
@@ -101,10 +101,8 @@ static inline void syscall_get_arguments(struct task_struct *task,
        /* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */
        if ((config_enabled(CONFIG_32BIT) ||
            test_tsk_thread_flag(task, TIF_32BIT_REGS)) &&
-           (regs->regs[2] == __NR_syscall)) {
+           (regs->regs[2] == __NR_syscall))
                i++;
-               n++;
-       }
 
        while (n--)
                ret |= mips_get_syscall_arg(args++, task, regs, i++);
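Dropping the n++ fixes decoding of the O32 syscall(2) indirection: when the real syscall number arrives in the first argument slot, reading starts one slot later (i++), but the caller still asked for exactly n arguments, so bumping n as well read one slot past the request. A simplified standalone model of the corrected loop (slot contents are made up):

#include <stdio.h>

int main(void)
{
        long slots[4] = { 4012, 42, -1, -1 };  /* slot 0: indirect syscall number,
                                                  value illustrative */
        long args[4];
        int i = 0, n = 1, k = 0;

        i++;                    /* skip the indirect number; n stays as requested */
        while (n--)
                args[k++] = slots[i++];

        printf("decoded %d arg(s), arg0=%ld\n", k, args[0]);   /* 1, 42 */
        return 0;
}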
index 9b44d5a816fa3ee64faf18a80c7f415dfb681a4d..ddea53e3a9bb7bbd56463a0727ff2b3bc378f614 100644 (file)
@@ -116,7 +116,8 @@ enum cop_op {
        dmtc_op       = 0x05, ctc_op        = 0x06,
        mthc0_op      = 0x06, mthc_op       = 0x07,
        bc_op         = 0x08, bc1eqz_op     = 0x09,
-       bc1nez_op     = 0x0d, cop_op        = 0x10,
+       mfmc0_op      = 0x0b, bc1nez_op     = 0x0d,
+       wrpgpr_op     = 0x0e, cop_op        = 0x10,
        copm_op       = 0x18
 };
 
@@ -529,7 +530,7 @@ enum MIPS6e_i8_func {
 };
 
 /*
- * (microMIPS & MIPS16e) NOP instruction.
+ * (microMIPS) NOP instruction.
  */
 #define MM_NOP16       0x0c00
 
@@ -679,7 +680,7 @@ struct fp0_format {         /* FPU multiply and add format (MIPS32) */
        ;))))))
 };
 
-struct mm_fp0_format {         /* FPU multipy and add format (microMIPS) */
+struct mm_fp0_format {         /* FPU multiply and add format (microMIPS) */
        __BITFIELD_FIELD(unsigned int opcode : 6,
        __BITFIELD_FIELD(unsigned int ft : 5,
        __BITFIELD_FIELD(unsigned int fs : 5,
@@ -799,6 +800,13 @@ struct mm_x_format {               /* Scaled indexed load format (microMIPS) */
        ;)))))
 };
 
+struct mm_a_format {           /* ADDIUPC format (microMIPS) */
+       __BITFIELD_FIELD(unsigned int opcode : 6,
+       __BITFIELD_FIELD(unsigned int rs : 3,
+       __BITFIELD_FIELD(signed int simmediate : 23,
+       ;)))
+};
+
 /*
  * microMIPS instruction formats (16-bit length)
  */
@@ -940,6 +948,7 @@ union mips_instruction {
        struct mm_i_format mm_i_format;
        struct mm_m_format mm_m_format;
        struct mm_x_format mm_x_format;
+       struct mm_a_format mm_a_format;
        struct mm_b0_format mm_b0_format;
        struct mm_b1_format mm_b1_format;
        struct mm16_m_format mm16_m_format ;
index 90f03a7da6654da7000108042dcf00944d322406..3129795de940b0c370c3eb6926ade8b11f878f91 100644 (file)
 #define __NR_userfaultfd               (__NR_Linux + 357)
 #define __NR_membarrier                        (__NR_Linux + 358)
 #define __NR_mlock2                    (__NR_Linux + 359)
+#define __NR_copy_file_range           (__NR_Linux + 360)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls            359
+#define __NR_Linux_syscalls            360
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux                 4000
-#define __NR_O32_Linux_syscalls                359
+#define __NR_O32_Linux_syscalls                360
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
 #define __NR_userfaultfd               (__NR_Linux + 317)
 #define __NR_membarrier                        (__NR_Linux + 318)
 #define __NR_mlock2                    (__NR_Linux + 319)
+#define __NR_copy_file_range           (__NR_Linux + 320)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls            319
+#define __NR_Linux_syscalls            320
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux                  5000
-#define __NR_64_Linux_syscalls         319
+#define __NR_64_Linux_syscalls         320
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
 #define __NR_userfaultfd               (__NR_Linux + 321)
 #define __NR_membarrier                        (__NR_Linux + 322)
 #define __NR_mlock2                    (__NR_Linux + 323)
+#define __NR_copy_file_range           (__NR_Linux + 324)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls            323
+#define __NR_Linux_syscalls            324
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux                 6000
-#define __NR_N32_Linux_syscalls                323
+#define __NR_N32_Linux_syscalls                324
 
 #endif /* _UAPI_ASM_UNISTD_H */
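With the numbers wired up above, copy_file_range is reachable through syscall(2) before libc grows a wrapper. A sketch for an o32 target (the descriptors are placeholders; the raw prototype is copy_file_range(fd_in, off_in, fd_out, off_out, len, flags)):

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_copy_file_range
#define __NR_copy_file_range 4360   /* o32: __NR_Linux + 360, as above */
#endif

int main(void)
{
        int fd_in = 3, fd_out = 4;  /* placeholder descriptors */
        long n = syscall(__NR_copy_file_range, fd_in, (void *)0,
                         fd_out, (void *)0, 4096, 0);

        if (n < 0)
                perror("copy_file_range");
        else
                printf("copied %ld bytes\n", n);
        return 0;
}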
index 1188e00bb120a2637c53f3916f28b1f990a9515e..1b992c6e3d8e212420673fbb697edeff0b6314e3 100644 (file)
@@ -35,7 +35,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
        int __res = 1;                                                  \
        struct elfhdr *__h = (hdr);                                     \
                                                                        \
-       if (__h->e_machine != EM_MIPS)                                  \
+       if (!mips_elf_check_machine(__h))                               \
                __res = 0;                                              \
        if (__h->e_ident[EI_CLASS] != ELFCLASS32)                       \
                __res = 0;                                              \
index 928767858b867b65cce4aa92f965f96064a1f429..abd3affe5fb3bd71367b908f9439b54ddea7a61c 100644 (file)
@@ -47,7 +47,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
        int __res = 1;                                                  \
        struct elfhdr *__h = (hdr);                                     \
                                                                        \
-       if (__h->e_machine != EM_MIPS)                                  \
+       if (!mips_elf_check_machine(__h))                               \
                __res = 0;                                              \
        if (__h->e_ident[EI_CLASS] != ELFCLASS32)                       \
                __res = 0;                                              \
index 09f4034f239f511867a4d56792de909d0ad0c773..6392dbe504fb3b4050210f7a5ca993362efa352b 100644 (file)
@@ -190,7 +190,7 @@ static inline void check_daddi(void)
        printk("Checking for the daddi bug... ");
 
        local_irq_save(flags);
-       handler = set_except_vector(12, handle_daddi_ov);
+       handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
        /*
         * The following code fails to trigger an overflow exception
         * when executed on R4000 rev. 2.2 or 3.0 (PRId 00000422 or
@@ -214,7 +214,7 @@ static inline void check_daddi(void)
                ".set   pop"
                : "=r" (v), "=&r" (tmp)
                : "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-       set_except_vector(12, handler);
+       set_except_vector(EXCCODE_OV, handler);
        local_irq_restore(flags);
 
        if (daddi_ov) {
@@ -225,14 +225,14 @@ static inline void check_daddi(void)
        printk("yes, workaround... ");
 
        local_irq_save(flags);
-       handler = set_except_vector(12, handle_daddi_ov);
+       handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
        asm volatile(
                "addiu  %1, $0, %2\n\t"
                "dsrl   %1, %1, 1\n\t"
                "daddi  %0, %1, %3"
                : "=r" (v), "=&r" (tmp)
                : "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-       set_except_vector(12, handler);
+       set_except_vector(EXCCODE_OV, handler);
        local_irq_restore(flags);
 
        if (daddi_ov) {
index 6b9064499bd3dfb49ef0c09ad3da48034dea0ff2..b725b713b9f8b56e3763784c18b0550dffd77b6d 100644 (file)
@@ -98,6 +98,161 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
        c->fpu_msk31 = ~(fcsr0 ^ fcsr1) & ~mask;
 }
 
+/*
+ * Determine the IEEE 754 NaN encodings and ABS.fmt/NEG.fmt execution modes
+ * supported by FPU hardware.
+ */
+static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
+{
+       if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+                           MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+                           MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+               unsigned long sr, fir, fcsr, fcsr0, fcsr1;
+
+               sr = read_c0_status();
+               __enable_fpu(FPU_AS_IS);
+
+               fir = read_32bit_cp1_register(CP1_REVISION);
+               if (fir & MIPS_FPIR_HAS2008) {
+                       fcsr = read_32bit_cp1_register(CP1_STATUS);
+
+                       fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+                       write_32bit_cp1_register(CP1_STATUS, fcsr0);
+                       fcsr0 = read_32bit_cp1_register(CP1_STATUS);
+
+                       fcsr1 = fcsr | FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+                       write_32bit_cp1_register(CP1_STATUS, fcsr1);
+                       fcsr1 = read_32bit_cp1_register(CP1_STATUS);
+
+                       write_32bit_cp1_register(CP1_STATUS, fcsr);
+
+                       if (!(fcsr0 & FPU_CSR_NAN2008))
+                               c->options |= MIPS_CPU_NAN_LEGACY;
+                       if (fcsr1 & FPU_CSR_NAN2008)
+                               c->options |= MIPS_CPU_NAN_2008;
+
+                       if ((fcsr0 ^ fcsr1) & FPU_CSR_ABS2008)
+                               c->fpu_msk31 &= ~FPU_CSR_ABS2008;
+                       else
+                               c->fpu_csr31 |= fcsr & FPU_CSR_ABS2008;
+
+                       if ((fcsr0 ^ fcsr1) & FPU_CSR_NAN2008)
+                               c->fpu_msk31 &= ~FPU_CSR_NAN2008;
+                       else
+                               c->fpu_csr31 |= fcsr & FPU_CSR_NAN2008;
+               } else {
+                       c->options |= MIPS_CPU_NAN_LEGACY;
+               }
+
+               write_c0_status(sr);
+       } else {
+               c->options |= MIPS_CPU_NAN_LEGACY;
+       }
+}
+
+/*
+ * IEEE 754 conformance mode to use.  Affects the NaN encoding and the
+ * ABS.fmt/NEG.fmt execution mode.
+ */
+static enum { STRICT, LEGACY, STD2008, RELAXED } ieee754 = STRICT;
+
+/*
+ * Set the IEEE 754 NaN encodings and the ABS.fmt/NEG.fmt execution modes
+ * to be supported by the FPU emulator according to the IEEE 754 conformance
+ * mode selected.  Note that "relaxed" straps the emulator so that it
+ * allows 2008-NaN binaries even for legacy processors.
+ */
+static void cpu_set_nofpu_2008(struct cpuinfo_mips *c)
+{
+       c->options &= ~(MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY);
+       c->fpu_csr31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+       c->fpu_msk31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+
+       switch (ieee754) {
+       case STRICT:
+               if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+                                   MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+                                   MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+                       c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+               } else {
+                       c->options |= MIPS_CPU_NAN_LEGACY;
+                       c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               }
+               break;
+       case LEGACY:
+               c->options |= MIPS_CPU_NAN_LEGACY;
+               c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               break;
+       case STD2008:
+               c->options |= MIPS_CPU_NAN_2008;
+               c->fpu_csr31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               break;
+       case RELAXED:
+               c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+               break;
+       }
+}
+
+/*
+ * Override the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * according to the "ieee754=" parameter.
+ */
+static void cpu_set_nan_2008(struct cpuinfo_mips *c)
+{
+       switch (ieee754) {
+       case STRICT:
+               mips_use_nan_legacy = !!cpu_has_nan_legacy;
+               mips_use_nan_2008 = !!cpu_has_nan_2008;
+               break;
+       case LEGACY:
+               mips_use_nan_legacy = !!cpu_has_nan_legacy;
+               mips_use_nan_2008 = !cpu_has_nan_legacy;
+               break;
+       case STD2008:
+               mips_use_nan_legacy = !cpu_has_nan_2008;
+               mips_use_nan_2008 = !!cpu_has_nan_2008;
+               break;
+       case RELAXED:
+               mips_use_nan_legacy = true;
+               mips_use_nan_2008 = true;
+               break;
+       }
+}
+
+/*
+ * IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode override
+ * settings:
+ *
+ * strict:  accept binaries that request a NaN encoding supported by the FPU
+ * legacy:  only accept legacy-NaN binaries
+ * 2008:    only accept 2008-NaN binaries
+ * relaxed: accept any binaries regardless of whether supported by the FPU
+ */
+static int __init ieee754_setup(char *s)
+{
+       if (!s)
+               return -1;
+       else if (!strcmp(s, "strict"))
+               ieee754 = STRICT;
+       else if (!strcmp(s, "legacy"))
+               ieee754 = LEGACY;
+       else if (!strcmp(s, "2008"))
+               ieee754 = STD2008;
+       else if (!strcmp(s, "relaxed"))
+               ieee754 = RELAXED;
+       else
+               return -1;
+
+       if (!(boot_cpu_data.options & MIPS_CPU_FPU))
+               cpu_set_nofpu_2008(&boot_cpu_data);
+       cpu_set_nan_2008(&boot_cpu_data);
+
+       return 0;
+}
+
+early_param("ieee754", ieee754_setup);
+
 /*
  * Set the FIR feature flags for the FPU emulator.
  */
@@ -113,6 +268,8 @@ static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
        if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
                            MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
                value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W;
+       if (c->options & MIPS_CPU_NAN_2008)
+               value |= MIPS_FPIR_HAS2008;
        c->fpu_id = value;
 }
 
@@ -137,6 +294,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
        }
 
        cpu_set_fpu_fcsr_mask(c);
+       cpu_set_fpu_2008(c);
+       cpu_set_nan_2008(c);
 }
 
 /*
@@ -147,6 +306,8 @@ static void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
        c->options &= ~MIPS_CPU_FPU;
        c->fpu_msk31 = mips_nofpu_msk31;
 
+       cpu_set_nofpu_2008(c);
+       cpu_set_nan_2008(c);
        cpu_set_nofpu_id(c);
 }
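The ieee754_setup() handler above is a plain strcmp chain over the four mode names; a standalone equivalent, for illustration only:

#include <stdio.h>
#include <string.h>

enum ieee754_mode { STRICT, LEGACY, STD2008, RELAXED };

static int ieee754_parse(const char *s, enum ieee754_mode *m)
{
        if (!s)
                return -1;
        if (!strcmp(s, "strict"))  { *m = STRICT;  return 0; }
        if (!strcmp(s, "legacy"))  { *m = LEGACY;  return 0; }
        if (!strcmp(s, "2008"))    { *m = STD2008; return 0; }
        if (!strcmp(s, "relaxed")) { *m = RELAXED; return 0; }
        return -1;
}

int main(void)
{
        enum ieee754_mode m;

        if (ieee754_parse("relaxed", &m) == 0)
                printf("mode=%d\n", (int)m);   /* RELAXED == 3 */
        return 0;
}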
 
index 4a4d9e067c89427fc34e990586f0e8237a9418ba..c3c234dc0c07748ad7696f7b4d3c395df3954293 100644 (file)
 #include <linux/elf.h>
 #include <linux/sched.h>
 
+#include <asm/cpu-info.h>
+
+/* Whether to accept legacy-NaN and 2008-NaN user binaries.  */
+bool mips_use_nan_legacy;
+bool mips_use_nan_2008;
+
 /* FPU modes */
 enum {
        FP_FRE,
@@ -68,15 +74,23 @@ static struct mode_req none_req = { true, true, false, true, true };
 int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
                     bool is_interp, struct arch_elf_state *state)
 {
-       struct elf32_hdr *ehdr32 = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *ehdr = _ehdr;
        struct elf32_phdr *phdr32 = _phdr;
        struct elf64_phdr *phdr64 = _phdr;
        struct mips_elf_abiflags_v0 abiflags;
+       bool elf32;
+       u32 flags;
        int ret;
 
+       elf32 = ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+       flags = elf32 ? ehdr->e32.e_flags : ehdr->e64.e_flags;
+
        /* Let's see if this is an O32 ELF */
-       if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
-               if (ehdr32->e_flags & EF_MIPS_FP64) {
+       if (elf32) {
+               if (flags & EF_MIPS_FP64) {
                        /*
                         * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
                         * later if needed
@@ -120,13 +134,50 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
        return 0;
 }
 
-int arch_check_elf(void *_ehdr, bool has_interpreter,
+int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
                   struct arch_elf_state *state)
 {
-       struct elf32_hdr *ehdr = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *ehdr = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *iehdr = _interp_ehdr;
        struct mode_req prog_req, interp_req;
        int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
-       bool is_mips64;
+       bool elf32;
+       u32 flags;
+
+       elf32 = ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+       flags = elf32 ? ehdr->e32.e_flags : ehdr->e64.e_flags;
+
+       /*
+        * Determine the NaN personality, reject the binary if not allowed.
+        * Also ensure that any interpreter matches the executable.
+        */
+       if (flags & EF_MIPS_NAN2008) {
+               if (mips_use_nan_2008)
+                       state->nan_2008 = 1;
+               else
+                       return -ENOEXEC;
+       } else {
+               if (mips_use_nan_legacy)
+                       state->nan_2008 = 0;
+               else
+                       return -ENOEXEC;
+       }
+       if (has_interpreter) {
+               bool ielf32;
+               u32 iflags;
+
+               ielf32 = iehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+               iflags = ielf32 ? iehdr->e32.e_flags : iehdr->e64.e_flags;
+
+               if ((flags ^ iflags) & EF_MIPS_NAN2008)
+                       return -ELIBBAD;
+       }
 
        if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
                return 0;
@@ -142,21 +193,18 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
                abi0 = abi1 = fp_abi;
        }
 
-       is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ||
-                   (ehdr->e_flags & EF_MIPS_ABI2);
+       if (elf32 && !(flags & EF_MIPS_ABI2)) {
+               /* Default to a mode capable of running code expecting FR=0 */
+               state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
 
-       if (is_mips64) {
+               /* Allow all ABIs we know about */
+               max_abi = MIPS_ABI_FP_64A;
+       } else {
                /* MIPS64 code always uses FR=1, thus the default is easy */
                state->overall_fp_mode = FP_FR1;
 
                /* Disallow access to the various FPXX & FP64 ABIs */
                max_abi = MIPS_ABI_FP_SOFT;
-       } else {
-               /* Default to a mode capable of running code expecting FR=0 */
-               state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
-
-               /* Allow all ABIs we know about */
-               max_abi = MIPS_ABI_FP_64A;
        }
 
        if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
@@ -254,3 +302,27 @@ void mips_set_personality_fp(struct arch_elf_state *state)
                BUG();
        }
 }
+
+/*
+ * Select the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * in FCSR according to the ELF NaN personality.
+ */
+void mips_set_personality_nan(struct arch_elf_state *state)
+{
+       struct cpuinfo_mips *c = &boot_cpu_data;
+       struct task_struct *t = current;
+
+       t->thread.fpu.fcr31 = c->fpu_csr31;
+       switch (state->nan_2008) {
+       case 0:
+               break;
+       case 1:
+               if (!(c->fpu_msk31 & FPU_CSR_NAN2008))
+                       t->thread.fpu.fcr31 |= FPU_CSR_NAN2008;
+               if (!(c->fpu_msk31 & FPU_CSR_ABS2008))
+                       t->thread.fpu.fcr31 |= FPU_CSR_ABS2008;
+               break;
+       default:
+               BUG();
+       }
+}
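arch_check_elf() now keys the NaN personality off the EF_MIPS_NAN2008 bit of e_flags and requires the interpreter's bit to match the executable's. A userspace sketch of the same classification (glibc's <elf.h> carries EF_MIPS_NAN2008; a fallback definition is included in case the header predates it):

#include <elf.h>
#include <stdio.h>

#ifndef EF_MIPS_NAN2008
#define EF_MIPS_NAN2008 0x00000400
#endif

static const char *nan_personality(unsigned int e_flags)
{
        return (e_flags & EF_MIPS_NAN2008) ? "2008-NaN" : "legacy-NaN";
}

int main(void)
{
        unsigned int exe = EF_MIPS_NAN2008, interp = 0;

        printf("executable is %s\n", nan_personality(exe));
        if ((exe ^ interp) & EF_MIPS_NAN2008)
                printf("interpreter mismatch: the loader would return -ELIBBAD\n");
        return 0;
}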
index c6854d9df926d5c44e3654bcc7fd1d57136741b8..705be43c35333f648f04a96f26a4104ba172c46c 100644 (file)
@@ -21,7 +21,7 @@ static struct txx9_pio_reg __iomem *txx9_pioptr;
 
 static int txx9_gpio_get(struct gpio_chip *chip, unsigned int offset)
 {
-       return __raw_readl(&txx9_pioptr->din) & (1 << offset);
+       return !!(__raw_readl(&txx9_pioptr->din) & (1 << offset));
 }
 
 static void txx9_gpio_set_raw(unsigned int offset, int value)
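The !! fix normalizes the masked bit to 0 or 1 instead of returning the raw bit value through ->get(); for offset 31 the unnormalized result does not even fit a positive int. A minimal demo of the normalization (offset 30 chosen to stay clear of the sign bit):

#include <stdio.h>

int main(void)
{
        unsigned int din = 1u << 30;   /* pretend the input register reads back bit 30 */
        int offset = 30;

        int raw    = din & (1 << offset);      /* 0x40000000, not a clean boolean */
        int bool01 = !!(din & (1 << offset));  /* 1 */

        printf("raw=%#x normalized=%d\n", raw, bool01);
        return 0;
}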
index f2975d4d1e449cc948d84dacac736451a3144cf9..eddd5fd6fdfa2ee20f50886b8fbbad9045fd496d 100644 (file)
@@ -65,12 +65,10 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
        status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
        status |= KU_USER;
        regs->cp0_status = status;
+       lose_fpu(0);
+       clear_thread_flag(TIF_MSA_CTX_LIVE);
        clear_used_math();
-       clear_fpu_owner();
        init_dsp();
-       clear_thread_flag(TIF_USEDMSA);
-       clear_thread_flag(TIF_MSA_CTX_LIVE);
-       disable_msa();
        regs->cp0_epc = pc;
        regs->regs[29] = sp;
 }
index 4f0ac78d17f196e7c34de33af8bdcd4f708392e3..a5279b2f31989f8c8dcd915fccb233a4ada33332 100644 (file)
@@ -548,9 +548,6 @@ static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_NAME(c0_badvaddr, cp0_badvaddr),
        REG_OFFSET_NAME(c0_cause, cp0_cause),
        REG_OFFSET_NAME(c0_epc, cp0_epc),
-#ifdef CONFIG_MIPS_MT_SMTC
-       REG_OFFSET_NAME(c0_tcstatus, cp0_tcstatus),
-#endif
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
        REG_OFFSET_NAME(mpl0, mpl[0]),
        REG_OFFSET_NAME(mpl1, mpl[1]),
index 2d23c834ba96cd328a87d016e328176fc9a6876e..a56317444bdad94c73325eb1f72d75ff46af1b82 100644 (file)
@@ -595,3 +595,4 @@ EXPORT(sys_call_table)
        PTR     sys_userfaultfd
        PTR     sys_membarrier
        PTR     sys_mlock2
+       PTR     sys_copy_file_range             /* 4360 */
index deac63315d0ed4123d1e0f09b617e9250d68c885..2b2dc14610d02b58dc704ee63de6520d45ee5156 100644 (file)
@@ -433,4 +433,5 @@ EXPORT(sys_call_table)
        PTR     sys_userfaultfd
        PTR     sys_membarrier
        PTR     sys_mlock2
+       PTR     sys_copy_file_range             /* 5320 */
        .size   sys_call_table,.-sys_call_table
index 5a69eb48d0a8cef87c33c3184f4ddc3236f01d2d..2bf5c8593d91daad1a9ee5ffea26eb59318854d9 100644 (file)
@@ -423,4 +423,5 @@ EXPORT(sysn32_call_table)
        PTR     sys_userfaultfd
        PTR     sys_membarrier
        PTR     sys_mlock2
+       PTR     sys_copy_file_range
        .size   sysn32_call_table,.-sysn32_call_table
index e4b6d7c9782263e7c6dfca1b7eb4e6818ca78a6e..c5b759e584c758a9d56acf44d7677ff74dc7ecf8 100644 (file)
@@ -578,4 +578,5 @@ EXPORT(sys32_call_table)
        PTR     sys_userfaultfd
        PTR     sys_membarrier
        PTR     sys_mlock2
+       PTR     sys_copy_file_range             /* 4360 */
        .size   sys32_call_table,.-sys32_call_table
index 66aac55df3497f79d39f1b203601eccf3a25a48f..5fdaf8bdcd2ebeed8e31eb7825edd958bfc1c251 100644 (file)
@@ -623,7 +623,7 @@ static void __init request_crashkernel(struct resource *res)
 
 #define USE_PROM_CMDLINE       IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER)
 #define USE_DTB_CMDLINE                IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)
-#define EXTEND_WITH_PROM       IS_ENABLED(CONFIG_MIPS_CMDLINE_EXTEND)
+#define EXTEND_WITH_PROM       IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)
 
 static void __init arch_mem_init(char **cmdline_p)
 {
@@ -782,6 +782,7 @@ static inline void prefill_possible_map(void) {}
 void __init setup_arch(char **cmdline_p)
 {
        cpu_probe();
+       mips_cm_probe();
        prom_init();
 
        setup_early_fdc_console();
index e04c8057b88238956efe64c519dfa3aac41e1ba9..2ad4e4c96d61cb02f8703d46efde1c596649a5df 100644 (file)
@@ -202,6 +202,9 @@ static void boot_core(unsigned core)
        /* Ensure its coherency is disabled */
        write_gcr_co_coherence(0);
 
+       /* Start it with the legacy memory map and exception base */
+       write_gcr_co_reset_ext_base(CM_GCR_RESET_EXT_BASE_UEB);
+
        /* Ensure the core can access the GCRs */
        access = read_gcr_access();
        access |= 1 << (CM_GCR_ACCESS_ACCESSEN_SHF + core);
index 2242bdd4370eb19851ff5a31540a02c65fa26498..4472a7f985776ad78a6e032a333cacd5bb2c7415 100644 (file)
 #include <asm/barrier.h>
 #include <asm/mipsregs.h>
 
-static atomic_t count_start_flag = ATOMIC_INIT(0);
+static unsigned int initcount = 0;
 static atomic_t count_count_start = ATOMIC_INIT(0);
 static atomic_t count_count_stop = ATOMIC_INIT(0);
-static atomic_t count_reference = ATOMIC_INIT(0);
 
 #define COUNTON 100
-#define NR_LOOPS 5
+#define NR_LOOPS 3
 
 void synchronise_count_master(int cpu)
 {
        int i;
        unsigned long flags;
-       unsigned int initcount;
 
        printk(KERN_INFO "Synchronize counters for CPU %u: ", cpu);
 
        local_irq_save(flags);
 
-       /*
-        * Notify the slaves that it's time to start
-        */
-       atomic_set(&count_reference, read_c0_count());
-       atomic_set(&count_start_flag, cpu);
-       smp_wmb();
-
-       /* Count will be initialised to current timer for all CPU's */
-       initcount = read_c0_count();
-
        /*
         * We loop a few times to get a primed instruction cache,
         * then the last pass is more or less synchronised and
@@ -63,9 +51,13 @@ void synchronise_count_master(int cpu)
                atomic_set(&count_count_stop, 0);
                smp_wmb();
 
-               /* this lets the slaves write their count register */
+               /* Let the slave write its count register */
                atomic_inc(&count_count_start);
 
+               /* Count will be initialised to current timer */
+               if (i == 1)
+                       initcount = read_c0_count();
+
                /*
                 * Everyone initialises count in the last loop:
                 */
@@ -73,7 +65,7 @@ void synchronise_count_master(int cpu)
                        write_c0_count(initcount);
 
                /*
-                * Wait for all slaves to leave the synchronization point:
+                * Wait for the slave to leave the synchronization point:
                 */
                while (atomic_read(&count_count_stop) != 1)
                        mb();
@@ -83,7 +75,6 @@ void synchronise_count_master(int cpu)
        }
        /* Arrange for an interrupt in a short while */
        write_c0_compare(read_c0_count() + COUNTON);
-       atomic_set(&count_start_flag, 0);
 
        local_irq_restore(flags);
 
@@ -98,19 +89,12 @@ void synchronise_count_master(int cpu)
 void synchronise_count_slave(int cpu)
 {
        int i;
-       unsigned int initcount;
 
        /*
         * Not every cpu is online at the time this gets called,
         * so we first wait for the master to say everyone is ready
         */
 
-       while (atomic_read(&count_start_flag) != cpu)
-               mb();
-
-       /* Count will be initialised to next expire for all CPU's */
-       initcount = atomic_read(&count_reference);
-
        for (i = 0; i < NR_LOOPS; i++) {
                atomic_inc(&count_count_start);
                while (atomic_read(&count_count_start) != 2)
index 886cb1976e90f682dafac7a958043d68bc4b6473..ae790c575d4fe2be7907714239e55f3b38e387cf 100644 (file)
@@ -663,7 +663,7 @@ static int simulate_rdhwr_normal(struct pt_regs *regs, unsigned int opcode)
        return -1;
 }
 
-static int simulate_rdhwr_mm(struct pt_regs *regs, unsigned short opcode)
+static int simulate_rdhwr_mm(struct pt_regs *regs, unsigned int opcode)
 {
        if ((opcode & MM_POOL32A_FUNC) == MM_RDHWR) {
                int rd = (opcode & MM_RS) >> 16;
@@ -1119,11 +1119,12 @@ no_r2_instr:
        if (get_isa16_mode(regs->cp0_epc)) {
                unsigned short mmop[2] = { 0 };
 
-               if (unlikely(get_user(mmop[0], epc) < 0))
+               if (unlikely(get_user(mmop[0], (u16 __user *)epc + 0) < 0))
                        status = SIGSEGV;
-               if (unlikely(get_user(mmop[1], epc) < 0))
+               if (unlikely(get_user(mmop[1], (u16 __user *)epc + 1) < 0))
                        status = SIGSEGV;
-               opcode = (mmop[0] << 16) | mmop[1];
+               opcode = mmop[0];
+               opcode = (opcode << 16) | mmop[1];
 
                if (status < 0)
                        status = simulate_rdhwr_mm(regs, opcode);
@@ -1369,26 +1370,12 @@ asmlinkage void do_cpu(struct pt_regs *regs)
                if (unlikely(compute_return_epc(regs) < 0))
                        break;
 
-               if (get_isa16_mode(regs->cp0_epc)) {
-                       unsigned short mmop[2] = { 0 };
-
-                       if (unlikely(get_user(mmop[0], epc) < 0))
-                               status = SIGSEGV;
-                       if (unlikely(get_user(mmop[1], epc) < 0))
-                               status = SIGSEGV;
-                       opcode = (mmop[0] << 16) | mmop[1];
-
-                       if (status < 0)
-                               status = simulate_rdhwr_mm(regs, opcode);
-               } else {
+               if (!get_isa16_mode(regs->cp0_epc)) {
                        if (unlikely(get_user(opcode, epc) < 0))
                                status = SIGSEGV;
 
                        if (!cpu_has_llsc && status < 0)
                                status = simulate_llsc(regs, opcode);
-
-                       if (status < 0)
-                               status = simulate_rdhwr_normal(regs, opcode);
                }
 
                if (status < 0)
@@ -2250,7 +2237,7 @@ void __init trap_init(void)
         * Only some CPUs have the watch exceptions.
         */
        if (cpu_has_watch)
-               set_except_vector(23, handle_watch);
+               set_except_vector(EXCCODE_WATCH, handle_watch);
 
        /*
         * Initialise interrupt handlers
@@ -2277,27 +2264,27 @@ void __init trap_init(void)
        if (board_be_init)
                board_be_init();
 
-       set_except_vector(0, using_rollback_handler() ? rollback_handle_int
-                                                     : handle_int);
-       set_except_vector(1, handle_tlbm);
-       set_except_vector(2, handle_tlbl);
-       set_except_vector(3, handle_tlbs);
+       set_except_vector(EXCCODE_INT, using_rollback_handler() ?
+                                       rollback_handle_int : handle_int);
+       set_except_vector(EXCCODE_MOD, handle_tlbm);
+       set_except_vector(EXCCODE_TLBL, handle_tlbl);
+       set_except_vector(EXCCODE_TLBS, handle_tlbs);
 
-       set_except_vector(4, handle_adel);
-       set_except_vector(5, handle_ades);
+       set_except_vector(EXCCODE_ADEL, handle_adel);
+       set_except_vector(EXCCODE_ADES, handle_ades);
 
-       set_except_vector(6, handle_ibe);
-       set_except_vector(7, handle_dbe);
+       set_except_vector(EXCCODE_IBE, handle_ibe);
+       set_except_vector(EXCCODE_DBE, handle_dbe);
 
-       set_except_vector(8, handle_sys);
-       set_except_vector(9, handle_bp);
-       set_except_vector(10, rdhwr_noopt ? handle_ri :
+       set_except_vector(EXCCODE_SYS, handle_sys);
+       set_except_vector(EXCCODE_BP, handle_bp);
+       set_except_vector(EXCCODE_RI, rdhwr_noopt ? handle_ri :
                          (cpu_has_vtag_icache ?
                           handle_ri_rdhwr_vivt : handle_ri_rdhwr));
-       set_except_vector(11, handle_cpu);
-       set_except_vector(12, handle_ov);
-       set_except_vector(13, handle_tr);
-       set_except_vector(14, handle_msa_fpe);
+       set_except_vector(EXCCODE_CPU, handle_cpu);
+       set_except_vector(EXCCODE_OV, handle_ov);
+       set_except_vector(EXCCODE_TR, handle_tr);
+       set_except_vector(EXCCODE_MSAFPE, handle_msa_fpe);
 
        if (current_cpu_type() == CPU_R6000 ||
            current_cpu_type() == CPU_R6000A) {
@@ -2318,25 +2305,25 @@ void __init trap_init(void)
                board_nmi_handler_setup();
 
        if (cpu_has_fpu && !cpu_has_nofpuex)
-               set_except_vector(15, handle_fpe);
+               set_except_vector(EXCCODE_FPE, handle_fpe);
 
-       set_except_vector(16, handle_ftlb);
+       set_except_vector(MIPS_EXCCODE_TLBPAR, handle_ftlb);
 
        if (cpu_has_rixiex) {
-               set_except_vector(19, tlb_do_page_fault_0);
-               set_except_vector(20, tlb_do_page_fault_0);
+               set_except_vector(EXCCODE_TLBRI, tlb_do_page_fault_0);
+               set_except_vector(EXCCODE_TLBXI, tlb_do_page_fault_0);
        }
 
-       set_except_vector(21, handle_msa);
-       set_except_vector(22, handle_mdmx);
+       set_except_vector(EXCCODE_MSADIS, handle_msa);
+       set_except_vector(EXCCODE_MDMX, handle_mdmx);
 
        if (cpu_has_mcheck)
-               set_except_vector(24, handle_mcheck);
+               set_except_vector(EXCCODE_MCHECK, handle_mcheck);
 
        if (cpu_has_mipsmt)
-               set_except_vector(25, handle_mt);
+               set_except_vector(EXCCODE_THREAD, handle_mt);
 
-       set_except_vector(26, handle_dsp);
+       set_except_vector(EXCCODE_DSPDIS, handle_dsp);
 
        if (board_cache_error_setup)
                board_cache_error_setup();
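
The trap_init() hunks swap hard-coded Cause.ExcCode numbers for symbolic EXCCODE_* constants. The constants presumably live in a shared MIPS header; the values below are simply read off the numeric arguments they replace:

    enum {
            EXCCODE_INT         = 0,    /* interrupt */
            EXCCODE_MOD         = 1,    /* TLB modified */
            EXCCODE_TLBL        = 2,    /* TLB miss on load/fetch */
            EXCCODE_TLBS        = 3,    /* TLB miss on store */
            EXCCODE_ADEL        = 4,    /* address error on load/fetch */
            EXCCODE_ADES        = 5,    /* address error on store */
            EXCCODE_IBE         = 6,    /* instruction bus error */
            EXCCODE_DBE         = 7,    /* data bus error */
            EXCCODE_SYS         = 8,    /* syscall */
            EXCCODE_BP          = 9,    /* breakpoint */
            EXCCODE_RI          = 10,   /* reserved instruction */
            EXCCODE_CPU         = 11,   /* coprocessor unusable */
            EXCCODE_OV          = 12,   /* arithmetic overflow */
            EXCCODE_TR          = 13,   /* trap */
            EXCCODE_MSAFPE      = 14,   /* MSA floating point exception */
            EXCCODE_FPE         = 15,   /* floating point exception */
            MIPS_EXCCODE_TLBPAR = 16,   /* FTLB parity */
            EXCCODE_TLBRI       = 19,   /* TLB read inhibit */
            EXCCODE_TLBXI       = 20,   /* TLB execute inhibit */
            EXCCODE_MSADIS      = 21,   /* MSA disabled */
            EXCCODE_MDMX        = 22,   /* MDMX unusable */
            EXCCODE_WATCH       = 23,   /* watchpoint reference */
            EXCCODE_MCHECK      = 24,   /* machine check */
            EXCCODE_THREAD      = 25,   /* thread (MT) */
            EXCCODE_DSPDIS      = 26,   /* DSP disabled */
    };
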
index 313c2e37b978ba730372baafdc51dd707ed8e86b..d88aa2173fb0b16337e3c0ae06d86c87fb959ad4 100644 (file)
@@ -11,4 +11,4 @@
 #include <linux/kvm_host.h>
 
 struct kvm_mips_callbacks *kvm_mips_callbacks;
-EXPORT_SYMBOL(kvm_mips_callbacks);
+EXPORT_SYMBOL_GPL(kvm_mips_callbacks);
index 521121bdebff94a622b280544fcacddab1434b5d..f1527a465c1b1b071356b18ea066a468b30edabb 100644 (file)
@@ -86,10 +86,8 @@ int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
        } else {
                mfc0_inst = LW_TEMPLATE;
                mfc0_inst |= ((rt & 0x1f) << 16);
-               mfc0_inst |=
-                   offsetof(struct mips_coproc,
-                            reg[rd][sel]) + offsetof(struct kvm_mips_commpage,
-                                                     cop0);
+               mfc0_inst |= offsetof(struct kvm_mips_commpage,
+                                     cop0.reg[rd][sel]);
        }
 
        if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
@@ -123,9 +121,7 @@ int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
        sel = inst & 0x7;
 
        mtc0_inst |= ((rt & 0x1f) << 16);
-       mtc0_inst |=
-           offsetof(struct mips_coproc,
-                    reg[rd][sel]) + offsetof(struct kvm_mips_commpage, cop0);
+       mtc0_inst |= offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]);
 
        if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
                kseg0_opc =
index 1b675c7ce89f89d25f3457659405419505a8158d..b37954cc880d6ce06d9ca408962a4fabf7e7de06 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/random.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
+#include <asm/cacheops.h>
 #include <asm/cpu-info.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -29,7 +30,6 @@
 #include <asm/r4kcache.h>
 #define CONFIG_MIPS_MT
 
-#include "opcode.h"
 #include "interrupt.h"
 #include "commpage.h"
 
@@ -1239,21 +1239,20 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
                        er = EMULATE_FAIL;
                        break;
 
-               case mfmcz_op:
+               case mfmc0_op:
 #ifdef KVM_MIPS_DEBUG_COP0_COUNTERS
                        cop0->stat[MIPS_CP0_STATUS][0]++;
 #endif
-                       if (rt != 0) {
+                       if (rt != 0)
                                vcpu->arch.gprs[rt] =
                                    kvm_read_c0_guest_status(cop0);
-                       }
                        /* EI */
                        if (inst & 0x20) {
-                               kvm_debug("[%#lx] mfmcz_op: EI\n",
+                               kvm_debug("[%#lx] mfmc0_op: EI\n",
                                          vcpu->arch.pc);
                                kvm_set_c0_guest_status(cop0, ST0_IE);
                        } else {
-                               kvm_debug("[%#lx] mfmcz_op: DI\n",
+                               kvm_debug("[%#lx] mfmc0_op: DI\n",
                                          vcpu->arch.pc);
                                kvm_clear_c0_guest_status(cop0, ST0_IE);
                        }
@@ -1545,19 +1544,6 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu)
        return 0;
 }
 
-#define MIPS_CACHE_OP_INDEX_INV         0x0
-#define MIPS_CACHE_OP_INDEX_LD_TAG      0x1
-#define MIPS_CACHE_OP_INDEX_ST_TAG      0x2
-#define MIPS_CACHE_OP_IMP               0x3
-#define MIPS_CACHE_OP_HIT_INV           0x4
-#define MIPS_CACHE_OP_FILL_WB_INV       0x5
-#define MIPS_CACHE_OP_HIT_HB            0x6
-#define MIPS_CACHE_OP_FETCH_LOCK        0x7
-
-#define MIPS_CACHE_ICACHE               0x0
-#define MIPS_CACHE_DCACHE               0x1
-#define MIPS_CACHE_SEC                  0x3
-
 enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
                                             uint32_t cause,
                                             struct kvm_run *run,
@@ -1582,8 +1568,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
        base = (inst >> 21) & 0x1f;
        op_inst = (inst >> 16) & 0x1f;
        offset = (int16_t)inst;
-       cache = (inst >> 16) & 0x3;
-       op = (inst >> 18) & 0x7;
+       cache = op_inst & CacheOp_Cache;
+       op = op_inst & CacheOp_Op;
 
        va = arch->gprs[base] + offset;
 
@@ -1595,14 +1581,14 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
         * invalidate the caches entirely by stepping through all the
         * ways/indexes
         */
-       if (op == MIPS_CACHE_OP_INDEX_INV) {
+       if (op == Index_Writeback_Inv) {
                kvm_debug("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
                          vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base,
                          arch->gprs[base], offset);
 
-               if (cache == MIPS_CACHE_DCACHE)
+               if (cache == Cache_D)
                        r4k_blast_dcache();
-               else if (cache == MIPS_CACHE_ICACHE)
+               else if (cache == Cache_I)
                        r4k_blast_icache();
                else {
                        kvm_err("%s: unsupported CACHE INDEX operation\n",
@@ -1675,9 +1661,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 
 skip_fault:
        /* XXXKYMA: Only the subset of cache ops used by Linux is supported */
-       if (cache == MIPS_CACHE_DCACHE
-           && (op == MIPS_CACHE_OP_FILL_WB_INV
-               || op == MIPS_CACHE_OP_HIT_INV)) {
+       if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) {
                flush_dcache_line(va);
 
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
@@ -1687,7 +1671,7 @@ skip_fault:
                 */
                kvm_mips_trans_cache_va(inst, opc, vcpu);
 #endif
-       } else if (op == MIPS_CACHE_OP_HIT_INV && cache == MIPS_CACHE_ICACHE) {
+       } else if (op_inst == Hit_Invalidate_I) {
                flush_dcache_line(va);
                flush_icache_line(va);
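
The CACHE-emulation hunks above replace open-coded shifts with masks from <asm/cacheops.h> (added to the includes earlier). The old extraction shows the layout of the 5-bit rt field: the low two bits select the cache, the upper three the operation, and op is now kept in place rather than shifted down so a single comparison of op_inst can match cache and operation at once. A sketch of the decode, with the mask values being assumptions derived from the old shifts:

    op_inst = (inst >> 16) & 0x1f;      /* 5-bit field: op(3) | cache(2) */
    cache   = op_inst & CacheOp_Cache;  /* presumably 0x03 */
    op      = op_inst & CacheOp_Op;     /* presumably 0x1c, left in place */

    /* one comparison now covers both fields: */
    if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D)
            flush_dcache_line(va);
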
 
@@ -1781,7 +1765,7 @@ enum emulation_result kvm_mips_emulate_syscall(unsigned long cause,
                kvm_debug("Delivering SYSCALL @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_SYSCALL << CAUSEB_EXCCODE));
+                                         (EXCCODE_SYS << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -1828,7 +1812,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_LD_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBL << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1874,7 +1858,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_LD_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBL << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1918,7 +1902,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_ST_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBS << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1962,7 +1946,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_ST_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBS << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -2033,7 +2017,8 @@ enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause,
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
        }
 
-       kvm_change_c0_guest_cause(cop0, (0xff), (T_TLB_MOD << CAUSEB_EXCCODE));
+       kvm_change_c0_guest_cause(cop0, (0xff),
+                                 (EXCCODE_MOD << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -2068,7 +2053,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause,
        arch->pc = KVM_GUEST_KSEG0 + 0x180;
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_COP_UNUSABLE << CAUSEB_EXCCODE));
+                                 (EXCCODE_CPU << CAUSEB_EXCCODE));
        kvm_change_c0_guest_cause(cop0, (CAUSEF_CE), (0x1 << CAUSEB_CE));
 
        return EMULATE_DONE;
@@ -2096,7 +2081,7 @@ enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause,
                kvm_debug("Delivering RI @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_RES_INST << CAUSEB_EXCCODE));
+                                         (EXCCODE_RI << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2131,7 +2116,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
                kvm_debug("Delivering BP @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_BREAK << CAUSEB_EXCCODE));
+                                         (EXCCODE_BP << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2166,7 +2151,7 @@ enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
                kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_TRAP << CAUSEB_EXCCODE));
+                                         (EXCCODE_TR << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2201,7 +2186,7 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
                kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_MSAFPE << CAUSEB_EXCCODE));
+                                         (EXCCODE_MSAFPE << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2236,7 +2221,7 @@ enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
                kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_FPE << CAUSEB_EXCCODE));
+                                         (EXCCODE_FPE << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2271,7 +2256,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
                kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_MSADIS << CAUSEB_EXCCODE));
+                                         (EXCCODE_MSADIS << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2480,25 +2465,25 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
 
        if (usermode) {
                switch (exccode) {
-               case T_INT:
-               case T_SYSCALL:
-               case T_BREAK:
-               case T_RES_INST:
-               case T_TRAP:
-               case T_MSAFPE:
-               case T_FPE:
-               case T_MSADIS:
+               case EXCCODE_INT:
+               case EXCCODE_SYS:
+               case EXCCODE_BP:
+               case EXCCODE_RI:
+               case EXCCODE_TR:
+               case EXCCODE_MSAFPE:
+               case EXCCODE_FPE:
+               case EXCCODE_MSADIS:
                        break;
 
-               case T_COP_UNUSABLE:
+               case EXCCODE_CPU:
                        if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 0)
                                er = EMULATE_PRIV_FAIL;
                        break;
 
-               case T_TLB_MOD:
+               case EXCCODE_MOD:
                        break;
 
-               case T_TLB_LD_MISS:
+               case EXCCODE_TLBL:
                        /*
                         * If we are accessing Guest kernel space, then send an
                         * address error exception to the guest
@@ -2507,12 +2492,12 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
                                kvm_debug("%s: LD MISS @ %#lx\n", __func__,
                                          badvaddr);
                                cause &= ~0xff;
-                               cause |= (T_ADDR_ERR_LD << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_ADEL << CAUSEB_EXCCODE);
                                er = EMULATE_PRIV_FAIL;
                        }
                        break;
 
-               case T_TLB_ST_MISS:
+               case EXCCODE_TLBS:
                        /*
                         * If we are accessing Guest kernel space, then send an
                         * address error exception to the guest
@@ -2521,26 +2506,26 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
                                kvm_debug("%s: ST MISS @ %#lx\n", __func__,
                                          badvaddr);
                                cause &= ~0xff;
-                               cause |= (T_ADDR_ERR_ST << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_ADES << CAUSEB_EXCCODE);
                                er = EMULATE_PRIV_FAIL;
                        }
                        break;
 
-               case T_ADDR_ERR_ST:
+               case EXCCODE_ADES:
                        kvm_debug("%s: address error ST @ %#lx\n", __func__,
                                  badvaddr);
                        if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) {
                                cause &= ~0xff;
-                               cause |= (T_TLB_ST_MISS << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_TLBS << CAUSEB_EXCCODE);
                        }
                        er = EMULATE_PRIV_FAIL;
                        break;
-               case T_ADDR_ERR_LD:
+               case EXCCODE_ADEL:
                        kvm_debug("%s: address error LD @ %#lx\n", __func__,
                                  badvaddr);
                        if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) {
                                cause &= ~0xff;
-                               cause |= (T_TLB_LD_MISS << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_TLBL << CAUSEB_EXCCODE);
                        }
                        er = EMULATE_PRIV_FAIL;
                        break;
@@ -2583,13 +2568,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
         * an entry into the guest TLB.
         */
        index = kvm_mips_guest_tlb_lookup(vcpu,
-                                         (va & VPN2_MASK) |
-                                         (kvm_read_c0_guest_entryhi
-                                          (vcpu->arch.cop0) & ASID_MASK));
+                     (va & VPN2_MASK) |
+                     (kvm_read_c0_guest_entryhi(vcpu->arch.cop0) & ASID_MASK));
        if (index < 0) {
-               if (exccode == T_TLB_LD_MISS) {
+               if (exccode == EXCCODE_TLBL) {
                        er = kvm_mips_emulate_tlbmiss_ld(cause, opc, run, vcpu);
-               } else if (exccode == T_TLB_ST_MISS) {
+               } else if (exccode == EXCCODE_TLBS) {
                        er = kvm_mips_emulate_tlbmiss_st(cause, opc, run, vcpu);
                } else {
                        kvm_err("%s: invalid exc code: %d\n", __func__,
@@ -2604,10 +2588,10 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
                 * exception to the guest
                 */
                if (!TLB_IS_VALID(*tlb, va)) {
-                       if (exccode == T_TLB_LD_MISS) {
+                       if (exccode == EXCCODE_TLBL) {
                                er = kvm_mips_emulate_tlbinv_ld(cause, opc, run,
                                                                vcpu);
-                       } else if (exccode == T_TLB_ST_MISS) {
+                       } else if (exccode == EXCCODE_TLBS) {
                                er = kvm_mips_emulate_tlbinv_st(cause, opc, run,
                                                                vcpu);
                        } else {
index 9b4445940c2bcc941b217aa3b983a4369f62f382..95f790663b0c2af09a3e0dbf2fe836b58ecfd078 100644 (file)
@@ -128,7 +128,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -137,7 +137,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -146,7 +146,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -155,7 +155,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
index 7e2210846b8b9d1519f679e6a0950fcaf1e6dfd5..81687ab1b523eaebce4f2edbaeffc3b515a1b9e9 100644 (file)
@@ -335,7 +335,7 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
 
        /* Now restore the host state just enough to run the handlers */
 
-       /* Swtich EBASE to the one used by Linux */
+       /* Switch EBASE to the one used by Linux */
        /* load up the host EBASE */
        mfc0    v0, CP0_STATUS
 
@@ -490,11 +490,11 @@ __kvm_mips_return_to_guest:
        REG_ADDU t3, t1, t2
        LONG_L  k0, (t3)
        andi    k0, k0, 0xff
-       mtc0    k0,CP0_ENTRYHI
+       mtc0    k0, CP0_ENTRYHI
        ehb
 
        /* Disable RDHWR access */
-       mtc0    zero,  CP0_HWRENA
+       mtc0    zero, CP0_HWRENA
 
        /* load the guest context from VCPU and return */
        LONG_L  $0, VCPU_R0(k1)
@@ -606,11 +606,11 @@ __kvm_mips_return_to_host:
 
        /* Restore RDHWR access */
        PTR_LI  k0, 0x2000000F
-       mtc0    k0,  CP0_HWRENA
+       mtc0    k0, CP0_HWRENA
 
        /* Restore RA, which is the address we will return to */
-       LONG_L  ra, PT_R31(k1)
-       j       ra
+       LONG_L  ra, PT_R31(k1)
+       j       ra
         nop
 
 VECTOR_END(MIPSX(GuestExceptionEnd))
index b9b803facdbf7594dc700ca828a2ced604e617aa..8bc3977576e6af5f7b5c1a17f306da0f21b26937 100644 (file)
@@ -229,7 +229,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                            kzalloc(npages * sizeof(unsigned long), GFP_KERNEL);
 
                        if (!kvm->arch.guest_pmap) {
-                               kvm_err("Failed to allocate guest PMAP");
+                               kvm_err("Failed to allocate guest PMAP\n");
                                return;
                        }
 
@@ -1264,8 +1264,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
        }
 
        switch (exccode) {
-       case T_INT:
-               kvm_debug("[%d]T_INT @ %p\n", vcpu->vcpu_id, opc);
+       case EXCCODE_INT:
+               kvm_debug("[%d]EXCCODE_INT @ %p\n", vcpu->vcpu_id, opc);
 
                ++vcpu->stat.int_exits;
                trace_kvm_exit(vcpu, INT_EXITS);
@@ -1276,8 +1276,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = RESUME_GUEST;
                break;
 
-       case T_COP_UNUSABLE:
-               kvm_debug("T_COP_UNUSABLE: @ PC: %p\n", opc);
+       case EXCCODE_CPU:
+               kvm_debug("EXCCODE_CPU: @ PC: %p\n", opc);
 
                ++vcpu->stat.cop_unusable_exits;
                trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS);
@@ -1287,13 +1287,13 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ret = RESUME_HOST;
                break;
 
-       case T_TLB_MOD:
+       case EXCCODE_MOD:
                ++vcpu->stat.tlbmod_exits;
                trace_kvm_exit(vcpu, TLBMOD_EXITS);
                ret = kvm_mips_callbacks->handle_tlb_mod(vcpu);
                break;
 
-       case T_TLB_ST_MISS:
+       case EXCCODE_TLBS:
                kvm_debug("TLB ST fault:  cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n",
                          cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
                          badvaddr);
@@ -1303,7 +1303,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = kvm_mips_callbacks->handle_tlb_st_miss(vcpu);
                break;
 
-       case T_TLB_LD_MISS:
+       case EXCCODE_TLBL:
                kvm_debug("TLB LD fault: cause %#x, PC: %p, BadVaddr: %#lx\n",
                          cause, opc, badvaddr);
 
@@ -1312,55 +1312,55 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = kvm_mips_callbacks->handle_tlb_ld_miss(vcpu);
                break;
 
-       case T_ADDR_ERR_ST:
+       case EXCCODE_ADES:
                ++vcpu->stat.addrerr_st_exits;
                trace_kvm_exit(vcpu, ADDRERR_ST_EXITS);
                ret = kvm_mips_callbacks->handle_addr_err_st(vcpu);
                break;
 
-       case T_ADDR_ERR_LD:
+       case EXCCODE_ADEL:
                ++vcpu->stat.addrerr_ld_exits;
                trace_kvm_exit(vcpu, ADDRERR_LD_EXITS);
                ret = kvm_mips_callbacks->handle_addr_err_ld(vcpu);
                break;
 
-       case T_SYSCALL:
+       case EXCCODE_SYS:
                ++vcpu->stat.syscall_exits;
                trace_kvm_exit(vcpu, SYSCALL_EXITS);
                ret = kvm_mips_callbacks->handle_syscall(vcpu);
                break;
 
-       case T_RES_INST:
+       case EXCCODE_RI:
                ++vcpu->stat.resvd_inst_exits;
                trace_kvm_exit(vcpu, RESVD_INST_EXITS);
                ret = kvm_mips_callbacks->handle_res_inst(vcpu);
                break;
 
-       case T_BREAK:
+       case EXCCODE_BP:
                ++vcpu->stat.break_inst_exits;
                trace_kvm_exit(vcpu, BREAK_INST_EXITS);
                ret = kvm_mips_callbacks->handle_break(vcpu);
                break;
 
-       case T_TRAP:
+       case EXCCODE_TR:
                ++vcpu->stat.trap_inst_exits;
                trace_kvm_exit(vcpu, TRAP_INST_EXITS);
                ret = kvm_mips_callbacks->handle_trap(vcpu);
                break;
 
-       case T_MSAFPE:
+       case EXCCODE_MSAFPE:
                ++vcpu->stat.msa_fpe_exits;
                trace_kvm_exit(vcpu, MSA_FPE_EXITS);
                ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
                break;
 
-       case T_FPE:
+       case EXCCODE_FPE:
                ++vcpu->stat.fpe_exits;
                trace_kvm_exit(vcpu, FPE_EXITS);
                ret = kvm_mips_callbacks->handle_fpe(vcpu);
                break;
 
-       case T_MSADIS:
+       case EXCCODE_MSADIS:
                ++vcpu->stat.msa_disabled_exits;
                trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
                ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
@@ -1620,7 +1620,7 @@ static struct notifier_block kvm_mips_csr_die_notifier = {
        .notifier_call = kvm_mips_csr_die_notify,
 };
 
-int __init kvm_mips_init(void)
+static int __init kvm_mips_init(void)
 {
        int ret;
 
@@ -1646,7 +1646,7 @@ int __init kvm_mips_init(void)
        return 0;
 }
 
-void __exit kvm_mips_exit(void)
+static void __exit kvm_mips_exit(void)
 {
        kvm_exit();
 
diff --git a/arch/mips/kvm/opcode.h b/arch/mips/kvm/opcode.h
deleted file mode 100644 (file)
index 03a6ae8..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
- * Authors: Sanjay Lal <sanjayl@kymasys.com>
- */
-
-/* Define opcode values not defined in <asm/isnt.h> */
-
-#ifndef __KVM_MIPS_OPCODE_H__
-#define __KVM_MIPS_OPCODE_H__
-
-/* COP0 Ops */
-#define mfmcz_op       0x0b    /* 01011 */
-#define wrpgpr_op      0x0e    /* 01110 */
-
-/* COP0 opcodes (only if COP0 and CO=1): */
-#define wait_op                0x20    /* 100000 */
-
-#endif /* __KVM_MIPS_OPCODE_H__ */
index 570479c03bdc35009f48985e70d17f00686b5012..a08c439462472e3a2e441c1238d62cdee2988ed7 100644 (file)
 #define PRIx64 "llx"
 
 atomic_t kvm_mips_instance;
-EXPORT_SYMBOL(kvm_mips_instance);
+EXPORT_SYMBOL_GPL(kvm_mips_instance);
 
 /* These function pointers are initialized once the KVM module is loaded */
 kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
-EXPORT_SYMBOL(kvm_mips_gfn_to_pfn);
+EXPORT_SYMBOL_GPL(kvm_mips_gfn_to_pfn);
 
 void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
-EXPORT_SYMBOL(kvm_mips_release_pfn_clean);
+EXPORT_SYMBOL_GPL(kvm_mips_release_pfn_clean);
 
 bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn);
-EXPORT_SYMBOL(kvm_mips_is_error_pfn);
+EXPORT_SYMBOL_GPL(kvm_mips_is_error_pfn);
 
 uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
 {
@@ -111,7 +111,7 @@ void kvm_mips_dump_host_tlbs(void)
        mtc0_tlbw_hazard();
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_mips_dump_host_tlbs);
+EXPORT_SYMBOL_GPL(kvm_mips_dump_host_tlbs);
 
 void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
 {
@@ -139,7 +139,7 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
                         (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask);
        }
 }
-EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs);
+EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs);
 
 static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
 {
@@ -191,7 +191,7 @@ unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
 
        return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset;
 }
-EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa);
+EXPORT_SYMBOL_GPL(kvm_mips_translate_guest_kseg0_to_hpa);
 
 /* XXXKYMA: Must be called with interrupts disabled */
 /* set flush_dcache_mask == 0 if no dcache flush required */
@@ -308,7 +308,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
        return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
                                       flush_dcache_mask);
 }
-EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault);
 
 int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
        struct kvm_vcpu *vcpu)
@@ -351,7 +351,7 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
 
        return 0;
 }
-EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault);
 
 int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
                                         struct kvm_mips_tlb *tlb,
@@ -401,7 +401,7 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
        return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
                                       tlb->tlb_mask);
 }
-EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault);
 
 int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
 {
@@ -422,7 +422,7 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
 
        return index;
 }
-EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup);
+EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup);
 
 int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
 {
@@ -458,7 +458,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
 
        return idx;
 }
-EXPORT_SYMBOL(kvm_mips_host_tlb_lookup);
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_lookup);
 
 int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
 {
@@ -505,44 +505,7 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
 
        return 0;
 }
-EXPORT_SYMBOL(kvm_mips_host_tlb_inv);
-
-/* XXXKYMA: Fix Guest USER/KERNEL no longer share the same ASID */
-int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index)
-{
-       unsigned long flags, old_entryhi;
-
-       if (index >= current_cpu_data.tlbsize)
-               BUG();
-
-       local_irq_save(flags);
-
-       old_entryhi = read_c0_entryhi();
-
-       write_c0_entryhi(UNIQUE_ENTRYHI(index));
-       mtc0_tlbw_hazard();
-
-       write_c0_index(index);
-       mtc0_tlbw_hazard();
-
-       write_c0_entrylo0(0);
-       mtc0_tlbw_hazard();
-
-       write_c0_entrylo1(0);
-       mtc0_tlbw_hazard();
-
-       tlb_write_indexed();
-       mtc0_tlbw_hazard();
-       tlbw_use_hazard();
-
-       write_c0_entryhi(old_entryhi);
-       mtc0_tlbw_hazard();
-       tlbw_use_hazard();
-
-       local_irq_restore(flags);
-
-       return 0;
-}
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
 
 void kvm_mips_flush_host_tlb(int skip_kseg0)
 {
@@ -594,7 +557,7 @@ void kvm_mips_flush_host_tlb(int skip_kseg0)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_mips_flush_host_tlb);
+EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb);
 
 void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
                             struct kvm_vcpu *vcpu)
@@ -642,7 +605,7 @@ void kvm_local_flush_tlb_all(void)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_local_flush_tlb_all);
+EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all);
 
 /**
  * kvm_mips_migrate_count() - Migrate timer.
@@ -673,8 +636,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        local_irq_save(flags);
 
-       if (((vcpu->arch.
-             guest_kernel_asid[cpu] ^ asid_cache(cpu)) & ASID_VERSION_MASK)) {
+       if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) &
+                                                       ASID_VERSION_MASK) {
                kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu);
                vcpu->arch.guest_kernel_asid[cpu] =
                    vcpu->arch.guest_kernel_mm.context.asid[cpu];
@@ -739,7 +702,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        local_irq_restore(flags);
 
 }
-EXPORT_SYMBOL(kvm_arch_vcpu_load);
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_load);
 
 /* ASID can change if another task is scheduled during preemption */
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -768,7 +731,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_arch_vcpu_put);
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_put);
 
 uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
 {
@@ -813,4 +776,4 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
 
        return inst;
 }
-EXPORT_SYMBOL(kvm_get_inst);
+EXPORT_SYMBOL_GPL(kvm_get_inst);
index d836ed5b0bc7ea38e36350304a6238a520e0d74d..ad988000563f264d443b92bd0785e40ba78fe95d 100644 (file)
@@ -16,7 +16,6 @@
 
 #include <linux/kvm_host.h>
 
-#include "opcode.h"
 #include "interrupt.h"
 
 static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
index 272af8ac2425290c892849186b170f4b95c259b6..5530070e0d05dd51248afb9a8780eb3a86b173a7 100644 (file)
@@ -57,7 +57,6 @@ notrace void arch_local_irq_disable(void)
 }
 EXPORT_SYMBOL(arch_local_irq_disable);
 
-
 notrace unsigned long arch_local_irq_save(void)
 {
        unsigned long flags;
@@ -111,31 +110,4 @@ notrace void arch_local_irq_restore(unsigned long flags)
 }
 EXPORT_SYMBOL(arch_local_irq_restore);
 
-
-notrace void __arch_local_irq_restore(unsigned long flags)
-{
-       unsigned long __tmp1;
-
-       preempt_disable();
-
-       __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    noat                                            \n"
-       "       mfc0    $1, $12                                         \n"
-       "       andi    %[flags], 1                                     \n"
-       "       ori     $1, 0x1f                                        \n"
-       "       xori    $1, 0x1f                                        \n"
-       "       or      %[flags], $1                                    \n"
-       "       mtc0    %[flags], $12                                   \n"
-       "       " __stringify(__irq_disable_hazard) "                   \n"
-       "       .set    pop                                             \n"
-       : [flags] "=r" (__tmp1)
-       : "0" (flags)
-       : "memory");
-
-       preempt_enable();
-}
-EXPORT_SYMBOL(__arch_local_irq_restore);
-
-#endif /* !CONFIG_CPU_MIPSR2 */
+#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */
index 2e48e83d5524ac13a25a98b0c1d9bd60ae2c7ffa..85d808924c94b52b25f24654def7529b76039266 100644 (file)
@@ -22,6 +22,27 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
   endif
 endif
 
+cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+#
+# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
+# as MIPS64 R2; older versions as just R1.  This leaves the possibility open
+# that GCC might generate R2 code for -march=loongson3a which then is rejected
+# by GAS.  The cc-option can't probe for this behaviour so -march=loongson3a
+# can't easily be used safely within the kbuild framework.
+#
+ifeq ($(call cc-ifversion, -ge, 0409, y), y)
+  ifeq ($(call ld-ifversion, -ge, 22500000, y), y)
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+  else
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+  endif
+else
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+endif
+
 #
 # Loongson Machines' Support
 #
index bf9f1a77f0e59825fcdb6ced3ce73ca78d8c7c19..a2631a52ca998c0029b27cec6fa85bac9d0782d0 100644 (file)
@@ -13,6 +13,9 @@
 #define SMBUS_PCI_REG64                0x64
 #define SMBUS_PCI_REGB4                0xb4
 
+#define HPET_MIN_CYCLES                64
+#define HPET_MIN_PROG_DELTA    (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+
 static DEFINE_SPINLOCK(hpet_lock);
 DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device);
 
@@ -161,8 +164,9 @@ static int hpet_next_event(unsigned long delta,
        cnt += delta;
        hpet_write(HPET_T0_CMP, cnt);
 
-       res = ((int)(hpet_read(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0;
-       return res;
+       res = (int)(cnt - hpet_read(HPET_COUNTER));
+
+       return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
 static irqreturn_t hpet_irq_handler(int irq, void *data)
@@ -237,7 +241,7 @@ void __init setup_hpet_timer(void)
        cd->cpumask = cpumask_of(cpu);
        clockevent_set_clock(cd, HPET_FREQ);
        cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
-       cd->min_delta_ns = 5000;
+       cd->min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, cd);
 
        clockevents_register_device(cd);
        setup_irq(HPET_T0_IRQ, &hpet_irq);
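
The HPET hunks replace the old "comparator already in the past" test with a minimum-programming margin: after HPET_T0_CMP is written, the comparator must still be at least HPET_MIN_CYCLES ahead of the free-running counter or the handler returns -ETIME so the clockevents core retries, and min_delta_ns is derived from the same constant (with 50% headroom) instead of a hard-coded 5000 ns. The guard, reduced to a standalone sketch:

    #define HPET_MIN_CYCLES     64
    #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))

    cnt = hpet_read(HPET_COUNTER) + delta;
    hpet_write(HPET_T0_CMP, cnt);
    res = (int)(cnt - hpet_read(HPET_COUNTER));   /* cycles of margin left */
    return res < HPET_MIN_CYCLES ? -ETIME : 0;
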
index 1a4738a8f2d3906ccffb58bdf8d9b35ee4b04ef3..509832a9836c9ae70f52aa422ab70f85d7f8971c 100644 (file)
 #include "smp.h"
 
 DEFINE_PER_CPU(int, cpu_state);
-DEFINE_PER_CPU(uint32_t, core0_c0count);
 
 static void *ipi_set0_regs[16];
 static void *ipi_clear0_regs[16];
 static void *ipi_status0_regs[16];
 static void *ipi_en0_regs[16];
 static void *ipi_mailbox_buf[16];
+static uint32_t core0_c0count[NR_CPUS];
 
 /* read a 32bit value from ipi register */
 #define loongson3_ipi_read32(addr) readl(addr)
@@ -275,12 +275,14 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
        if (action & SMP_ASK_C0COUNT) {
                BUG_ON(cpu != 0);
                c0count = read_c0_count();
-               for (i = 1; i < num_possible_cpus(); i++)
-                       per_cpu(core0_c0count, i) = c0count;
+               c0count = c0count ? c0count : 1;
+               for (i = 1; i < nr_cpu_ids; i++)
+                       core0_c0count[i] = c0count;
+               __wbflush(); /* Let others see the result ASAP */
        }
 }
 
-#define MAX_LOOPS 1111
+#define MAX_LOOPS 800
 /*
  * SMP init and finish on secondary CPUs
  */
@@ -305,16 +307,20 @@ static void loongson3_init_secondary(void)
                cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
 
        i = 0;
-       __this_cpu_write(core0_c0count, 0);
+       core0_c0count[cpu] = 0;
        loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
-       while (!__this_cpu_read(core0_c0count)) {
+       while (!core0_c0count[cpu]) {
                i++;
                cpu_relax();
        }
 
        if (i > MAX_LOOPS)
                i = MAX_LOOPS;
-       initcount = __this_cpu_read(core0_c0count) + i;
+       if (cpu_data[cpu].package)
+               initcount = core0_c0count[cpu] + i;
+       else /* Local access is faster for loops */
+               initcount = core0_c0count[cpu] + i/2;
+
        write_c0_count(initcount);
 }
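
The Loongson SMP hunks turn the per-CPU core0_c0count into a plain array indexed by CPU, so CPU0 can publish its Count register to every waiter with ordinary stores. Three details carry the handshake: a Count of 0 is remapped to 1 because a zero slot means "not yet written" to the spinning secondary, __wbflush() pushes the stores out of the write buffer, and a secondary in package 0 halves its loop-count compensation because its reads of the array are faster local accesses (MAX_LOOPS drops from 1111 to 800 to match). The protocol in outline:

    static uint32_t core0_c0count[NR_CPUS];

    /* CPU0, in the IPI handler: */
    c0count = read_c0_count();
    c0count = c0count ? c0count : 1;        /* 0 means "not written yet" */
    for (i = 1; i < nr_cpu_ids; i++)
            core0_c0count[i] = c0count;
    __wbflush();                            /* let the waiters see it */

    /* a secondary CPU, during init: */
    core0_c0count[cpu] = 0;
    loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
    while (!core0_c0count[cpu])
            cpu_relax();
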
 
index 32f0e19a0d7f71fef2bb693e0da7b7bbc6bc9af9..cdfd44ffa51c88133b7b59417adb3fc070dfeee7 100644 (file)
@@ -1266,6 +1266,8 @@ branch_common:
                                                 */
                                                sig = mips_dsemul(xcp, ir,
                                                                  contpc);
+                                               if (sig < 0)
+                                                       break;
                                                if (sig)
                                                        xcp->cp0_epc = bcpc;
                                                /*
@@ -1319,6 +1321,8 @@ branch_common:
                                 * instruction in the dslot
                                 */
                                sig = mips_dsemul(xcp, ir, contpc);
+                               if (sig < 0)
+                                       break;
                                if (sig)
                                        xcp->cp0_epc = bcpc;
                                /* SIGILL forces out of the emulation loop.  */
index 926d56bf37f24ca1dbfcfcdbf71cc8f3165dfa08..eb96485ed939ccc09c378e184c3a2a83fec80717 100644 (file)
 
 union ieee754dp ieee754dp_neg(union ieee754dp x)
 {
-       unsigned int oldrm;
        union ieee754dp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       y = ieee754dp_sub(ieee754dp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               DPSIGN(y) = !DPSIGN(x);
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               y = ieee754dp_sub(ieee754dp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
 
 union ieee754dp ieee754dp_abs(union ieee754dp x)
 {
-       unsigned int oldrm;
        union ieee754dp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       if (DPSIGN(x))
-               y = ieee754dp_sub(ieee754dp_zero(0), x);
-       else
-               y = ieee754dp_add(ieee754dp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               DPSIGN(y) = 0;
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               if (DPSIGN(x))
+                       y = ieee754dp_sub(ieee754dp_zero(0), x);
+               else
+                       y = ieee754dp_add(ieee754dp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
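
Under IEEE 754-2008 ABS/NEG semantics (the abs2008 FCSR bit), NEG.fmt and ABS.fmt are pure sign-bit operations that must pass NaNs through untouched, so the hunks above manipulate DPSIGN directly and keep the subtract-from-zero arithmetic only for legacy mode. The 2008 fast paths in isolation:

    if (ieee754_csr.abs2008) {      /* neg: flip the sign bit only */
            y = x;
            DPSIGN(y) = !DPSIGN(x);
    }

    if (ieee754_csr.abs2008) {      /* abs: clear the sign bit only */
            y = x;
            DPSIGN(y) = 0;
    }
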
index 6ffc336c530e3e11c48c1dcb8ab9eb50224b7ff4..f3985617ce31526c7de1d1fb19a0af3a207bd925 100644 (file)
@@ -38,10 +38,13 @@ int ieee754dp_tint(union ieee754dp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754si_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754si_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -53,7 +56,7 @@ int ieee754dp_tint(union ieee754dp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754si_indef();
+               return ieee754si_overflow(xs);
        }
        /* oh gawd */
        if (xe > DP_FBITS) {
@@ -93,7 +96,7 @@ int ieee754dp_tint(union ieee754dp x)
                if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754si_indef();
+                       return ieee754si_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 9cdc145b75e012d75a22f146c1e3c3b5c65a4e7f..748fa10ed4cf63d3e0995a3d6ae6860b6c432d7b 100644 (file)
@@ -38,10 +38,13 @@ s64 ieee754dp_tlong(union ieee754dp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754di_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754di_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -56,7 +59,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754di_indef();
+               return ieee754di_overflow(xs);
        }
        /* oh gawd */
        if (xe > DP_FBITS) {
@@ -97,7 +100,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
                if ((xm >> 63) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754di_indef();
+                       return ieee754di_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index cbb36c14b155ad07dae2250550bc4090622180cd..46b964d2b79c0a0f24aa7cf6e77e1846a88e842f 100644 (file)
@@ -31,17 +31,41 @@ struct emuframe {
        unsigned long           epc;
 };
 
+/*
+ * Set up an emulation frame for instruction IR, from a delay slot of
+ * a branch jumping to CPC.  Return 0 if successful, -1 if no emulation
+ * required, otherwise a signal number causing a frame setup failure.
+ */
 int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 {
+       int isa16 = get_isa16_mode(regs->cp0_epc);
+       mips_instruction break_math;
        struct emuframe __user *fr;
        int err;
 
-       if ((get_isa16_mode(regs->cp0_epc) && ((ir >> 16) == MM_NOP16)) ||
-               (ir == 0)) {
-               /* NOP is easy */
-               regs->cp0_epc = cpc;
-               clear_delay_slot(regs);
-               return 0;
+       /* NOP is easy */
+       if (ir == 0)
+               return -1;
+
+       /* microMIPS instructions */
+       if (isa16) {
+               union mips_instruction insn = { .word = ir };
+
+               /* NOP16 aka MOVE16 $0, $0 */
+               if ((ir >> 16) == MM_NOP16)
+                       return -1;
+
+               /* ADDIUPC */
+               if (insn.mm_a_format.opcode == mm_addiupc_op) {
+                       unsigned int rs;
+                       s32 v;
+
+                       rs = (((insn.mm_a_format.rs + 0x1e) & 0xf) + 2);
+                       v = regs->cp0_epc & ~3;
+                       v += insn.mm_a_format.simmediate << 2;
+                       regs->regs[rs] = (long)v;
+                       return -1;
+               }
        }
 
        pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc);
@@ -55,14 +79,10 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
         * Algorithmics used a system call instruction, and
         * borrowed that vector.  MIPS/Linux version is a bit
         * more heavyweight in the interests of portability and
-        * multiprocessor support.  For Linux we generate a
-        * an unaligned access and force an address error exception.
-        *
-        * For embedded systems (stand-alone) we prefer to use a
-        * non-existing CP1 instruction. This prevents us from emulating
-        * branches, but gives us a cleaner interface to the exception
-        * handler (single entry point).
+        * multiprocessor support.  For Linux we use a BREAK 514
+        * instruction causing a breakpoint exception.
         */
+       break_math = BREAK_MATH(isa16);
 
        /* Ensure that the two instructions are in the same cache line */
        fr = (struct emuframe __user *)
@@ -72,14 +92,18 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
        if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe))))
                return SIGBUS;
 
-       if (get_isa16_mode(regs->cp0_epc)) {
-               err = __put_user(ir >> 16, (u16 __user *)(&fr->emul));
-               err |= __put_user(ir & 0xffff, (u16 __user *)((long)(&fr->emul) + 2));
-               err |= __put_user(BREAK_MATH >> 16, (u16 __user *)(&fr->badinst));
-               err |= __put_user(BREAK_MATH & 0xffff, (u16 __user *)((long)(&fr->badinst) + 2));
+       if (isa16) {
+               err = __put_user(ir >> 16,
+                                (u16 __user *)(&fr->emul));
+               err |= __put_user(ir & 0xffff,
+                                 (u16 __user *)((long)(&fr->emul) + 2));
+               err |= __put_user(break_math >> 16,
+                                 (u16 __user *)(&fr->badinst));
+               err |= __put_user(break_math & 0xffff,
+                                 (u16 __user *)((long)(&fr->badinst) + 2));
        } else {
                err = __put_user(ir, &fr->emul);
-               err |= __put_user((mips_instruction)BREAK_MATH, &fr->badinst);
+               err |= __put_user(break_math, &fr->badinst);
        }
 
        err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie);
@@ -90,8 +114,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
                return SIGBUS;
        }
 
-       regs->cp0_epc = ((unsigned long) &fr->emul) |
-               get_isa16_mode(regs->cp0_epc);
+       regs->cp0_epc = (unsigned long)&fr->emul | isa16;
 
        flush_cache_sigtramp((unsigned long)&fr->emul);
 
@@ -100,6 +123,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 
 int do_dsemulret(struct pt_regs *xcp)
 {
+       int isa16 = get_isa16_mode(xcp->cp0_epc);
        struct emuframe __user *fr;
        unsigned long epc;
        u32 insn, cookie;
@@ -122,16 +146,19 @@ int do_dsemulret(struct pt_regs *xcp)
         *  - Is the instruction pointed to by the EPC a BREAK_MATH?
         *  - Is the following memory word the BD_COOKIE?
         */
-       if (get_isa16_mode(xcp->cp0_epc)) {
-               err = __get_user(instr[0], (u16 __user *)(&fr->badinst));
-               err |= __get_user(instr[1], (u16 __user *)((long)(&fr->badinst) + 2));
+       if (isa16) {
+               err = __get_user(instr[0],
+                                (u16 __user *)(&fr->badinst));
+               err |= __get_user(instr[1],
+                                 (u16 __user *)((long)(&fr->badinst) + 2));
                insn = (instr[0] << 16) | instr[1];
        } else {
                err = __get_user(insn, &fr->badinst);
        }
        err |= __get_user(cookie, &fr->cookie);
 
-       if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) {
+       if (unlikely(err ||
+                    insn != BREAK_MATH(isa16) || cookie != BD_COOKIE)) {
                MIPS_FPU_EMU_INC_STATS(errors);
                return 0;
        }
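
mips_dsemul() now returns -1 when no emulation frame is needed — a plain NOP, the microMIPS NOP16, or an ADDIUPC that is executed directly — and BREAK_MATH takes the ISA mode so the trapping break matches the frame's encoding. Callers separate the three outcomes as in the cp1emu.c hunks earlier:

    sig = mips_dsemul(xcp, ir, contpc);
    if (sig < 0)            /* handled in place, no frame to run */
            break;
    if (sig)                /* frame setup failed: deliver the signal */
            xcp->cp0_epc = bcpc;
    /* sig == 0: frame installed, resume at &fr->emul */
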
index 8e97acbbe22ceb96789d36c44b98500f9885154a..e16ae7b75dbb775c9af078994e785c257a454f39 100644 (file)
@@ -59,7 +59,8 @@ const union ieee754dp __ieee754dp_spcvals[] = {
        DPCNST(1, 3,           0x4000000000000ULL),     /* - 10.0   */
        DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL),     /* + infinity */
        DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL),     /* - infinity */
-       DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL),     /* + indef quiet Nan */
+       DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL),     /* + ind legacy qNaN */
+       DPCNST(0, DP_EMAX + 1, 0x8000000000000ULL),     /* + indef 2008 qNaN */
        DPCNST(0, DP_EMAX,     0xFFFFFFFFFFFFFULL),     /* + max */
        DPCNST(1, DP_EMAX,     0xFFFFFFFFFFFFFULL),     /* - max */
        DPCNST(0, DP_EMIN,     0x0000000000000ULL),     /* + min normal */
@@ -82,7 +83,8 @@ const union ieee754sp __ieee754sp_spcvals[] = {
        SPCNST(1, 3,           0x200000),       /* - 10.0   */
        SPCNST(0, SP_EMAX + 1, 0x000000),       /* + infinity */
        SPCNST(1, SP_EMAX + 1, 0x000000),       /* - infinity */
-       SPCNST(0, SP_EMAX + 1, 0x3FFFFF),       /* + indef quiet Nan  */
+       SPCNST(0, SP_EMAX + 1, 0x3FFFFF),       /* + indef legacy quiet NaN */
+       SPCNST(0, SP_EMAX + 1, 0x400000),       /* + indef 2008 quiet NaN */
        SPCNST(0, SP_EMAX,     0x7FFFFF),       /* + max normal */
        SPCNST(1, SP_EMAX,     0x7FFFFF),       /* - max normal */
        SPCNST(0, SP_EMIN,     0x000000),       /* + min normal */
index df94720714c73cc95736544f7b05804d607d27ca..d3be351aed151305396723ddfff661837a114cde 100644 (file)
@@ -221,15 +221,16 @@ union ieee754dp ieee754dp_dump(char *s, union ieee754dp x);
 #define IEEE754_SPCVAL_NTEN            5       /* -10.0 */
 #define IEEE754_SPCVAL_PINFINITY       6       /* +inf */
 #define IEEE754_SPCVAL_NINFINITY       7       /* -inf */
-#define IEEE754_SPCVAL_INDEF           8       /* quiet NaN */
-#define IEEE754_SPCVAL_PMAX            9       /* +max norm */
-#define IEEE754_SPCVAL_NMAX            10      /* -max norm */
-#define IEEE754_SPCVAL_PMIN            11      /* +min norm */
-#define IEEE754_SPCVAL_NMIN            12      /* -min norm */
-#define IEEE754_SPCVAL_PMIND           13      /* +min denorm */
-#define IEEE754_SPCVAL_NMIND           14      /* -min denorm */
-#define IEEE754_SPCVAL_P1E31           15      /* + 1.0e31 */
-#define IEEE754_SPCVAL_P1E63           16      /* + 1.0e63 */
+#define IEEE754_SPCVAL_INDEF_LEG       8       /* legacy quiet NaN */
+#define IEEE754_SPCVAL_INDEF_2008      9       /* IEEE 754-2008 quiet NaN */
+#define IEEE754_SPCVAL_PMAX            10      /* +max norm */
+#define IEEE754_SPCVAL_NMAX            11      /* -max norm */
+#define IEEE754_SPCVAL_PMIN            12      /* +min norm */
+#define IEEE754_SPCVAL_NMIN            13      /* -min norm */
+#define IEEE754_SPCVAL_PMIND           14      /* +min denorm */
+#define IEEE754_SPCVAL_NMIND           15      /* -min denorm */
+#define IEEE754_SPCVAL_P1E31           16      /* + 1.0e31 */
+#define IEEE754_SPCVAL_P1E63           17      /* + 1.0e63 */
 
 extern const union ieee754dp __ieee754dp_spcvals[];
 extern const union ieee754sp __ieee754sp_spcvals[];
@@ -243,7 +244,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
 #define ieee754dp_zero(sn)     (ieee754dp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
 #define ieee754dp_one(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
 #define ieee754dp_ten(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754dp_indef()      (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754dp_indef()      (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+                                                  ieee754_csr.nan2008])
 #define ieee754dp_max(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
 #define ieee754dp_min(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
 #define ieee754dp_mind(sn)     (ieee754dp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -254,7 +256,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
 #define ieee754sp_zero(sn)     (ieee754sp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
 #define ieee754sp_one(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
 #define ieee754sp_ten(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754sp_indef()      (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754sp_indef()      (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+                                                  ieee754_csr.nan2008])
 #define ieee754sp_max(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
 #define ieee754sp_min(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
 #define ieee754sp_mind(sn)     (ieee754sp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -266,12 +269,25 @@ extern const union ieee754sp __ieee754sp_spcvals[];
  */
 static inline int ieee754si_indef(void)
 {
-       return INT_MAX;
+       return ieee754_csr.nan2008 ? 0 : INT_MAX;
 }
 
 static inline s64 ieee754di_indef(void)
 {
-       return S64_MAX;
+       return ieee754_csr.nan2008 ? 0 : S64_MAX;
+}
+
+/*
+ * Overflow integer value
+ */
+static inline int ieee754si_overflow(int xs)
+{
+       return ieee754_csr.nan2008 && xs ? INT_MIN : INT_MAX;
+}
+
+static inline s64 ieee754di_overflow(int xs)
+{
+       return ieee754_csr.nan2008 && xs ? S64_MIN : S64_MAX;
 }
 
 /* result types for xctx.rt */
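The two pairs of helpers above capture the conversion semantics: in 2008 mode a NaN converts to 0 and an out-of-range value saturates toward the operand's sign, while legacy mode returns the maximum in both cases. A userspace model of the selection logic (plain C, with a global int standing in for ieee754_csr.nan2008):

#include <limits.h>
#include <stdio.h>

static int nan2008;	/* stand-in for ieee754_csr.nan2008 */

static int si_indef(void)
{
	return nan2008 ? 0 : INT_MAX;
}

static int si_overflow(int xs)	/* xs: sign of the operand */
{
	return nan2008 && xs ? INT_MIN : INT_MAX;
}

int main(void)
{
	for (nan2008 = 0; nan2008 <= 1; nan2008++)
		printf("nan2008=%d: indef=%d +ovf=%d -ovf=%d\n",
		       nan2008, si_indef(), si_overflow(0), si_overflow(1));
	return 0;
}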
index 522d843f2ffd04a47873328724a9d45965dce0ce..ad3c73436777f0c3103c887827c4547d888f5941 100644 (file)
@@ -37,8 +37,11 @@ static inline int ieee754dp_isnan(union ieee754dp x)
 
 static inline int ieee754dp_issnan(union ieee754dp x)
 {
+       int qbit;
+
        assert(ieee754dp_isnan(x));
-       return (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+       qbit = (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+       return ieee754_csr.nan2008 ^ qbit;
 }
 
 
@@ -51,7 +54,12 @@ union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r)
        assert(ieee754dp_issnan(r));
 
        ieee754_setcx(IEEE754_INVALID_OPERATION);
-       return ieee754dp_indef();
+       if (ieee754_csr.nan2008)
+               DPMANT(r) |= DP_MBIT(DP_FBITS - 1);
+       else
+               r = ieee754dp_indef();
+
+       return r;
 }
 
 static u64 ieee754dp_get_rounding(int sn, u64 xm)
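The rewritten issnan() above is where the encoding flip happens: the same mantissa bit means "signalling" under the legacy MIPS convention and "quiet" under IEEE 754-2008, hence the XOR with the mode flag. A standalone sketch of that test for double precision:

#include <stdint.h>
#include <stdio.h>

#define DP_FBITS 52
#define DP_QBIT ((uint64_t)1 << (DP_FBITS - 1))

static int issnan(uint64_t mant, int nan2008)
{
	int qbit = (mant & DP_QBIT) == DP_QBIT;

	return nan2008 ^ qbit;	/* legacy: set bit => sNaN; 2008: set bit => qNaN */
}

int main(void)
{
	printf("legacy, bit set: sNaN? %d\n", issnan(DP_QBIT, 0));	/* 1 */
	printf("2008,   bit set: sNaN? %d\n", issnan(DP_QBIT, 1));	/* 0 */
	return 0;
}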
index 6383e2c5c1adb5d527fe1023eb173f06e3776dc7..ed7bb277b3e05e224315508f32396fd37c239641 100644 (file)
@@ -63,10 +63,10 @@ static inline int ieee754_class_nan(int xc)
        if (ve == SP_EMAX+1+SP_EBIAS) {                                 \
                if (vm == 0)                                            \
                        vc = IEEE754_CLASS_INF;                         \
-               else if (vm & SP_MBIT(SP_FBITS-1))                      \
-                       vc = IEEE754_CLASS_SNAN;                        \
-               else                                                    \
+               else if (ieee754_csr.nan2008 ^ !(vm & SP_MBIT(SP_FBITS - 1))) \
                        vc = IEEE754_CLASS_QNAN;                        \
+               else                                                    \
+                       vc = IEEE754_CLASS_SNAN;                        \
        } else if (ve == SP_EMIN-1+SP_EBIAS) {                          \
                if (vm) {                                               \
                        ve = SP_EMIN;                                   \
@@ -97,10 +97,10 @@ static inline int ieee754_class_nan(int xc)
        if (ve == DP_EMAX+1+DP_EBIAS) {                                 \
                if (vm == 0)                                            \
                        vc = IEEE754_CLASS_INF;                         \
-               else if (vm & DP_MBIT(DP_FBITS-1))                      \
-                       vc = IEEE754_CLASS_SNAN;                        \
-               else                                                    \
+               else if (ieee754_csr.nan2008 ^ !(vm & DP_MBIT(DP_FBITS - 1))) \
                        vc = IEEE754_CLASS_QNAN;                        \
+               else                                                    \
+                       vc = IEEE754_CLASS_SNAN;                        \
        } else if (ve == DP_EMIN-1+DP_EBIAS) {                          \
                if (vm) {                                               \
                        ve = DP_EMIN;                                   \
index ca8e35e33bf790f594156268e320ce866b2ed5c2..def00ffc50fcc7bc4740933215cdf5872940a450 100644 (file)
@@ -37,8 +37,11 @@ static inline int ieee754sp_isnan(union ieee754sp x)
 
 static inline int ieee754sp_issnan(union ieee754sp x)
 {
+       int qbit;
+
        assert(ieee754sp_isnan(x));
-       return SPMANT(x) & SP_MBIT(SP_FBITS - 1);
+       qbit = (SPMANT(x) & SP_MBIT(SP_FBITS - 1)) == SP_MBIT(SP_FBITS - 1);
+       return ieee754_csr.nan2008 ^ qbit;
 }
 
 
@@ -51,7 +54,12 @@ union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r)
        assert(ieee754sp_issnan(r));
 
        ieee754_setcx(IEEE754_INVALID_OPERATION);
-       return ieee754sp_indef();
+       if (ieee754_csr.nan2008)
+               SPMANT(r) |= SP_MBIT(SP_FBITS - 1);
+       else
+               r = ieee754sp_indef();
+
+       return r;
 }
 
 static unsigned ieee754sp_get_rounding(int sn, unsigned xm)
index 3797148893adb62c10a989dc78e5e9dce56e1e9e..5060e8fdcb0b8817c5f6836e0a20273f08a79b52 100644 (file)
@@ -44,13 +44,16 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x)
 
        switch (xc) {
        case IEEE754_CLASS_SNAN:
-               return ieee754sp_nanxcpt(ieee754sp_nan_fdp(xs, xm));
-
+               x = ieee754dp_nanxcpt(x);
+               EXPLODEXDP;
+               /* Fall through.  */
        case IEEE754_CLASS_QNAN:
                y = ieee754sp_nan_fdp(xs, xm);
-               EXPLODEYSP;
-               if (!ieee754_class_nan(yc))
-                       y = ieee754sp_indef();
+               if (!ieee754_csr.nan2008) {
+                       EXPLODEYSP;
+                       if (!ieee754_class_nan(yc))
+                               y = ieee754sp_indef();
+               }
                return y;
 
        case IEEE754_CLASS_INF:
index c50e9451f2d21da196d0afbe00979548ca8b217e..756c9cf2dfd2514f7461190a9a03fc065c697195 100644 (file)
 
 union ieee754sp ieee754sp_neg(union ieee754sp x)
 {
-       unsigned int oldrm;
        union ieee754sp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       y = ieee754sp_sub(ieee754sp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               SPSIGN(y) = !SPSIGN(x);
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               y = ieee754sp_sub(ieee754sp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
 
 union ieee754sp ieee754sp_abs(union ieee754sp x)
 {
-       unsigned int oldrm;
        union ieee754sp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       if (SPSIGN(x))
-               y = ieee754sp_sub(ieee754sp_zero(0), x);
-       else
-               y = ieee754sp_add(ieee754sp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               SPSIGN(y) = 0;
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               if (SPSIGN(x))
+                       y = ieee754sp_sub(ieee754sp_zero(0), x);
+               else
+                       y = ieee754sp_add(ieee754sp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
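With abs2008 set, neg and abs become pure sign-bit manipulations: no rounding-mode juggling, no arithmetic, and NaN operands pass through unchanged, as IEEE 754-2008 requires. Modeled on raw single-precision bit images (userspace sketch):

#include <stdint.h>
#include <stdio.h>

static uint32_t sp_neg2008(uint32_t x) { return x ^ 0x80000000u; }
static uint32_t sp_abs2008(uint32_t x) { return x & 0x7fffffffu; }

int main(void)
{
	uint32_t qnan = 0x7fc00000u;

	printf("neg(qNaN) = 0x%08x (same NaN, sign flipped)\n", sp_neg2008(qnan));
	printf("abs(-0.0) = 0x%08x (+0.0)\n", sp_abs2008(0x80000000u));
	return 0;
}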
index 091299a317980b6bf0cfd84f0958b1f271d2bf9d..f4b4cabfe2e11740e17b4c77c913e6762ac26b71 100644 (file)
@@ -38,10 +38,13 @@ int ieee754sp_tint(union ieee754sp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754si_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754si_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -56,7 +59,7 @@ int ieee754sp_tint(union ieee754sp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754si_indef();
+               return ieee754si_overflow(xs);
        }
        /* oh gawd */
        if (xe > SP_FBITS) {
@@ -97,7 +100,7 @@ int ieee754sp_tint(union ieee754sp x)
                if ((xm >> 31) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754si_indef();
+                       return ieee754si_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 9f3c742c1cea6ac264d1d4d66ef56b6916f675cb..a2450c7e452a672fdeba96bba5a7c262c1b6e876 100644 (file)
@@ -39,10 +39,13 @@ s64 ieee754sp_tlong(union ieee754sp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754di_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754di_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -57,7 +60,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754di_indef();
+               return ieee754di_overflow(xs);
        }
        /* oh gawd */
        if (xe > SP_FBITS) {
@@ -94,7 +97,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
                if ((xm >> 63) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754di_indef();
+                       return ieee754di_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 5c81fdd032c3b1269549f27e27348e9606eb5424..353037699512ca5515b11ce8fb2c808eb6386c78 100644 (file)
@@ -146,7 +146,7 @@ unsigned long arch_mmap_rnd(void)
 {
        unsigned long rnd;
 
-       rnd = (unsigned long)get_random_int();
+       rnd = get_random_long();
        rnd <<= PAGE_SHIFT;
        if (TASK_IS_32BIT_ADDR)
                rnd &= 0xfffffful;
@@ -174,7 +174,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 
 static inline unsigned long brk_rnd(void)
 {
-       unsigned long rnd = get_random_int();
+       unsigned long rnd = get_random_long();
 
        rnd = rnd << PAGE_SHIFT;
        /* 8MB for 32bit, 256MB for 64bit */
index 3bd0597d9c3da3a56ea6ba083ca05c67bd71c5e7..249647578e587580eb1b528a5af7540ee13f19d2 100644 (file)
@@ -181,10 +181,6 @@ static int __init mips_sc_probe_cm3(void)
        return 1;
 }
 
-void __weak platform_early_l2_init(void)
-{
-}
-
 static inline int __init mips_sc_probe(void)
 {
        struct cpuinfo_mips *c = &current_cpu_data;
@@ -194,12 +190,6 @@ static inline int __init mips_sc_probe(void)
        /* Mark as not present until probe completed */
        c->scache.flags |= MIPS_CACHE_NOT_PRESENT;
 
-       /*
-        * Do we need some platform specific probing before
-        * we configure L2?
-        */
-       platform_early_l2_init();
-
        if (mips_cm_revision() >= CM_REV_CM3)
                return mips_sc_probe_cm3();
 
index 482192cc8f2b88ae89f4cf1495c0dd6055a5bda6..5a04b6f5c6fb8a2cc52d986f59042ad8cd585850 100644 (file)
@@ -241,7 +241,7 @@ static void output_pgtable_bits_defines(void)
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
        pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT);
 #endif
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (cpu_has_rixi) {
 #ifdef _PAGE_NO_EXEC_SHIFT
                pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT);
index 571148c5fd0baa5ce8656e87574144d90b58e346..dc2c5214809d38703a7048babe7a16f83d77f28a 100644 (file)
@@ -293,7 +293,6 @@ mips_pci_controller:
        console_config();
 #endif
        /* Early detection of CMP support */
-       mips_cm_probe();
        mips_cpc_probe();
 
        if (!register_cps_smp_ops())
@@ -304,10 +303,3 @@ mips_pci_controller:
                return;
        register_up_smp_ops();
 }
-
-void platform_early_l2_init(void)
-{
-       /* L2 configuration lives in the CM3 */
-       if (mips_cm_revision() >= CM_REV_CM3)
-               mips_cm_probe();
-}
index 2eda01e6e08fd38d5e572f5e66283536d9e9cd34..139ad1d7ab5e3ce9dfa8aba6b507fe04e8b8d8b9 100644 (file)
@@ -43,6 +43,7 @@ obj-$(CONFIG_SIBYTE_BCM1x80)  += pci-bcm1480.o pci-bcm1480ht.o
 obj-$(CONFIG_SNI_RM)           += fixup-sni.o ops-sni.o
 obj-$(CONFIG_LANTIQ)           += fixup-lantiq.o
 obj-$(CONFIG_PCI_LANTIQ)       += pci-lantiq.o ops-lantiq.o
+obj-$(CONFIG_SOC_MT7620)       += pci-mt7620.o
 obj-$(CONFIG_SOC_RT288X)       += pci-rt2880.o
 obj-$(CONFIG_SOC_RT3883)       += pci-rt3883.o
 obj-$(CONFIG_TANBAC_TB0219)    += fixup-tb0219.o
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
new file mode 100644 (file)
index 0000000..1ae932c
--- /dev/null
@@ -0,0 +1,426 @@
+/*
+ *  Ralink MT7620A SoC PCI support
+ *
+ *  Copyright (C) 2007-2013 Bruce Chang (Mediatek)
+ *  Copyright (C) 2013-2016 John Crispin <blogic@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+
+#include <asm/mach-ralink/ralink_regs.h>
+#include <asm/mach-ralink/mt7620.h>
+
+#define RALINK_PCI_IO_MAP_BASE         0x10160000
+#define RALINK_PCI_MEMORY_BASE         0x0
+
+#define RALINK_INT_PCIE0               4
+
+#define RALINK_CLKCFG1                 0x30
+#define RALINK_GPIOMODE                        0x60
+
+#define PPLL_CFG1                      0x9c
+#define PPLL_SW_SET                    BIT(23)
+
+#define PPLL_DRV                       0xa0
+#define PDRV_SW_SET                    BIT(31)
+#define LC_CKDRVPD                     (1<<19)
+#define LC_CKDRVOHZ                    (1<<18)
+#define LC_CKDRVHZ                     (1<<17)
+#define LC_CKTEST                      (1<<16)
+
+/* PCI Bridge registers */
+#define RALINK_PCI_PCICFG_ADDR         0x00
+#define PCIRST                         BIT(1)
+
+#define RALINK_PCI_PCIENA              0x0C
+#define PCIINT2                                BIT(20)
+
+#define RALINK_PCI_CONFIG_ADDR         0x20
+#define RALINK_PCI_CONFIG_DATA_VIRT_REG        0x24
+#define RALINK_PCI_MEMBASE             0x28
+#define RALINK_PCI_IOBASE              0x2C
+
+/* PCI RC registers */
+#define RALINK_PCI0_BAR0SETUP_ADDR     0x10
+#define RALINK_PCI0_IMBASEBAR0_ADDR    0x18
+#define RALINK_PCI0_ID                 0x30
+#define RALINK_PCI0_CLASS              0x34
+#define RALINK_PCI0_SUBID              0x38
+#define RALINK_PCI0_STATUS             0x50
+#define PCIE_LINK_UP_ST                        BIT(0)
+
+#define PCIEPHY0_CFG                   0x90
+
+#define RALINK_PCIEPHY_P0_CTL_OFFSET   0x7498
+#define RALINK_PCIE0_CLK_EN            (1 << 26)
+
+#define BUSY                           0x80000000
+#define WAITRETRY_MAX                  10
+#define WRITE_MODE                     (1UL << 23)
+#define DATA_SHIFT                     0
+#define ADDR_SHIFT                     8
+
+static void __iomem *bridge_base;
+static void __iomem *pcie_base;
+
+static struct reset_control *rstpcie0;
+
+static inline void bridge_w32(u32 val, unsigned reg)
+{
+       iowrite32(val, bridge_base + reg);
+}
+
+static inline u32 bridge_r32(unsigned reg)
+{
+       return ioread32(bridge_base + reg);
+}
+
+static inline void pcie_w32(u32 val, unsigned reg)
+{
+       iowrite32(val, pcie_base + reg);
+}
+
+static inline u32 pcie_r32(unsigned reg)
+{
+       return ioread32(pcie_base + reg);
+}
+
+static inline void pcie_m32(u32 clr, u32 set, unsigned reg)
+{
+       u32 val = pcie_r32(reg);
+
+       val &= ~clr;
+       val |= set;
+       pcie_w32(val, reg);
+}
+
+static int wait_pciephy_busy(void)
+{
+       unsigned long reg_value = 0x0, retry = 0;
+
+       while (1) {
+               reg_value = pcie_r32(PCIEPHY0_CFG);
+
+               if (reg_value & BUSY)
+                       mdelay(100);
+               else
+                       break;
+               if (retry++ > WAITRETRY_MAX) {
+                       pr_warn("PCIE-PHY retry failed.\n");
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+static void pcie_phy(unsigned long addr, unsigned long val)
+{
+       wait_pciephy_busy();
+       pcie_w32(WRITE_MODE | (val << DATA_SHIFT) | (addr << ADDR_SHIFT),
+                PCIEPHY0_CFG);
+       mdelay(1);
+       wait_pciephy_busy();
+}
+
+static int pci_config_read(struct pci_bus *bus, unsigned int devfn, int where,
+                          int size, u32 *val)
+{
+       unsigned int slot = PCI_SLOT(devfn);
+       u8 func = PCI_FUNC(devfn);
+       u32 address;
+       u32 data;
+       u32 num = 0;
+
+       if (bus)
+               num = bus->number;
+
+       address = (((where & 0xF00) >> 8) << 24) | (num << 16) | (slot << 11) |
+                 (func << 8) | (where & 0xfc) | 0x80000000;
+       bridge_w32(address, RALINK_PCI_CONFIG_ADDR);
+       data = bridge_r32(RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       switch (size) {
+       case 1:
+               *val = (data >> ((where & 3) << 3)) & 0xff;
+               break;
+       case 2:
+               *val = (data >> ((where & 3) << 3)) & 0xffff;
+               break;
+       case 4:
+               *val = data;
+               break;
+       }
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_config_write(struct pci_bus *bus, unsigned int devfn, int where,
+                           int size, u32 val)
+{
+       unsigned int slot = PCI_SLOT(devfn);
+       u8 func = PCI_FUNC(devfn);
+       u32 address;
+       u32 data;
+       u32 num = 0;
+
+       if (bus)
+               num = bus->number;
+
+       address = (((where & 0xF00) >> 8) << 24) | (num << 16) | (slot << 11) |
+                 (func << 8) | (where & 0xfc) | 0x80000000;
+       bridge_w32(address, RALINK_PCI_CONFIG_ADDR);
+       data = bridge_r32(RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       switch (size) {
+       case 1:
+               data = (data & ~(0xff << ((where & 3) << 3))) |
+                       (val << ((where & 3) << 3));
+               break;
+       case 2:
+               data = (data & ~(0xffff << ((where & 3) << 3))) |
+                       (val << ((where & 3) << 3));
+               break;
+       case 4:
+               data = val;
+               break;
+       }
+
+       bridge_w32(data, RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops mt7620_pci_ops = {
+       .read   = pci_config_read,
+       .write  = pci_config_write,
+};
+
+static struct resource mt7620_res_pci_mem1;
+static struct resource mt7620_res_pci_io1;
+struct pci_controller mt7620_controller = {
+       .pci_ops        = &mt7620_pci_ops,
+       .mem_resource   = &mt7620_res_pci_mem1,
+       .mem_offset     = 0x00000000UL,
+       .io_resource    = &mt7620_res_pci_io1,
+       .io_offset      = 0x00000000UL,
+       .io_map_base    = 0xa0000000,
+};
+
+static int mt7620_pci_hw_init(struct platform_device *pdev)
+{
+       /* bypass PCIe DLL */
+       pcie_phy(0x0, 0x80);
+       pcie_phy(0x1, 0x04);
+
+       /* Elastic buffer control */
+       pcie_phy(0x68, 0xB4);
+
+       /* put core into reset */
+       pcie_m32(0, PCIRST, RALINK_PCI_PCICFG_ADDR);
+       reset_control_assert(rstpcie0);
+
+       /* disable power and all clocks */
+       rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+       rt_sysc_m32(LC_CKDRVPD, PDRV_SW_SET, PPLL_DRV);
+
+       /* bring core out of reset */
+       reset_control_deassert(rstpcie0);
+       rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1);
+       mdelay(100);
+
+       if (!(rt_sysc_r32(PPLL_CFG1) & PPLL_SW_SET)) {
+               dev_err(&pdev->dev, "MT7620 PPLL unlock\n");
+               reset_control_assert(rstpcie0);
+               rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+               return -1;
+       }
+
+       /* power up the bus */
+       rt_sysc_m32(LC_CKDRVHZ | LC_CKDRVOHZ, LC_CKDRVPD | PDRV_SW_SET,
+                   PPLL_DRV);
+
+       return 0;
+}
+
+static int mt7628_pci_hw_init(struct platform_device *pdev)
+{
+       u32 val = 0;
+
+       /* bring the core out of reset */
+       rt_sysc_m32(BIT(16), 0, RALINK_GPIOMODE);
+       reset_control_deassert(rstpcie0);
+
+       /* enable the pci clk */
+       rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1);
+       mdelay(100);
+
+       /* voodoo from the SDK driver */
+       pcie_m32(~0xff, 0x5, RALINK_PCIEPHY_P0_CTL_OFFSET);
+
+       pci_config_read(NULL, 0, 0x70c, 4, &val);
+       val &= ~(0xff << 8);
+       val |= 0x50 << 8;
+       pci_config_write(NULL, 0, 0x70c, 4, val);
+
+       pci_config_read(NULL, 0, 0x70c, 4, &val);
+       dev_err(&pdev->dev, "Port 0 N_FTS = %x\n", (unsigned int) val);
+
+       return 0;
+}
+
+static int mt7620_pci_probe(struct platform_device *pdev)
+{
+       struct resource *bridge_res = platform_get_resource(pdev,
+                                                           IORESOURCE_MEM, 0);
+       struct resource *pcie_res = platform_get_resource(pdev,
+                                                         IORESOURCE_MEM, 1);
+       u32 val = 0;
+
+       rstpcie0 = devm_reset_control_get(&pdev->dev, "pcie0");
+       if (IS_ERR(rstpcie0))
+               return PTR_ERR(rstpcie0);
+
+       bridge_base = devm_ioremap_resource(&pdev->dev, bridge_res);
+       if (IS_ERR(bridge_base))
+               return PTR_ERR(bridge_base);
+
+       pcie_base = devm_ioremap_resource(&pdev->dev, pcie_res);
+       if (IS_ERR(pcie_base))
+               return PTR_ERR(pcie_base);
+
+       iomem_resource.start = 0;
+       iomem_resource.end = ~0;
+       ioport_resource.start = 0;
+       ioport_resource.end = ~0;
+
+       /* bring up the pci core */
+       switch (ralink_soc) {
+       case MT762X_SOC_MT7620A:
+               if (mt7620_pci_hw_init(pdev))
+                       return -1;
+               break;
+
+       case MT762X_SOC_MT7628AN:
+               if (mt7628_pci_hw_init(pdev))
+                       return -1;
+               break;
+
+       default:
+               dev_err(&pdev->dev, "pcie is not supported on this hardware\n");
+               return -1;
+       }
+       mdelay(50);
+
+       /* enable write access */
+       pcie_m32(PCIRST, 0, RALINK_PCI_PCICFG_ADDR);
+       mdelay(100);
+
+       /* check if there is a card present */
+       if ((pcie_r32(RALINK_PCI0_STATUS) & PCIE_LINK_UP_ST) == 0) {
+               reset_control_assert(rstpcie0);
+               rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+               if (ralink_soc == MT762X_SOC_MT7620A)
+                       rt_sysc_m32(LC_CKDRVPD, PDRV_SW_SET, PPLL_DRV);
+               dev_err(&pdev->dev, "PCIE0 no card, disable it(RST&CLK)\n");
+               return -1;
+       }
+
+       /* setup ranges */
+       bridge_w32(0xffffffff, RALINK_PCI_MEMBASE);
+       bridge_w32(RALINK_PCI_IO_MAP_BASE, RALINK_PCI_IOBASE);
+
+       pcie_w32(0x7FFF0001, RALINK_PCI0_BAR0SETUP_ADDR);
+       pcie_w32(RALINK_PCI_MEMORY_BASE, RALINK_PCI0_IMBASEBAR0_ADDR);
+       pcie_w32(0x06040001, RALINK_PCI0_CLASS);
+
+       /* enable interrupts */
+       pcie_m32(0, PCIINT2, RALINK_PCI_PCIENA);
+
+       /* voodoo from the SDK driver */
+       pci_config_read(NULL, 0, 4, 4, &val);
+       pci_config_write(NULL, 0, 4, 4, val | 0x7);
+
+       pci_load_of_ranges(&mt7620_controller, pdev->dev.of_node);
+       register_pci_controller(&mt7620_controller);
+
+       return 0;
+}
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+       u16 cmd;
+       u32 val;
+       int irq = 0;
+
+       if ((dev->bus->number == 0) && (slot == 0)) {
+               pcie_w32(0x7FFF0001, RALINK_PCI0_BAR0SETUP_ADDR);
+               pci_config_write(dev->bus, 0, PCI_BASE_ADDRESS_0, 4,
+                                RALINK_PCI_MEMORY_BASE);
+               pci_config_read(dev->bus, 0, PCI_BASE_ADDRESS_0, 4, &val);
+       } else if ((dev->bus->number == 1) && (slot == 0x0)) {
+               irq = RALINK_INT_PCIE0;
+       } else {
+               dev_err(&dev->dev, "no irq found - bus=0x%x, slot = 0x%x\n",
+                       dev->bus->number, slot);
+               return 0;
+       }
+       dev_err(&dev->dev, "card - bus=0x%x, slot = 0x%x irq=%d\n",
+               dev->bus->number, slot, irq);
+
+       /* configure the cache line size to 0x14 */
+       pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 0x14);
+
+       /* configure latency timer to 0xff */
+       pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xff);
+       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+
+       /* setup the slot */
+       cmd = cmd | PCI_COMMAND_MASTER | PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
+       pci_write_config_word(dev, PCI_COMMAND, cmd);
+       pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+
+       return irq;
+}
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+       return 0;
+}
+
+static const struct of_device_id mt7620_pci_ids[] = {
+       { .compatible = "mediatek,mt7620-pci" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, mt7620_pci_ids);
+
+static struct platform_driver mt7620_pci_driver = {
+       .probe = mt7620_pci_probe,
+       .driver = {
+               .name = "mt7620-pci",
+               .owner = THIS_MODULE,
+               .of_match_table = of_match_ptr(mt7620_pci_ids),
+       },
+};
+
+static int __init mt7620_pci_init(void)
+{
+       return platform_driver_register(&mt7620_pci_driver);
+}
+
+arch_initcall(mt7620_pci_init);
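For orientation, pci_config_read() and pci_config_write() in the new driver both build the same configuration-cycle address before programming RALINK_PCI_CONFIG_ADDR. A userspace sketch of that encoding; the bus/slot/function values below are illustrative only:

#include <stdint.h>
#include <stdio.h>

static uint32_t cfg_addr(uint32_t bus, uint32_t slot, uint32_t func,
			 uint32_t where)
{
	return (((where & 0xF00) >> 8) << 24) | (bus << 16) | (slot << 11) |
	       (func << 8) | (where & 0xfc) | 0x80000000u;
}

int main(void)
{
	/* bus 1, slot 0, func 0, dword at config offset 0x10 (BAR0) */
	printf("addr = 0x%08x\n", cfg_addr(1, 0, 0, 0x10));
	return 0;
}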
diff --git a/arch/mips/pic32/Kconfig b/arch/mips/pic32/Kconfig
new file mode 100644 (file)
index 0000000..fde56a8
--- /dev/null
@@ -0,0 +1,51 @@
+if MACH_PIC32
+
+choice
+       prompt "Machine Type"
+
+config PIC32MZDA
+       bool "Microchip PIC32MZDA Platform"
+       select BOOT_ELF32
+       select BOOT_RAW
+       select CEVT_R4K
+       select CSRC_R4K
+       select DMA_NONCOHERENT
+       select SYS_HAS_CPU_MIPS32_R2
+       select SYS_HAS_EARLY_PRINTK
+       select SYS_SUPPORTS_32BIT_KERNEL
+       select SYS_SUPPORTS_LITTLE_ENDIAN
+       select ARCH_REQUIRE_GPIOLIB
+       select HAVE_MACH_CLKDEV
+       select COMMON_CLK
+       select CLKDEV_LOOKUP
+       select LIBFDT
+       select USE_OF
+       select PINCTRL
+       select PIC32_EVIC
+       help
+         Support for the Microchip PIC32MZDA microcontroller.
+
+         This is a 32-bit microcontroller with support for external or
+         internally packaged DDR2 memory up to 128MB.
+
+         For more information, see <http://www.microchip.com/>.
+
+endchoice
+
+choice
+       prompt "Devicetree selection"
+       default DTB_PIC32_NONE
+       help
+         Select the devicetree.
+
+config DTB_PIC32_NONE
+       bool "None"
+
+config DTB_PIC32_MZDA_SK
+       bool "PIC32MZDA Starter Kit"
+       depends on PIC32MZDA
+       select BUILTIN_DTB
+
+endchoice
+
+endif # MACH_PIC32
diff --git a/arch/mips/pic32/Makefile b/arch/mips/pic32/Makefile
new file mode 100644 (file)
index 0000000..fd357f4
--- /dev/null
@@ -0,0 +1,6 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-$(CONFIG_MACH_PIC32) += common/
+obj-$(CONFIG_PIC32MZDA) += pic32mzda/
diff --git a/arch/mips/pic32/Platform b/arch/mips/pic32/Platform
new file mode 100644 (file)
index 0000000..cd2084f
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# PIC32MZDA
+#
+platform-$(CONFIG_PIC32MZDA)   += pic32/
+cflags-$(CONFIG_PIC32MZDA)     += -I$(srctree)/arch/mips/include/asm/mach-pic32
+load-$(CONFIG_PIC32MZDA)       += 0xffffffff88000000
+all-$(CONFIG_PIC32MZDA)                := $(COMPRESSION_FNAME).bin
diff --git a/arch/mips/pic32/common/Makefile b/arch/mips/pic32/common/Makefile
new file mode 100644 (file)
index 0000000..be1909c
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-y = reset.o irq.o
diff --git a/arch/mips/pic32/common/irq.c b/arch/mips/pic32/common/irq.c
new file mode 100644 (file)
index 0000000..6df347e
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/irqchip.h>
+#include <asm/irq.h>
+
+void __init arch_init_irq(void)
+{
+       irqchip_init();
+}
diff --git a/arch/mips/pic32/common/reset.c b/arch/mips/pic32/common/reset.c
new file mode 100644 (file)
index 0000000..8334575
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/reboot.h>
+#include <asm/mach-pic32/pic32.h>
+
+#define PIC32_RSWRST           0x10
+
+static void pic32_halt(void)
+{
+       while (1) {
+               __asm__(".set push;\n"
+                       ".set arch=r4000;\n"
+                       "wait;\n"
+                       ".set pop;\n"
+               );
+       }
+}
+
+static void pic32_machine_restart(char *command)
+{
+       void __iomem *reg =
+               ioremap(PIC32_BASE_RESET + PIC32_RSWRST, sizeof(u32));
+
+       pic32_syskey_unlock();
+
+       /* magic write/read */
+       __raw_writel(1, reg);
+       (void)__raw_readl(reg);
+
+       pic32_halt();
+}
+
+static void pic32_machine_halt(void)
+{
+       local_irq_disable();
+
+       pic32_halt();
+}
+
+static int __init mips_reboot_setup(void)
+{
+       _machine_restart = pic32_machine_restart;
+       _machine_halt = pic32_machine_halt;
+       pm_power_off = pic32_machine_halt;
+
+       return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/pic32/pic32mzda/Makefile b/arch/mips/pic32/pic32mzda/Makefile
new file mode 100644 (file)
index 0000000..4a4c272
--- /dev/null
@@ -0,0 +1,9 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-y                  := init.o time.o config.o
+
+obj-$(CONFIG_EARLY_PRINTK)     += early_console.o      \
+                                  early_pin.o          \
+                                  early_clk.o
diff --git a/arch/mips/pic32/pic32mzda/config.c b/arch/mips/pic32/pic32mzda/config.c
new file mode 100644 (file)
index 0000000..fe293a0
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Purna Chandra Mandal, purna.mandal@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/of_platform.h>
+
+#include <asm/mach-pic32/pic32.h>
+
+#include "pic32mzda.h"
+
+#define PIC32_CFGCON   0x0000
+#define PIC32_DEVID    0x0020
+#define PIC32_SYSKEY   0x0030
+#define PIC32_CFGEBIA  0x00c0
+#define PIC32_CFGEBIC  0x00d0
+#define PIC32_CFGCON2  0x00f0
+#define PIC32_RCON     0x1240
+
+static void __iomem *pic32_conf_base;
+static DEFINE_SPINLOCK(config_lock);
+static u32 pic32_reset_status;
+
+static u32 pic32_conf_get_reg_field(u32 offset, u32 rshift, u32 mask)
+{
+       u32 v;
+
+       v = readl(pic32_conf_base + offset);
+       v >>= rshift;
+       v &= mask;
+
+       return v;
+}
+
+static u32 pic32_conf_modify_atomic(u32 offset, u32 mask, u32 set)
+{
+       u32 v;
+       unsigned long flags;
+
+       spin_lock_irqsave(&config_lock, flags);
+       v = readl(pic32_conf_base + offset);
+       v &= ~mask;
+       v |= (set & mask);
+       writel(v, pic32_conf_base + offset);
+       spin_unlock_irqrestore(&config_lock, flags);
+
+       return 0;
+}
+
+int pic32_enable_lcd(void)
+{
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(31), BIT(31));
+}
+
+int pic32_disable_lcd(void)
+{
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(31), 0);
+}
+
+int pic32_set_lcd_mode(int mode)
+{
+       u32 mask = mode ? BIT(30) : 0;
+
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(30), mask);
+}
+
+int pic32_set_sdhci_adma_fifo_threshold(u32 rthrsh, u32 wthrsh)
+{
+       u32 clr, set;
+
+       clr = (0x3ff << 4) | (0x3ff << 16);
+       set = (rthrsh << 4) | (wthrsh << 16);
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, clr, set);
+}
+
+void pic32_syskey_unlock_debug(const char *func, const ulong line)
+{
+       void __iomem *syskey = pic32_conf_base + PIC32_SYSKEY;
+
+       pr_debug("%s: called from %s:%lu\n", __func__, func, line);
+       writel(0x00000000, syskey);
+       writel(0xAA996655, syskey);
+       writel(0x556699AA, syskey);
+}
+
+static u32 pic32_get_device_id(void)
+{
+       return pic32_conf_get_reg_field(PIC32_DEVID, 0, 0x0fffffff);
+}
+
+static u32 pic32_get_device_version(void)
+{
+       return pic32_conf_get_reg_field(PIC32_DEVID, 28, 0xf);
+}
+
+u32 pic32_get_boot_status(void)
+{
+       return pic32_reset_status;
+}
+EXPORT_SYMBOL(pic32_get_boot_status);
+
+void __init pic32_config_init(void)
+{
+       pic32_conf_base = ioremap(PIC32_BASE_CONFIG, 0x110);
+       if (!pic32_conf_base)
+               panic("pic32: config base not mapped");
+
+       /* Boot Status */
+       pic32_reset_status = readl(pic32_conf_base + PIC32_RCON);
+       writel(-1, PIC32_CLR(pic32_conf_base + PIC32_RCON));
+
+       /* Device Information */
+       pr_info("Device Id: 0x%08x, Device Ver: 0x%04x\n",
+               pic32_get_device_id(),
+               pic32_get_device_version());
+}
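pic32_set_sdhci_adma_fifo_threshold() above packs two 10-bit FIFO thresholds into CFGCON2 via the clear/set pair handed to pic32_conf_modify_atomic(). A quick userspace check of the field arithmetic (the threshold values are hypothetical):

#include <stdio.h>

int main(void)
{
	unsigned int rthrsh = 0x100, wthrsh = 0x200;	/* hypothetical */
	unsigned int clr = (0x3ff << 4) | (0x3ff << 16);
	unsigned int set = (rthrsh << 4) | (wthrsh << 16);

	printf("clr = 0x%08x, set = 0x%08x\n", clr, set);
	return 0;
}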
diff --git a/arch/mips/pic32/pic32mzda/early_clk.c b/arch/mips/pic32/pic32mzda/early_clk.c
new file mode 100644 (file)
index 0000000..96c090e
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/mach-pic32/pic32.h>
+
+#include "pic32mzda.h"
+
+/* Oscillators, PLL & clocks */
+#define ICLK_MASK      0x00000080
+#define PLLDIV_MASK    0x00000007
+#define CUROSC_MASK    0x00000007
+#define PLLMUL_MASK    0x0000007F
+#define PB_MASK                0x00000007
+#define FRC1           0
+#define FRC2           7
+#define SPLL           1
+#define POSC           2
+#define FRC_CLK                8000000
+
+#define PIC32_POSC_FREQ        24000000
+
+#define OSCCON         0x0000
+#define SPLLCON                0x0020
+#define PB1DIV         0x0140
+
+u32 pic32_get_sysclk(void)
+{
+       u32 osc_freq = 0;
+       u32 pllclk;
+       u32 frcdivn;
+       u32 osccon;
+       u32 spllcon;
+       int curr_osc;
+
+       u32 plliclk;
+       u32 pllidiv;
+       u32 pllodiv;
+       u32 pllmult;
+       u32 frcdiv;
+
+       void __iomem *osc_base = ioremap(PIC32_BASE_OSC, 0x200);
+
+       osccon = __raw_readl(osc_base + OSCCON);
+       spllcon = __raw_readl(osc_base + SPLLCON);
+
+       plliclk = (spllcon & ICLK_MASK);
+       pllidiv = ((spllcon >> 8) & PLLDIV_MASK) + 1;
+       pllodiv = ((spllcon >> 24) & PLLDIV_MASK);
+       pllmult = ((spllcon >> 16) & PLLMUL_MASK) + 1;
+       frcdiv = ((osccon >> 24) & PLLDIV_MASK);
+
+       pllclk = plliclk ? FRC_CLK : PIC32_POSC_FREQ;
+       frcdivn = ((1 << frcdiv) + 1) + (128 * (frcdiv == 7));
+
+       if (pllodiv < 2)
+               pllodiv = 2;
+       else if (pllodiv < 5)
+               pllodiv = (1 << pllodiv);
+       else
+               pllodiv = 32;
+
+       curr_osc = (int)((osccon >> 12) & CUROSC_MASK);
+
+       switch (curr_osc) {
+       case FRC1:
+       case FRC2:
+               osc_freq = FRC_CLK / frcdivn;
+               break;
+       case SPLL:
+               osc_freq = ((pllclk / pllidiv) * pllmult) / pllodiv;
+               break;
+       case POSC:
+               osc_freq = PIC32_POSC_FREQ;
+               break;
+       default:
+               break;
+       }
+
+       iounmap(osc_base);
+
+       return osc_freq;
+}
+
+u32 pic32_get_pbclk(int bus)
+{
+       u32 clk_freq;
+       void __iomem *osc_base = ioremap(PIC32_BASE_OSC, 0x200);
+       u32 pbxdiv = PB1DIV + ((bus - 1) * 0x10);
+       u32 pbdiv = (__raw_readl(osc_base + pbxdiv) & PB_MASK) + 1;
+
+       iounmap(osc_base);
+
+       clk_freq = pic32_get_sysclk();
+
+       return clk_freq / pbdiv;
+}
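pic32_get_sysclk() derives the SPLL output as (input / PLLIDIV) * PLLMULT / PLLODIV after decoding OSCCON and SPLLCON. A worked example with hypothetical field values (not taken from any particular board):

#include <stdio.h>

int main(void)
{
	unsigned int pllclk = 24000000;	/* POSC input, ICLK bit clear */
	unsigned int pllidiv = 3;	/* PLLDIV field 2 -> divide by 3 */
	unsigned int pllmult = 50;	/* PLLMUL field 49 -> multiply by 50 */
	unsigned int pllodiv = 2;	/* field < 2 clamps to divide by 2 */

	/* prints 200000000 Hz */
	printf("sysclk = %u Hz\n", ((pllclk / pllidiv) * pllmult) / pllodiv);
	return 0;
}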
diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c
new file mode 100644 (file)
index 0000000..d7b7834
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/mach-pic32/pic32.h>
+#include <asm/fw/fw.h>
+
+#include "pic32mzda.h"
+#include "early_pin.h"
+
+/* Default early console parameters */
+#define EARLY_CONSOLE_PORT     1
+#define EARLY_CONSOLE_BAUDRATE 115200
+
+#define UART_ENABLE            BIT(15)
+#define UART_ENABLE_RX         BIT(12)
+#define UART_ENABLE_TX         BIT(10)
+#define UART_TX_FULL           BIT(9)
+
+/* UART1(x == 0) - UART6(x == 5) */
+#define UART_BASE(x)   ((x) * 0x0200)
+#define U_MODE(x)      UART_BASE(x)
+#define U_STA(x)       (UART_BASE(x) + 0x10)
+#define U_TXR(x)       (UART_BASE(x) + 0x20)
+#define U_BRG(x)       (UART_BASE(x) + 0x40)
+
+static void __iomem *uart_base;
+static char console_port = -1;
+
+static int __init configure_uart_pins(int port)
+{
+       switch (port) {
+       case 1:
+               pic32_pps_input(IN_FUNC_U2RX, IN_RPB0);
+               pic32_pps_output(OUT_FUNC_U2TX, OUT_RPG9);
+               break;
+       case 5:
+               pic32_pps_input(IN_FUNC_U6RX, IN_RPD0);
+               pic32_pps_output(OUT_FUNC_U6TX, OUT_RPB8);
+               break;
+       default:
+               return -1;
+       }
+
+       return 0;
+}
+
+static void __init configure_uart(char port, int baud)
+{
+       u32 pbclk;
+
+       pbclk = pic32_get_pbclk(2);
+
+       __raw_writel(0, uart_base + U_MODE(port));
+       __raw_writel(((pbclk / baud) / 16) - 1, uart_base + U_BRG(port));
+       __raw_writel(UART_ENABLE, uart_base + U_MODE(port));
+       __raw_writel(UART_ENABLE_TX | UART_ENABLE_RX,
+                    uart_base + PIC32_SET(U_STA(port)));
+}
+
+static void __init setup_early_console(char port, int baud)
+{
+       if (configure_uart_pins(port))
+               return;
+
+       console_port = port;
+       configure_uart(console_port, baud);
+}
+
+static char * __init pic32_getcmdline(void)
+{
+       /*
+        * arch_mem_init() has not been called yet, so we don't have a real
+        * command line setup if using CONFIG_CMDLINE_BOOL.
+        */
+#ifdef CONFIG_CMDLINE_OVERRIDE
+       return CONFIG_CMDLINE;
+#else
+       return fw_getcmdline();
+#endif
+}
+
+static int __init get_port_from_cmdline(char *arch_cmdline)
+{
+       char *s;
+       int port = -1;
+
+       if (!arch_cmdline || *arch_cmdline == '\0')
+               goto _out;
+
+       s = strstr(arch_cmdline, "earlyprintk=");
+       if (s) {
+               s = strstr(s, "ttyS");
+               if (s)
+                       s += 4;
+               else
+                       goto _out;
+
+               port = (*s) - '0';
+       }
+
+_out:
+       return port;
+}
+
+static int __init get_baud_from_cmdline(char *arch_cmdline)
+{
+       char *s;
+       int baud = -1;
+
+       if (!arch_cmdline || *arch_cmdline == '\0')
+               goto _out;
+
+       s = strstr(arch_cmdline, "earlyprintk=");
+       if (s) {
+               s = strstr(s, "ttyS");
+               if (s)
+                       s += 6;
+               else
+                       goto _out;
+
+               baud = 0;
+               while (*s >= '0' && *s <= '9')
+                       baud = baud * 10 + *s++ - '0';
+       }
+
+_out:
+       return baud;
+}
+
+void __init fw_init_early_console(char port)
+{
+       char *arch_cmdline = pic32_getcmdline();
+       int baud = -1;
+
+       uart_base = ioremap_nocache(PIC32_BASE_UART, 0xc00);
+
+       baud = get_baud_from_cmdline(arch_cmdline);
+       if (port == -1)
+               port = get_port_from_cmdline(arch_cmdline);
+
+       if (port == -1)
+               port = EARLY_CONSOLE_PORT;
+
+       if (baud == -1)
+               baud = EARLY_CONSOLE_BAUDRATE;
+
+       setup_early_console(port, baud);
+}
+
+int prom_putchar(char c)
+{
+       if (console_port >= 0) {
+               while (__raw_readl(
+                               uart_base + U_STA(console_port)) & UART_TX_FULL)
+                       ;
+
+               __raw_writel(c, uart_base + U_TXR(console_port));
+       }
+
+       return 1;
+}
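configure_uart() above programs the usual 16x baud-rate generator, i.e. UxBRG = pbclk / (16 * baud) - 1. A quick check assuming a hypothetical 100 MHz peripheral-bus clock:

#include <stdio.h>

int main(void)
{
	unsigned int pbclk = 100000000;	/* hypothetical PBCLK2 */
	unsigned int baud = 115200;
	unsigned int brg = (pbclk / baud) / 16 - 1;

	/* brg = 53, actual rate ~115740 baud */
	printf("UxBRG = %u (actual rate %u baud)\n",
	       brg, pbclk / (16 * (brg + 1)));
	return 0;
}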
diff --git a/arch/mips/pic32/pic32mzda/early_pin.c b/arch/mips/pic32/pic32mzda/early_pin.c
new file mode 100644 (file)
index 0000000..aa673f8
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/io.h>
+
+#include "early_pin.h"
+
+#define PPS_BASE 0x1f800000
+
+/* Input PPS Registers */
+#define INT1R 0x1404
+#define INT2R 0x1408
+#define INT3R 0x140C
+#define INT4R 0x1410
+#define T2CKR 0x1418
+#define T3CKR 0x141C
+#define T4CKR 0x1420
+#define T5CKR 0x1424
+#define T6CKR 0x1428
+#define T7CKR 0x142C
+#define T8CKR 0x1430
+#define T9CKR 0x1434
+#define IC1R 0x1438
+#define IC2R 0x143C
+#define IC3R 0x1440
+#define IC4R 0x1444
+#define IC5R 0x1448
+#define IC6R 0x144C
+#define IC7R 0x1450
+#define IC8R 0x1454
+#define IC9R 0x1458
+#define OCFAR 0x1460
+#define U1RXR 0x1468
+#define U1CTSR 0x146C
+#define U2RXR 0x1470
+#define U2CTSR 0x1474
+#define U3RXR 0x1478
+#define U3CTSR 0x147C
+#define U4RXR 0x1480
+#define U4CTSR 0x1484
+#define U5RXR 0x1488
+#define U5CTSR 0x148C
+#define U6RXR 0x1490
+#define U6CTSR 0x1494
+#define SDI1R 0x149C
+#define SS1R 0x14A0
+#define SDI2R 0x14A8
+#define SS2R 0x14AC
+#define SDI3R 0x14B4
+#define SS3R 0x14B8
+#define SDI4R 0x14C0
+#define SS4R 0x14C4
+#define SDI5R 0x14CC
+#define SS5R 0x14D0
+#define SDI6R 0x14D8
+#define SS6R 0x14DC
+#define C1RXR 0x14E0
+#define C2RXR 0x14E4
+#define REFCLKI1R 0x14E8
+#define REFCLKI3R 0x14F0
+#define REFCLKI4R 0x14F4
+
+static const struct
+{
+       int function;
+       int reg;
+} input_pin_reg[] = {
+       { IN_FUNC_INT3, INT3R },
+       { IN_FUNC_T2CK, T2CKR },
+       { IN_FUNC_T6CK, T6CKR },
+       { IN_FUNC_IC3, IC3R  },
+       { IN_FUNC_IC7, IC7R },
+       { IN_FUNC_U1RX, U1RXR },
+       { IN_FUNC_U2CTS, U2CTSR },
+       { IN_FUNC_U5RX, U5RXR },
+       { IN_FUNC_U6CTS, U6CTSR },
+       { IN_FUNC_SDI1, SDI1R },
+       { IN_FUNC_SDI3, SDI3R },
+       { IN_FUNC_SDI5, SDI5R },
+       { IN_FUNC_SS6, SS6R },
+       { IN_FUNC_REFCLKI1, REFCLKI1R },
+       { IN_FUNC_INT4, INT4R },
+       { IN_FUNC_T5CK, T5CKR },
+       { IN_FUNC_T7CK, T7CKR },
+       { IN_FUNC_IC4, IC4R },
+       { IN_FUNC_IC8, IC8R },
+       { IN_FUNC_U3RX, U3RXR },
+       { IN_FUNC_U4CTS, U4CTSR },
+       { IN_FUNC_SDI2, SDI2R },
+       { IN_FUNC_SDI4, SDI4R },
+       { IN_FUNC_C1RX, C1RXR },
+       { IN_FUNC_REFCLKI4, REFCLKI4R },
+       { IN_FUNC_INT2, INT2R },
+       { IN_FUNC_T3CK, T3CKR },
+       { IN_FUNC_T8CK, T8CKR },
+       { IN_FUNC_IC2, IC2R },
+       { IN_FUNC_IC5, IC5R },
+       { IN_FUNC_IC9, IC9R },
+       { IN_FUNC_U1CTS, U1CTSR },
+       { IN_FUNC_U2RX, U2RXR },
+       { IN_FUNC_U5CTS, U5CTSR },
+       { IN_FUNC_SS1, SS1R },
+       { IN_FUNC_SS3, SS3R },
+       { IN_FUNC_SS4, SS4R },
+       { IN_FUNC_SS5, SS5R },
+       { IN_FUNC_C2RX, C2RXR },
+       { IN_FUNC_INT1, INT1R },
+       { IN_FUNC_T4CK, T4CKR },
+       { IN_FUNC_T9CK, T9CKR },
+       { IN_FUNC_IC1, IC1R },
+       { IN_FUNC_IC6, IC6R },
+       { IN_FUNC_U3CTS, U3CTSR },
+       { IN_FUNC_U4RX, U4RXR },
+       { IN_FUNC_U6RX, U6RXR },
+       { IN_FUNC_SS2, SS2R },
+       { IN_FUNC_SDI6, SDI6R },
+       { IN_FUNC_OCFA, OCFAR },
+       { IN_FUNC_REFCLKI3, REFCLKI3R },
+};
+
+void pic32_pps_input(int function, int pin)
+{
+       void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0x14F8);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(input_pin_reg); i++) {
+               if (input_pin_reg[i].function == function) {
+                       __raw_writel(pin, pps_base + input_pin_reg[i].reg);
+                       break;
+               }
+       }
+
+       iounmap(pps_base);
+}
+
+/* Output PPS Registers */
+#define RPA14R 0x1538
+#define RPA15R 0x153C
+#define RPB0R 0x1540
+#define RPB1R 0x1544
+#define RPB2R 0x1548
+#define RPB3R 0x154C
+#define RPB5R 0x1554
+#define RPB6R 0x1558
+#define RPB7R 0x155C
+#define RPB8R 0x1560
+#define RPB9R 0x1564
+#define RPB10R 0x1568
+#define RPB14R 0x1578
+#define RPB15R 0x157C
+#define RPC1R 0x1584
+#define RPC2R 0x1588
+#define RPC3R 0x158C
+#define RPC4R 0x1590
+#define RPC13R 0x15B4
+#define RPC14R 0x15B8
+#define RPD0R 0x15C0
+#define RPD1R 0x15C4
+#define RPD2R 0x15C8
+#define RPD3R 0x15CC
+#define RPD4R 0x15D0
+#define RPD5R 0x15D4
+#define RPD6R 0x15D8
+#define RPD7R 0x15DC
+#define RPD9R 0x15E4
+#define RPD10R 0x15E8
+#define RPD11R 0x15EC
+#define RPD12R 0x15F0
+#define RPD14R 0x15F8
+#define RPD15R 0x15FC
+#define RPE3R 0x160C
+#define RPE5R 0x1614
+#define RPE8R 0x1620
+#define RPE9R 0x1624
+#define RPF0R 0x1640
+#define RPF1R 0x1644
+#define RPF2R 0x1648
+#define RPF3R 0x164C
+#define RPF4R 0x1650
+#define RPF5R 0x1654
+#define RPF8R 0x1660
+#define RPF12R 0x1670
+#define RPF13R 0x1674
+#define RPG0R 0x1680
+#define RPG1R 0x1684
+#define RPG6R 0x1698
+#define RPG7R 0x169C
+#define RPG8R 0x16A0
+#define RPG9R 0x16A4
+
+static const struct
+{
+       int pin;
+       int reg;
+} output_pin_reg[] = {
+       { OUT_RPD2, RPD2R },
+       { OUT_RPG8, RPG8R },
+       { OUT_RPF4, RPF4R },
+       { OUT_RPD10, RPD10R },
+       { OUT_RPF1, RPF1R },
+       { OUT_RPB9, RPB9R },
+       { OUT_RPB10, RPB10R },
+       { OUT_RPC14, RPC14R },
+       { OUT_RPB5, RPB5R },
+       { OUT_RPC1, RPC1R },
+       { OUT_RPD14, RPD14R },
+       { OUT_RPG1, RPG1R },
+       { OUT_RPA14, RPA14R },
+       { OUT_RPD6, RPD6R },
+       { OUT_RPD3, RPD3R },
+       { OUT_RPG7, RPG7R },
+       { OUT_RPF5, RPF5R },
+       { OUT_RPD11, RPD11R },
+       { OUT_RPF0, RPF0R },
+       { OUT_RPB1, RPB1R },
+       { OUT_RPE5, RPE5R },
+       { OUT_RPC13, RPC13R },
+       { OUT_RPB3, RPB3R },
+       { OUT_RPC4, RPC4R },
+       { OUT_RPD15, RPD15R },
+       { OUT_RPG0, RPG0R },
+       { OUT_RPA15, RPA15R },
+       { OUT_RPD7, RPD7R },
+       { OUT_RPD9, RPD9R },
+       { OUT_RPG6, RPG6R },
+       { OUT_RPB8, RPB8R },
+       { OUT_RPB15, RPB15R },
+       { OUT_RPD4, RPD4R },
+       { OUT_RPB0, RPB0R },
+       { OUT_RPE3, RPE3R },
+       { OUT_RPB7, RPB7R },
+       { OUT_RPF12, RPF12R },
+       { OUT_RPD12, RPD12R },
+       { OUT_RPF8, RPF8R },
+       { OUT_RPC3, RPC3R },
+       { OUT_RPE9, RPE9R },
+       { OUT_RPD1, RPD1R },
+       { OUT_RPG9, RPG9R },
+       { OUT_RPB14, RPB14R },
+       { OUT_RPD0, RPD0R },
+       { OUT_RPB6, RPB6R },
+       { OUT_RPD5, RPD5R },
+       { OUT_RPB2, RPB2R },
+       { OUT_RPF3, RPF3R },
+       { OUT_RPF13, RPF13R },
+       { OUT_RPC2, RPC2R },
+       { OUT_RPE8, RPE8R },
+       { OUT_RPF2, RPF2R },
+};
+
+void pic32_pps_output(int function, int pin)
+{
+       void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0x16A8);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(output_pin_reg); i++) {
+               if (output_pin_reg[i].pin == pin) {
+                       __raw_writel(function,
+                               pps_base + output_pin_reg[i].reg);
+                       break;
+               }
+       }
+
+       iounmap(pps_base);
+}
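A usage sketch for these helpers, mirroring the pairing that configure_uart_pins() in early_console.c uses for console port 1 (kernel context and early_pin.h assumed; not a standalone program):

#include "early_pin.h"

static void route_u2_console_pins(void)
{
	pic32_pps_input(IN_FUNC_U2RX, IN_RPB0);		/* UART2 RX in from RPB0 */
	pic32_pps_output(OUT_FUNC_U2TX, OUT_RPG9);	/* UART2 TX out on RPG9 */
}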
diff --git a/arch/mips/pic32/pic32mzda/early_pin.h b/arch/mips/pic32/pic32mzda/early_pin.h
new file mode 100644 (file)
index 0000000..417fae9
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef _PIC32MZDA_EARLY_PIN_H
+#define _PIC32MZDA_EARLY_PIN_H
+
+/*
+ * This is a complete, yet overly simplistic and unoptimized, PIC32MZDA PPS
+ * configuration, useful only before the full pinctrl driver is initialized.
+ */
+
+/* Input PPS Functions */
+enum {
+       IN_FUNC_INT3,
+       IN_FUNC_T2CK,
+       IN_FUNC_T6CK,
+       IN_FUNC_IC3,
+       IN_FUNC_IC7,
+       IN_FUNC_U1RX,
+       IN_FUNC_U2CTS,
+       IN_FUNC_U5RX,
+       IN_FUNC_U6CTS,
+       IN_FUNC_SDI1,
+       IN_FUNC_SDI3,
+       IN_FUNC_SDI5,
+       IN_FUNC_SS6,
+       IN_FUNC_REFCLKI1,
+       IN_FUNC_INT4,
+       IN_FUNC_T5CK,
+       IN_FUNC_T7CK,
+       IN_FUNC_IC4,
+       IN_FUNC_IC8,
+       IN_FUNC_U3RX,
+       IN_FUNC_U4CTS,
+       IN_FUNC_SDI2,
+       IN_FUNC_SDI4,
+       IN_FUNC_C1RX,
+       IN_FUNC_REFCLKI4,
+       IN_FUNC_INT2,
+       IN_FUNC_T3CK,
+       IN_FUNC_T8CK,
+       IN_FUNC_IC2,
+       IN_FUNC_IC5,
+       IN_FUNC_IC9,
+       IN_FUNC_U1CTS,
+       IN_FUNC_U2RX,
+       IN_FUNC_U5CTS,
+       IN_FUNC_SS1,
+       IN_FUNC_SS3,
+       IN_FUNC_SS4,
+       IN_FUNC_SS5,
+       IN_FUNC_C2RX,
+       IN_FUNC_INT1,
+       IN_FUNC_T4CK,
+       IN_FUNC_T9CK,
+       IN_FUNC_IC1,
+       IN_FUNC_IC6,
+       IN_FUNC_U3CTS,
+       IN_FUNC_U4RX,
+       IN_FUNC_U6RX,
+       IN_FUNC_SS2,
+       IN_FUNC_SDI6,
+       IN_FUNC_OCFA,
+       IN_FUNC_REFCLKI3,
+};
+
+/* Input PPS Pins */
+#define IN_RPD2 0x00
+#define IN_RPG8 0x01
+#define IN_RPF4 0x02
+#define IN_RPD10 0x03
+#define IN_RPF1 0x04
+#define IN_RPB9 0x05
+#define IN_RPB10 0x06
+#define IN_RPC14 0x07
+#define IN_RPB5 0x08
+#define IN_RPC1 0x0A
+#define IN_RPD14 0x0B
+#define IN_RPG1 0x0C
+#define IN_RPA14 0x0D
+#define IN_RPD6 0x0E
+#define IN_RPD3 0x00
+#define IN_RPG7 0x01
+#define IN_RPF5 0x02
+#define IN_RPD11 0x03
+#define IN_RPF0 0x04
+#define IN_RPB1 0x05
+#define IN_RPE5 0x06
+#define IN_RPC13 0x07
+#define IN_RPB3 0x08
+#define IN_RPC4 0x0A
+#define IN_RPD15 0x0B
+#define IN_RPG0 0x0C
+#define IN_RPA15 0x0D
+#define IN_RPD7 0x0E
+#define IN_RPD9 0x00
+#define IN_RPG6 0x01
+#define IN_RPB8 0x02
+#define IN_RPB15 0x03
+#define IN_RPD4 0x04
+#define IN_RPB0 0x05
+#define IN_RPE3 0x06
+#define IN_RPB7 0x07
+#define IN_RPF12 0x09
+#define IN_RPD12 0x0A
+#define IN_RPF8 0x0B
+#define IN_RPC3 0x0C
+#define IN_RPE9 0x0D
+#define IN_RPD1 0x00
+#define IN_RPG9 0x01
+#define IN_RPB14 0x02
+#define IN_RPD0 0x03
+#define IN_RPB6 0x05
+#define IN_RPD5 0x06
+#define IN_RPB2 0x07
+#define IN_RPF3 0x08
+#define IN_RPF13 0x09
+#define IN_RPF2 0x0B
+#define IN_RPC2 0x0C
+#define IN_RPE8 0x0D
+
+/* Output PPS Pins */
+enum {
+       OUT_RPD2,
+       OUT_RPG8,
+       OUT_RPF4,
+       OUT_RPD10,
+       OUT_RPF1,
+       OUT_RPB9,
+       OUT_RPB10,
+       OUT_RPC14,
+       OUT_RPB5,
+       OUT_RPC1,
+       OUT_RPD14,
+       OUT_RPG1,
+       OUT_RPA14,
+       OUT_RPD6,
+       OUT_RPD3,
+       OUT_RPG7,
+       OUT_RPF5,
+       OUT_RPD11,
+       OUT_RPF0,
+       OUT_RPB1,
+       OUT_RPE5,
+       OUT_RPC13,
+       OUT_RPB3,
+       OUT_RPC4,
+       OUT_RPD15,
+       OUT_RPG0,
+       OUT_RPA15,
+       OUT_RPD7,
+       OUT_RPD9,
+       OUT_RPG6,
+       OUT_RPB8,
+       OUT_RPB15,
+       OUT_RPD4,
+       OUT_RPB0,
+       OUT_RPE3,
+       OUT_RPB7,
+       OUT_RPF12,
+       OUT_RPD12,
+       OUT_RPF8,
+       OUT_RPC3,
+       OUT_RPE9,
+       OUT_RPD1,
+       OUT_RPG9,
+       OUT_RPB14,
+       OUT_RPD0,
+       OUT_RPB6,
+       OUT_RPD5,
+       OUT_RPB2,
+       OUT_RPF3,
+       OUT_RPF13,
+       OUT_RPC2,
+       OUT_RPE8,
+       OUT_RPF2,
+};
+
+/* Output PPS Functions */
+#define OUT_FUNC_U3TX 0x01
+#define OUT_FUNC_U4RTS 0x02
+#define OUT_FUNC_SDO1 0x05
+#define OUT_FUNC_SDO2 0x06
+#define OUT_FUNC_SDO3 0x07
+#define OUT_FUNC_SDO5 0x09
+#define OUT_FUNC_SS6 0x0A
+#define OUT_FUNC_OC3 0x0B
+#define OUT_FUNC_OC6 0x0C
+#define OUT_FUNC_REFCLKO4 0x0D
+#define OUT_FUNC_C2OUT 0x0E
+#define OUT_FUNC_C1TX 0x0F
+#define OUT_FUNC_U1TX 0x01
+#define OUT_FUNC_U2RTS 0x02
+#define OUT_FUNC_U5TX 0x03
+#define OUT_FUNC_U6RTS 0x04
+#define OUT_FUNC_SDO1 0x05
+#define OUT_FUNC_SDO2 0x06
+#define OUT_FUNC_SDO3 0x07
+#define OUT_FUNC_SDO4 0x08
+#define OUT_FUNC_SDO5 0x09
+#define OUT_FUNC_OC4 0x0B
+#define OUT_FUNC_OC7 0x0C
+#define OUT_FUNC_REFCLKO1 0x0F
+#define OUT_FUNC_U3RTS 0x01
+#define OUT_FUNC_U4TX 0x02
+#define OUT_FUNC_U6TX 0x04
+#define OUT_FUNC_SS1 0x05
+#define OUT_FUNC_SS3 0x07
+#define OUT_FUNC_SS4 0x08
+#define OUT_FUNC_SS5 0x09
+#define OUT_FUNC_SDO6 0x0A
+#define OUT_FUNC_OC5 0x0B
+#define OUT_FUNC_OC8 0x0C
+#define OUT_FUNC_C1OUT 0x0E
+#define OUT_FUNC_REFCLKO3 0x0F
+#define OUT_FUNC_U1RTS 0x01
+#define OUT_FUNC_U2TX 0x02
+#define OUT_FUNC_U5RTS 0x03
+#define OUT_FUNC_U6TX 0x04
+#define OUT_FUNC_SS2 0x06
+#define OUT_FUNC_SDO4 0x08
+#define OUT_FUNC_SDO6 0x0A
+#define OUT_FUNC_OC2 0x0B
+#define OUT_FUNC_OC1 0x0C
+#define OUT_FUNC_OC9 0x0D
+#define OUT_FUNC_C2TX 0x0F
+
+void pic32_pps_input(int function, int pin);
+void pic32_pps_output(int function, int pin);
+
+#endif
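The numeric codes above repeat because each value is only meaningful within its own peripheral remap group (IN_RPD2, IN_RPD3 and IN_RPD9 all map to 0x00, but they belong to different groups). A minimal usage sketch of the two helpers declared at the end of this header, assuming the chosen function/pin pair sits in the same remap group; the pairing below is illustrative, not taken from the patch:

        /* Route the U3TX output function onto remappable pin RPD2, and
         * feed the REFCLKI3 input function from pin RPD9. */
        pic32_pps_output(OUT_FUNC_U3TX, OUT_RPD2);
        pic32_pps_input(IN_FUNC_REFCLKI3, IN_RPD9);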
diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c
new file mode 100644 (file)
index 0000000..775ff90
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Joshua Henderson, joshua.henderson@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/platform_data/sdhci-pic32.h>
+
+#include <asm/fw/fw.h>
+#include <asm/mips-boards/generic.h>
+#include <asm/prom.h>
+
+#include "pic32mzda.h"
+
+const char *get_system_type(void)
+{
+       return "PIC32MZDA";
+}
+
+static ulong get_fdtaddr(void)
+{
+       ulong ftaddr = 0;
+
+       if ((fw_arg0 == -2) && fw_arg1 && !fw_arg2 && !fw_arg3)
+               return (ulong)fw_arg1;
+
+       if (__dtb_start < __dtb_end)
+               ftaddr = (ulong)__dtb_start;
+
+       return ftaddr;
+}
+
+void __init plat_mem_setup(void)
+{
+       void *dtb;
+
+       dtb = (void *)get_fdtaddr();
+       if (!dtb) {
+               pr_err("pic32: no DTB found.\n");
+               return;
+       }
+
+       /*
+        * Load the builtin device tree. This causes the chosen node to be
+        * parsed, which in turn makes our memory ranges appear.
+        */
+       __dt_setup_arch(dtb);
+
+       pr_info("Found the following command lines\n");
+       pr_info(" boot_command_line: %s\n", boot_command_line);
+       pr_info(" arcs_cmdline     : %s\n", arcs_cmdline);
+#ifdef CONFIG_CMDLINE_BOOL
+       pr_info(" builtin_cmdline  : %s\n", CONFIG_CMDLINE);
+#endif
+       if (dtb != __dtb_start)
+               strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+
+#ifdef CONFIG_EARLY_PRINTK
+       fw_init_early_console(-1);
+#endif
+       pic32_config_init();
+}
+
+static __init void pic32_init_cmdline(int argc, char *argv[])
+{
+       unsigned int count = COMMAND_LINE_SIZE - 1;
+       int i;
+       char *dst = &(arcs_cmdline[0]);
+       char *src;
+
+       for (i = 1; i < argc && count; ++i) {
+               src = argv[i];
+               while (*src && count) {
+                       *dst++ = *src++;
+                       --count;
+               }
+               *dst++ = ' ';
+       }
+       if (i > 1)
+               --dst;
+
+       *dst = 0;
+}
+
+void __init prom_init(void)
+{
+       pic32_init_cmdline((int)fw_arg0, (char **)fw_arg1);
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
+
+void __init device_tree_init(void)
+{
+       if (!initial_boot_params)
+               return;
+
+       unflatten_and_copy_device_tree();
+}
+
+static struct pic32_sdhci_platform_data sdhci_data = {
+       .setup_dma = pic32_set_sdhci_adma_fifo_threshold,
+};
+
+static struct of_dev_auxdata pic32_auxdata_lookup[] __initdata = {
+       OF_DEV_AUXDATA("microchip,pic32mzda-sdhci", 0, "sdhci", &sdhci_data),
+       { /* sentinel */}
+};
+
+static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup)
+{
+       struct device_node *root, *np;
+       struct resource res;
+
+       root = of_find_node_by_path("/");
+
+       for (; lookup->compatible; lookup++) {
+               np = of_find_compatible_node(NULL, NULL, lookup->compatible);
+               if (np) {
+                       lookup->name = (char *)np->name;
+                       if (lookup->phys_addr)
+                               continue;
+                       if (!of_address_to_resource(np, 0, &res))
+                               lookup->phys_addr = res.start;
+               }
+       }
+
+       return 0;
+}
+
+static int __init plat_of_setup(void)
+{
+       if (!of_have_populated_dt())
+               panic("Device tree not present");
+
+       pic32_of_prepare_platform_data(pic32_auxdata_lookup);
+       if (of_platform_populate(NULL, of_default_bus_match_table,
+                                pic32_auxdata_lookup, NULL))
+               panic("Failed to populate DT");
+
+       return 0;
+}
+arch_initcall(plat_of_setup);
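pic32_of_prepare_platform_data() back-fills each lookup entry from the device tree before of_platform_populate() runs. Roughly, the single entry in the table ends up equivalent to the hand-filled struct below, assuming OF_DEV_AUXDATA() simply populates these four fields and the matching node carries a reg property:

        static struct of_dev_auxdata entry __initdata = {
                .compatible    = "microchip,pic32mzda-sdhci",
                .phys_addr     = 0,     /* later set to res.start from the node's reg */
                .name          = "sdhci",
                .platform_data = &sdhci_data,
        };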
diff --git a/arch/mips/pic32/pic32mzda/pic32mzda.h b/arch/mips/pic32/pic32mzda/pic32mzda.h
new file mode 100644 (file)
index 0000000..96d10e2
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef PIC32MZDA_COMMON_H
+#define PIC32MZDA_COMMON_H
+
+/* early clock */
+u32 pic32_get_pbclk(int bus);
+u32 pic32_get_sysclk(void);
+
+/* Device configuration */
+void __init pic32_config_init(void);
+int pic32_set_lcd_mode(int mode);
+int pic32_set_sdhci_adma_fifo_threshold(u32 rthrs, u32 wthrs);
+u32 pic32_get_boot_status(void);
+int pic32_disable_lcd(void);
+int pic32_enable_lcd(void);
+
+#endif
diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c
new file mode 100644 (file)
index 0000000..ca6a62b
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/irqdomain.h>
+
+#include <asm/time.h>
+
+#include "pic32mzda.h"
+
+static const struct of_device_id pic32_infra_match[] = {
+       { .compatible = "microchip,pic32mzda-infra", },
+       { },
+};
+
+#define DEFAULT_CORE_TIMER_INTERRUPT 0
+
+static unsigned int pic32_xlate_core_timer_irq(void)
+{
+       static struct device_node *node;
+       unsigned int irq;
+
+       node = of_find_matching_node(NULL, pic32_infra_match);
+
+       if (WARN_ON(!node))
+               goto default_map;
+
+       irq = irq_of_parse_and_map(node, 0);
+       if (!irq)
+               goto default_map;
+
+       return irq;
+
+default_map:
+
+       return irq_create_mapping(NULL, DEFAULT_CORE_TIMER_INTERRUPT);
+}
+
+unsigned int get_c0_compare_int(void)
+{
+       return pic32_xlate_core_timer_irq();
+}
+
+void __init plat_time_init(void)
+{
+       struct clk *clk;
+
+       of_clk_init(NULL);
+       clk = clk_get_sys("cpu_clk", NULL);
+       if (IS_ERR(clk))
+               panic("unable to get CPU clock, err=%ld", PTR_ERR(clk));
+
+       clk_prepare_enable(clk);
+       pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000);
+       mips_hpt_frequency = clk_get_rate(clk) / 2;
+
+       clocksource_probe();
+}
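plat_time_init() halves the CPU clock because the MIPS CP0 Count register increments once every two pipeline cycles on these cores. A worked example with a hypothetical 200 MHz cpu_clk:

        unsigned long cpu_hz = 200000000UL;     /* illustrative cpu_clk rate */
        unsigned long hpt_hz = cpu_hz / 2;      /* mips_hpt_frequency = 100 MHz */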
index e9bc8c96174e227a8d9cf730bcc4bfb0ff0fb1cf..813826a456ca33d30ff84dfb9724a2796871fb4a 100644 (file)
@@ -12,6 +12,11 @@ config RALINK_ILL_ACC
        depends on SOC_RT305X
        default y
 
+config IRQ_INTC
+       bool
+       default y
+       depends on !SOC_MT7621
+
 choice
        prompt "Ralink SoC selection"
        default SOC_RT305X
@@ -33,7 +38,18 @@ choice
 
        config SOC_MT7620
                bool "MT7620/8"
+               select HW_HAS_PCI
 
+       config SOC_MT7621
+               bool "MT7621"
+               select MIPS_CPU_SCACHE
+               select SYS_SUPPORTS_MULTITHREADING
+               select SYS_SUPPORTS_SMP
+               select SYS_SUPPORTS_MIPS_CPS
+               select MIPS_GIC
+               select COMMON_CLK
+               select CLKSRC_MIPS_GIC
+               select HW_HAS_PCI
 endchoice
 
 choice
index a6c9d00613269f6a5cbb1d416eb8b8450b9f3a65..0d1795a0321e373d93adbfc073be4ac67423d97b 100644 (file)
@@ -6,16 +6,24 @@
 # Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
 # Copyright (C) 2013 John Crispin <blogic@openwrt.org>
 
-obj-y := prom.o of.o reset.o clk.o irq.o timer.o
+obj-y := prom.o of.o reset.o
+
+ifndef CONFIG_MIPS_GIC
+       obj-y += clk.o timer.o
+endif
 
 obj-$(CONFIG_CLKEVT_RT3352) += cevt-rt3352.o
 
 obj-$(CONFIG_RALINK_ILL_ACC) += ill_acc.o
 
+obj-$(CONFIG_IRQ_INTC) += irq.o
+obj-$(CONFIG_MIPS_GIC) += irq-gic.o timer-gic.o
+
 obj-$(CONFIG_SOC_RT288X) += rt288x.o
 obj-$(CONFIG_SOC_RT305X) += rt305x.o
 obj-$(CONFIG_SOC_RT3883) += rt3883.o
 obj-$(CONFIG_SOC_MT7620) += mt7620.o
+obj-$(CONFIG_SOC_MT7621) += mt7621.o
 
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 
index 6d9c8c499f988d4949f72c6484f8e948dff6fec2..6095fcc334f42377b2bc3b44631eb429154e85a0 100644 (file)
@@ -27,3 +27,8 @@ cflags-$(CONFIG_SOC_RT3883)   += -I$(srctree)/arch/mips/include/asm/mach-ralink/rt
 #
 load-$(CONFIG_SOC_MT7620)      += 0xffffffff80000000
 cflags-$(CONFIG_SOC_MT7620)    += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7620
+
+# Ralink MT7621
+#
+load-$(CONFIG_SOC_MT7621)      += 0xffffffff80001000
+cflags-$(CONFIG_SOC_MT7621)    += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7621
diff --git a/arch/mips/ralink/irq-gic.c b/arch/mips/ralink/irq-gic.c
new file mode 100644 (file)
index 0000000..50d6c55
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+
+#include <linux/of.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/mips-gic.h>
+
+int get_c0_perfcount_int(void)
+{
+       return gic_get_c0_perfcount_int();
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
+
+void __init arch_init_irq(void)
+{
+       irqchip_init();
+}
index dfb04fcedb042d9b80621ac3197886a1ea3bccda..0d3d1a97895f167d613cd469422dc4e2bfe6087c 100644 (file)
@@ -107,31 +107,31 @@ static struct rt2880_pmx_group mt7620a_pinmux_data[] = {
 };
 
 static struct rt2880_pmx_func pwm1_grp_mt7628[] = {
-       FUNC("sdcx", 3, 19, 1),
+       FUNC("sdxc d6", 3, 19, 1),
        FUNC("utif", 2, 19, 1),
        FUNC("gpio", 1, 19, 1),
-       FUNC("pwm", 0, 19, 1),
+       FUNC("pwm1", 0, 19, 1),
 };
 
 static struct rt2880_pmx_func pwm0_grp_mt7628[] = {
-       FUNC("sdcx", 3, 18, 1),
+       FUNC("sdxc d7", 3, 18, 1),
        FUNC("utif", 2, 18, 1),
        FUNC("gpio", 1, 18, 1),
-       FUNC("pwm", 0, 18, 1),
+       FUNC("pwm0", 0, 18, 1),
 };
 
 static struct rt2880_pmx_func uart2_grp_mt7628[] = {
-       FUNC("sdcx", 3, 20, 2),
+       FUNC("sdxc d5 d4", 3, 20, 2),
        FUNC("pwm", 2, 20, 2),
        FUNC("gpio", 1, 20, 2),
-       FUNC("uart", 0, 20, 2),
+       FUNC("uart2", 0, 20, 2),
 };
 
 static struct rt2880_pmx_func uart1_grp_mt7628[] = {
-       FUNC("sdcx", 3, 45, 2),
+       FUNC("sw_r", 3, 45, 2),
        FUNC("pwm", 2, 45, 2),
        FUNC("gpio", 1, 45, 2),
-       FUNC("uart", 0, 45, 2),
+       FUNC("uart1", 0, 45, 2),
 };
 
 static struct rt2880_pmx_func i2c_grp_mt7628[] = {
@@ -143,21 +143,21 @@ static struct rt2880_pmx_func i2c_grp_mt7628[] = {
 
 static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("reclk", 0, 36, 1) };
 static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 37, 1) };
-static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 15, 38) };
+static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) };
 static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) };
 
 static struct rt2880_pmx_func sd_mode_grp_mt7628[] = {
        FUNC("jtag", 3, 22, 8),
        FUNC("utif", 2, 22, 8),
        FUNC("gpio", 1, 22, 8),
-       FUNC("sdcx", 0, 22, 8),
+       FUNC("sdxc", 0, 22, 8),
 };
 
 static struct rt2880_pmx_func uart0_grp_mt7628[] = {
        FUNC("-", 3, 12, 2),
        FUNC("-", 2, 12, 2),
        FUNC("gpio", 1, 12, 2),
-       FUNC("uart", 0, 12, 2),
+       FUNC("uart0", 0, 12, 2),
 };
 
 static struct rt2880_pmx_func i2s_grp_mt7628[] = {
@@ -171,7 +171,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = {
        FUNC("-", 3, 6, 1),
        FUNC("refclk", 2, 6, 1),
        FUNC("gpio", 1, 6, 1),
-       FUNC("spi", 0, 6, 1),
+       FUNC("spi cs1", 0, 6, 1),
 };
 
 static struct rt2880_pmx_func spis_grp_mt7628[] = {
@@ -188,28 +188,44 @@ static struct rt2880_pmx_func gpio_grp_mt7628[] = {
        FUNC("gpio", 0, 11, 1),
 };
 
-#define MT7628_GPIO_MODE_MASK  0x3
-
-#define MT7628_GPIO_MODE_PWM1  30
-#define MT7628_GPIO_MODE_PWM0  28
-#define MT7628_GPIO_MODE_UART2 26
-#define MT7628_GPIO_MODE_UART1 24
-#define MT7628_GPIO_MODE_I2C   20
-#define MT7628_GPIO_MODE_REFCLK        18
-#define MT7628_GPIO_MODE_PERST 16
-#define MT7628_GPIO_MODE_WDT   14
-#define MT7628_GPIO_MODE_SPI   12
-#define MT7628_GPIO_MODE_SDMODE        10
-#define MT7628_GPIO_MODE_UART0 8
-#define MT7628_GPIO_MODE_I2S   6
-#define MT7628_GPIO_MODE_CS1   4
-#define MT7628_GPIO_MODE_SPIS  2
-#define MT7628_GPIO_MODE_GPIO  0
+static struct rt2880_pmx_func wled_kn_grp_mt7628[] = {
+       FUNC("rsvd", 3, 35, 1),
+       FUNC("rsvd", 2, 35, 1),
+       FUNC("gpio", 1, 35, 1),
+       FUNC("wled_kn", 0, 35, 1),
+};
+
+static struct rt2880_pmx_func wled_an_grp_mt7628[] = {
+       FUNC("rsvd", 3, 35, 1),
+       FUNC("rsvd", 2, 35, 1),
+       FUNC("gpio", 1, 35, 1),
+       FUNC("wled_an", 0, 35, 1),
+};
+
+#define MT7628_GPIO_MODE_MASK          0x3
+
+#define MT7628_GPIO_MODE_WLED_KN       48
+#define MT7628_GPIO_MODE_WLED_AN       32
+#define MT7628_GPIO_MODE_PWM1          30
+#define MT7628_GPIO_MODE_PWM0          28
+#define MT7628_GPIO_MODE_UART2         26
+#define MT7628_GPIO_MODE_UART1         24
+#define MT7628_GPIO_MODE_I2C           20
+#define MT7628_GPIO_MODE_REFCLK                18
+#define MT7628_GPIO_MODE_PERST         16
+#define MT7628_GPIO_MODE_WDT           14
+#define MT7628_GPIO_MODE_SPI           12
+#define MT7628_GPIO_MODE_SDMODE                10
+#define MT7628_GPIO_MODE_UART0         8
+#define MT7628_GPIO_MODE_I2S           6
+#define MT7628_GPIO_MODE_CS1           4
+#define MT7628_GPIO_MODE_SPIS          2
+#define MT7628_GPIO_MODE_GPIO          0
 
 static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
        GRP_G("pmw1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_PWM1),
-       GRP_G("pmw1", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
+       GRP_G("pmw0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_PWM0),
        GRP_G("uart2", uart2_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_UART2),
@@ -233,6 +249,10 @@ static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
                                1, MT7628_GPIO_MODE_SPIS),
        GRP_G("gpio", gpio_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_GPIO),
+       GRP_G("wled_an", wled_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+                               1, MT7628_GPIO_MODE_WLED_AN),
+       GRP_G("wled_kn", wled_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+                               1, MT7628_GPIO_MODE_WLED_KN),
        { 0 }
 };
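Each GRP_G() entry ties a function table to a 2-bit field in the SoC's GPIO mode register: a mask, the value that selects plain GPIO, and the field's bit position. Muxing the uart2 group to GPIO, for instance, would look roughly like the sketch below; the rt_sysc_*() accessors are the standard Ralink helpers, but the register name here is an assumption:

        u32 t = rt_sysc_r32(SYSC_REG_GPIO_MODE);
        t &= ~(MT7628_GPIO_MODE_MASK << MT7628_GPIO_MODE_UART2);
        t |= 1 << MT7628_GPIO_MODE_UART2;   /* 1 == the "gpio" FUNC value */
        rt_sysc_w32(t, SYSC_REG_GPIO_MODE);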
 
@@ -436,10 +456,13 @@ void __init ralink_clk_init(void)
        ralink_clk_add("10000100.timer", periph_rate);
        ralink_clk_add("10000120.watchdog", periph_rate);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000c00.uartlite", periph_rate);
+       ralink_clk_add("10000d00.uart1", periph_rate);
+       ralink_clk_add("10000e00.uart2", periph_rate);
        ralink_clk_add("10180000.wmac", xtal_rate);
 
-       if (IS_ENABLED(CONFIG_USB) && is_mt76x8()) {
+       if (IS_ENABLED(CONFIG_USB) && !is_mt76x8()) {
                /*
                 * When the CPU goes into sleep mode, the BUS clock will be
                 * too low for USB to function properly. Adjust the busses
@@ -552,7 +575,7 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
        }
 
        snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
-               "Ralink %s ver:%u eco:%u",
+               "MediaTek %s ver:%u eco:%u",
                name,
                (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
                (rev & CHIP_REV_ECO_MASK));
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
new file mode 100644 (file)
index 0000000..e9b9fa3
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/mipsregs.h>
+#include <asm/smp-ops.h>
+#include <asm/mips-cm.h>
+#include <asm/mips-cpc.h>
+#include <asm/mach-ralink/ralink_regs.h>
+#include <asm/mach-ralink/mt7621.h>
+
+#include <pinmux.h>
+
+#include "common.h"
+
+#define SYSC_REG_SYSCFG                0x10
+#define SYSC_REG_CPLL_CLKCFG0  0x2c
+#define SYSC_REG_CUR_CLK_STS   0x44
+#define CPU_CLK_SEL            (BIT(30) | BIT(31))
+
+#define MT7621_GPIO_MODE_UART1         1
+#define MT7621_GPIO_MODE_I2C           2
+#define MT7621_GPIO_MODE_UART3_MASK    0x3
+#define MT7621_GPIO_MODE_UART3_SHIFT   3
+#define MT7621_GPIO_MODE_UART3_GPIO    1
+#define MT7621_GPIO_MODE_UART2_MASK    0x3
+#define MT7621_GPIO_MODE_UART2_SHIFT   5
+#define MT7621_GPIO_MODE_UART2_GPIO    1
+#define MT7621_GPIO_MODE_JTAG          7
+#define MT7621_GPIO_MODE_WDT_MASK      0x3
+#define MT7621_GPIO_MODE_WDT_SHIFT     8
+#define MT7621_GPIO_MODE_WDT_GPIO      1
+#define MT7621_GPIO_MODE_PCIE_RST      0
+#define MT7621_GPIO_MODE_PCIE_REF      2
+#define MT7621_GPIO_MODE_PCIE_MASK     0x3
+#define MT7621_GPIO_MODE_PCIE_SHIFT    10
+#define MT7621_GPIO_MODE_PCIE_GPIO     1
+#define MT7621_GPIO_MODE_MDIO_MASK     0x3
+#define MT7621_GPIO_MODE_MDIO_SHIFT    12
+#define MT7621_GPIO_MODE_MDIO_GPIO     1
+#define MT7621_GPIO_MODE_RGMII1                14
+#define MT7621_GPIO_MODE_RGMII2                15
+#define MT7621_GPIO_MODE_SPI_MASK      0x3
+#define MT7621_GPIO_MODE_SPI_SHIFT     16
+#define MT7621_GPIO_MODE_SPI_GPIO      1
+#define MT7621_GPIO_MODE_SDHCI_MASK    0x3
+#define MT7621_GPIO_MODE_SDHCI_SHIFT   18
+#define MT7621_GPIO_MODE_SDHCI_GPIO    1
+
+static struct rt2880_pmx_func uart1_grp[] =  { FUNC("uart1", 0, 1, 2) };
+static struct rt2880_pmx_func i2c_grp[] =  { FUNC("i2c", 0, 3, 2) };
+static struct rt2880_pmx_func uart3_grp[] = {
+       FUNC("uart3", 0, 5, 4),
+       FUNC("i2s", 2, 5, 4),
+       FUNC("spdif3", 3, 5, 4),
+};
+static struct rt2880_pmx_func uart2_grp[] = {
+       FUNC("uart2", 0, 9, 4),
+       FUNC("pcm", 2, 9, 4),
+       FUNC("spdif2", 3, 9, 4),
+};
+static struct rt2880_pmx_func jtag_grp[] = { FUNC("jtag", 0, 13, 5) };
+static struct rt2880_pmx_func wdt_grp[] = {
+       FUNC("wdt rst", 0, 18, 1),
+       FUNC("wdt refclk", 2, 18, 1),
+};
+static struct rt2880_pmx_func pcie_rst_grp[] = {
+       FUNC("pcie rst", MT7621_GPIO_MODE_PCIE_RST, 19, 1),
+       FUNC("pcie refclk", MT7621_GPIO_MODE_PCIE_REF, 19, 1)
+};
+static struct rt2880_pmx_func mdio_grp[] = { FUNC("mdio", 0, 20, 2) };
+static struct rt2880_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 22, 12) };
+static struct rt2880_pmx_func spi_grp[] = {
+       FUNC("spi", 0, 34, 7),
+       FUNC("nand1", 2, 34, 7),
+};
+static struct rt2880_pmx_func sdhci_grp[] = {
+       FUNC("sdhci", 0, 41, 8),
+       FUNC("nand2", 2, 41, 8),
+};
+static struct rt2880_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 49, 12) };
+
+static struct rt2880_pmx_group mt7621_pinmux_data[] = {
+       GRP("uart1", uart1_grp, 1, MT7621_GPIO_MODE_UART1),
+       GRP("i2c", i2c_grp, 1, MT7621_GPIO_MODE_I2C),
+       GRP_G("uart3", uart3_grp, MT7621_GPIO_MODE_UART3_MASK,
+               MT7621_GPIO_MODE_UART3_GPIO, MT7621_GPIO_MODE_UART3_SHIFT),
+       GRP_G("uart2", uart2_grp, MT7621_GPIO_MODE_UART2_MASK,
+               MT7621_GPIO_MODE_UART2_GPIO, MT7621_GPIO_MODE_UART2_SHIFT),
+       GRP("jtag", jtag_grp, 1, MT7621_GPIO_MODE_JTAG),
+       GRP_G("wdt", wdt_grp, MT7621_GPIO_MODE_WDT_MASK,
+               MT7621_GPIO_MODE_WDT_GPIO, MT7621_GPIO_MODE_WDT_SHIFT),
+       GRP_G("pcie", pcie_rst_grp, MT7621_GPIO_MODE_PCIE_MASK,
+               MT7621_GPIO_MODE_PCIE_GPIO, MT7621_GPIO_MODE_PCIE_SHIFT),
+       GRP_G("mdio", mdio_grp, MT7621_GPIO_MODE_MDIO_MASK,
+               MT7621_GPIO_MODE_MDIO_GPIO, MT7621_GPIO_MODE_MDIO_SHIFT),
+       GRP("rgmii2", rgmii2_grp, 1, MT7621_GPIO_MODE_RGMII2),
+       GRP_G("spi", spi_grp, MT7621_GPIO_MODE_SPI_MASK,
+               MT7621_GPIO_MODE_SPI_GPIO, MT7621_GPIO_MODE_SPI_SHIFT),
+       GRP_G("sdhci", sdhci_grp, MT7621_GPIO_MODE_SDHCI_MASK,
+               MT7621_GPIO_MODE_SDHCI_GPIO, MT7621_GPIO_MODE_SDHCI_SHIFT),
+       GRP("rgmii1", rgmii1_grp, 1, MT7621_GPIO_MODE_RGMII1),
+       { 0 }
+};
+
+phys_addr_t mips_cpc_default_phys_base(void)
+{
+       panic("Cannot detect cpc address");
+}
+
+void __init ralink_clk_init(void)
+{
+       int cpu_fdiv = 0;
+       int cpu_ffrac = 0;
+       int fbdiv = 0;
+       u32 clk_sts, syscfg;
+       u8 clk_sel = 0, xtal_mode;
+       u32 cpu_clk;
+
+       if ((rt_sysc_r32(SYSC_REG_CPLL_CLKCFG0) & CPU_CLK_SEL) != 0)
+               clk_sel = 1;
+
+       switch (clk_sel) {
+       case 0:
+               clk_sts = rt_sysc_r32(SYSC_REG_CUR_CLK_STS);
+               cpu_fdiv = ((clk_sts >> 8) & 0x1F);
+               cpu_ffrac = (clk_sts & 0x1F);
+               cpu_clk = (500 * cpu_ffrac / cpu_fdiv) * 1000 * 1000;
+               break;
+
+       case 1:
+               fbdiv = ((rt_sysc_r32(0x648) >> 4) & 0x7F) + 1;
+               syscfg = rt_sysc_r32(SYSC_REG_SYSCFG);
+               xtal_mode = (syscfg >> 6) & 0x7;
+               if (xtal_mode >= 6) {
+                       /* 25Mhz Xtal */
+                       cpu_clk = 25 * fbdiv * 1000 * 1000;
+               } else if (xtal_mode >= 3) {
+                       /* 40Mhz Xtal */
+                       cpu_clk = 40 * fbdiv * 1000 * 1000;
+               } else {
+                       /* 20Mhz Xtal */
+                       cpu_clk = 20 * fbdiv * 1000 * 1000;
+               }
+               break;
+       }
+}
+
+void __init ralink_of_remap(void)
+{
+       rt_sysc_membase = plat_of_remap_node("mtk,mt7621-sysc");
+       rt_memc_membase = plat_of_remap_node("mtk,mt7621-memc");
+
+       if (!rt_sysc_membase || !rt_memc_membase)
+               panic("Failed to remap core resources");
+}
+
+void prom_soc_init(struct ralink_soc_info *soc_info)
+{
+       void __iomem *sysc = (void __iomem *) KSEG1ADDR(MT7621_SYSC_BASE);
+       unsigned char *name = NULL;
+       u32 n0;
+       u32 n1;
+       u32 rev;
+
+       n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0);
+       n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1);
+
+       if (n0 == MT7621_CHIP_NAME0 && n1 == MT7621_CHIP_NAME1) {
+               name = "MT7621";
+               soc_info->compatible = "mtk,mt7621-soc";
+       } else {
+               panic("mt7621: unknown SoC, n0:%08x n1:%08x\n", n0, n1);
+       }
+
+       rev = __raw_readl(sysc + SYSC_REG_CHIP_REV);
+
+       snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
+               "MediaTek %s ver:%u eco:%u",
+               name,
+               (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
+               (rev & CHIP_REV_ECO_MASK));
+
+       soc_info->mem_size_min = MT7621_DDR2_SIZE_MIN;
+       soc_info->mem_size_max = MT7621_DDR2_SIZE_MAX;
+       soc_info->mem_base = MT7621_DRAM_BASE;
+
+       rt2880_pinmux_data = mt7621_pinmux_data;
+
+       /* Early detection of CMP support */
+       mips_cm_probe();
+       mips_cpc_probe();
+
+       if (mips_cm_numiocu()) {
+               /*
+                * mips_cm_probe() wipes out the bootloader's config for
+                * CM regions, so we have to configure them again. This
+                * SoC cannot talk to palmbus devices without a proper
+                * IOCU region set up.
+                *
+                * FIXME: it would be better to do this with values
+                * from DT, but we need this very early because
+                * without this we cannot talk to pretty much anything
+                * including serial.
+                */
+               write_gcr_reg0_base(MT7621_PALMBUS_BASE);
+               write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE |
+                                   CM_GCR_REGn_MASK_CMTGT_IOCU0);
+       }
+
+       if (!register_cps_smp_ops())
+               return;
+       if (!register_cmp_smp_ops())
+               return;
+       if (!register_vsmp_smp_ops())
+               return;
+}
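For the common MT7621 configuration the clk_sel == 1 branch applies: the crystal frequency (40 MHz when xtal_mode is 3 to 5) multiplied by the feedback divider. A worked example with an illustrative divider field of 21 read from register 0x648:

        int fbdiv = 21 + 1;                          /* ((reg >> 4) & 0x7F) + 1 */
        u32 cpu_clk = 40 * fbdiv * 1000 * 1000;      /* 880000000, i.e. 880 MHz */

In the clk_sel == 0 branch the same idea applies with a 500 MHz base scaled by ffrac/fdiv from CUR_CLK_STS.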
index 844f5cd55c8f1c1e96e20db2a888c22e1fb12327..3c84166ebcb779b1f50f9711c820977295873377 100644 (file)
@@ -119,5 +119,5 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
        soc_info->mem_size_max = RT2880_MEM_SIZE_MAX;
 
        rt2880_pinmux_data = rt2880_pinmux_data_act;
-       ralink_soc == RT2880_SOC;
+       ralink_soc = RT2880_SOC;
 }
index 9e45725920650bd55350549097f9330403f3ef6a..d7c4ba43a4284525b6de3b3f95daa42d8a846ad2 100644 (file)
@@ -201,6 +201,7 @@ void __init ralink_clk_init(void)
        ralink_clk_add("cpu", cpu_rate);
        ralink_clk_add("sys", sys_rate);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000100.timer", wdt_rate);
        ralink_clk_add("10000120.watchdog", wdt_rate);
        ralink_clk_add("10000500.uart", uart_rate);
index 582995aaaf4e7e8a9848c512d150b515f3628344..fafec947b27df7dc436d4701f698c734904e7421 100644 (file)
@@ -109,6 +109,7 @@ void __init ralink_clk_init(void)
        ralink_clk_add("10000120.watchdog", sys_rate);
        ralink_clk_add("10000500.uart", 40000000);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000c00.uartlite", 40000000);
        ralink_clk_add("10100000.ethernet", sys_rate);
        ralink_clk_add("10180000.wmac", 40000000);
diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c
new file mode 100644 (file)
index 0000000..5b4f186
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+
+#include <linux/of.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+
+#include "common.h"
+
+void __init plat_time_init(void)
+{
+       ralink_of_remap();
+
+       of_clk_init(NULL);
+       clocksource_probe();
+}
index 650d5d39f34d902a60324cd9def8a48af228b994..fd1108543a71d0398097741a4afdf038c574545e 100644 (file)
@@ -89,7 +89,7 @@ static int rb532_gpio_get(struct gpio_chip *chip, unsigned offset)
        struct rb532_gpio_chip  *gpch;
 
        gpch = container_of(chip, struct rb532_gpio_chip, chip);
-       return rb532_get_bit(offset, gpch->regbase + GPIOD);
+       return !!rb532_get_bit(offset, gpch->regbase + GPIOD);
 }
 
 /*
index 9d9962ab7d25ccfea97238f402e5dc2f65e20d98..2fd350f31f4b54c4c505700c966cb941b62e4980 100644 (file)
@@ -689,7 +689,7 @@ static int txx9_iocled_get(struct gpio_chip *chip, unsigned int offset)
 {
        struct txx9_iocled_data *data =
                container_of(chip, struct txx9_iocled_data, chip);
-       return data->cur_val & (1 << offset);
+       return !!(data->cur_val & (1 << offset));
 }
 
 static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset,
index e4824fd04bb7449d262c1a7697b5f539b03f6bab..9faa18c4f3f702adceb4f555b05b72bc8437cf6c 100644 (file)
@@ -557,7 +557,7 @@ choice
 
 config PPC_4K_PAGES
        bool "4k page size"
-       select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+       select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
 
 config PPC_16K_PAGES
        bool "16k page size"
@@ -566,7 +566,7 @@ config PPC_16K_PAGES
 config PPC_64K_PAGES
        bool "64k page size"
        depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
-       select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+       select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
 
 config PPC_256K_PAGES
        bool "256k page size"
index 06f17e778c2750edd0b2380288da820f0c40f130..8d1c8162f0c1078d1842d754b64a078be8e1ea0e 100644 (file)
@@ -50,7 +50,9 @@
  * set of bits not changed in pmd_modify.
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-                        _PAGE_ACCESSED | _PAGE_THP_HUGE)
+                        _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+                        _PAGE_SOFT_DIRTY)
+
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
index 8204b0c393aac69285a666d1b70fae0961053614..ac07a30a7934265ed98706efb9f2c82ac8db85ce 100644 (file)
@@ -223,7 +223,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_pfn(pmd)           pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)         pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)         pte_young(pmd_pte(pmd))
-#define pmd_dirty(pmd)         pte_dirty(pmd_pte(pmd))
 #define pmd_mkold(pmd)         pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)     pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)       pte_pmd(pte_mkdirty(pmd_pte(pmd)))
@@ -282,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
 extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                            pmd_t *pmdp);
 
+#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
+extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+                                   unsigned long address, pmd_t *pmdp);
+
 #define pmd_move_must_withdraw pmd_move_must_withdraw
 struct spinlock;
 static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
index c5eb86f3d452fbe66d44ae1cce9bbfff91a8b14d..867c39b45df6ce4c1bd5a342ca314a888bb185bf 100644 (file)
@@ -81,6 +81,7 @@ struct pci_dn;
 #define EEH_PE_KEEP            (1 << 8)        /* Keep PE on hotplug   */
 #define EEH_PE_CFG_RESTRICTED  (1 << 9)        /* Block config on error */
 #define EEH_PE_REMOVED         (1 << 10)       /* Removed permanently  */
+#define EEH_PE_PRI_BUS         (1 << 11)       /* Cached primary bus   */
 
 struct eeh_pe {
        int type;                       /* PE type: PHB/Bus/Device      */
index 271fefbbe521badbc1a232b8f3583fb10833388b..9d08d8cbed1a1ec0e5893679c5e1fd9cb69dce09 100644 (file)
@@ -38,8 +38,7 @@
 
 #define KVM_MAX_VCPUS          NR_CPUS
 #define KVM_MAX_VCORES         NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS     512
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
index 5654ece02c0db5ee41da441c30d8a2c5f1cdcbc4..3fa9df70aa20dfa01e90eb1a6af06171a1d8fbde 100644 (file)
@@ -383,3 +383,4 @@ SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(mlock2)
+SYSCALL(copy_file_range)
index 8e86b48d03699047dda0f493a3955c8c05e34909..32e36b16773fd876a7b246ddb9e23c28193c3570 100644 (file)
@@ -57,12 +57,14 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
 extern void hcall_tracepoint_regfunc(void);
 extern void hcall_tracepoint_unregfunc(void);
 
-TRACE_EVENT_FN(hcall_entry,
+TRACE_EVENT_FN_COND(hcall_entry,
 
        TP_PROTO(unsigned long opcode, unsigned long *args),
 
        TP_ARGS(opcode, args),
 
+       TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
        TP_STRUCT__entry(
                __field(unsigned long, opcode)
        ),
@@ -76,13 +78,15 @@ TRACE_EVENT_FN(hcall_entry,
        hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
 );
 
-TRACE_EVENT_FN(hcall_exit,
+TRACE_EVENT_FN_COND(hcall_exit,
 
        TP_PROTO(unsigned long opcode, unsigned long retval,
                unsigned long *retbuf),
 
        TP_ARGS(opcode, retval, retbuf),
 
+       TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
        TP_STRUCT__entry(
                __field(unsigned long, opcode)
                __field(unsigned long, retval)
index 6a5ace5fa0c8a8bbf83a73a5fca8fb0150d5d3e8..1f2594d456054262bd2bc4a9eef265c983971afa 100644 (file)
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls            379
+#define NR_syscalls            380
 
 #define __NR__exit __NR_exit
 
index 12a05652377a28799a89ae59e3d38a7ddeaade92..940290d45b087220711cd0a508ae5c4223b21951 100644 (file)
 #define __NR_userfaultfd       364
 #define __NR_membarrier                365
 #define __NR_mlock2            378
+#define __NR_copy_file_range   379
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
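With the table slot and syscall number wired up, the new call can be exercised from userspace even before the C library grows a wrapper. A minimal sketch (file names are placeholders):

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>
        #include <sys/syscall.h>

        int main(void)
        {
                int in = open("src", O_RDONLY);
                int out = open("dst", O_WRONLY | O_CREAT, 0644);
                /* 379 matches __NR_copy_file_range added above */
                long n = syscall(379, in, NULL, out, NULL, 4096, 0);
                printf("copied %ld bytes\n", n);
                return n < 0;
        }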
index 938742135ee08fc8dd058df690cfba7eacdabc0b..650cfb31ea3d9b3cee0301948bd85e54400cb951 100644 (file)
@@ -418,8 +418,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
                eeh_pcid_put(dev);
                if (driver->err_handler &&
                    driver->err_handler->error_detected &&
-                   driver->err_handler->slot_reset &&
-                   driver->err_handler->resume)
+                   driver->err_handler->slot_reset)
                        return NULL;
        }
 
@@ -564,6 +563,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
         */
        eeh_pe_state_mark(pe, EEH_PE_KEEP);
        if (bus) {
+               eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
                pci_lock_rescan_remove();
                pcibios_remove_pci_devices(bus);
                pci_unlock_rescan_remove();
@@ -803,6 +803,7 @@ perm_error:
         * their PCI config any more.
         */
        if (frozen_bus) {
+               eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
                eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 
                pci_lock_rescan_remove();
@@ -886,6 +887,7 @@ static void eeh_handle_special_event(void)
                                        continue;
 
                                /* Notify all devices to be down */
+                               eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
                                bus = eeh_pe_bus_get(phb_pe);
                                eeh_pe_dev_traverse(pe,
                                        eeh_report_failure, NULL);
index 8654cb166c19b04fea827756513a1d23e4d92272..98f81800e00c1030b69c80d79eb8d43edbda9728 100644 (file)
@@ -883,32 +883,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
 const char *eeh_pe_loc_get(struct eeh_pe *pe)
 {
        struct pci_bus *bus = eeh_pe_bus_get(pe);
-       struct device_node *dn = pci_bus_to_OF_node(bus);
+       struct device_node *dn;
        const char *loc = NULL;
 
-       if (!dn)
-               goto out;
+       while (bus) {
+               dn = pci_bus_to_OF_node(bus);
+               if (!dn) {
+                       bus = bus->parent;
+                       continue;
+               }
 
-       /* PHB PE or root PE ? */
-       if (pci_is_root_bus(bus)) {
-               loc = of_get_property(dn, "ibm,loc-code", NULL);
-               if (!loc)
+               if (pci_is_root_bus(bus))
                        loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
+               else
+                       loc = of_get_property(dn, "ibm,slot-location-code",
+                                             NULL);
+
                if (loc)
-                       goto out;
+                       return loc;
 
-               /* Check the root port */
-               dn = dn->child;
-               if (!dn)
-                       goto out;
+               bus = bus->parent;
        }
 
-       loc = of_get_property(dn, "ibm,loc-code", NULL);
-       if (!loc)
-               loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
-       return loc ? loc : "N/A";
+       return "N/A";
 }
 
 /**
@@ -931,7 +928,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
                bus = pe->phb->bus;
        } else if (pe->type & EEH_PE_BUS ||
                   pe->type & EEH_PE_DEVICE) {
-               if (pe->bus) {
+               if (pe->state & EEH_PE_PRI_BUS) {
                        bus = pe->bus;
                        goto out;
                }
index db475d41b57a5d4b5313d11f4494792c6070dc67..f28754c497e506cb3291462f94d422822be4e29d 100644 (file)
@@ -701,31 +701,3 @@ _GLOBAL(kexec_sequence)
        li      r5,0
        blr     /* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
-       .llong  __crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
-       .asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
-       .llong 0 /* .value */
-       .llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
index 59663af9315fc16123cd4aacb090a6efd8cbc33d..08b7a40de5f85ab1c6f7e5ed205ca52edd5ab12c 100644 (file)
@@ -326,7 +326,10 @@ static void dedotify_versions(struct modversion_info *vers,
                }
 }
 
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
 static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 {
        unsigned int i;
@@ -334,8 +337,11 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
        for (i = 1; i < numsyms; i++) {
                if (syms[i].st_shndx == SHN_UNDEF) {
                        char *name = strtab + syms[i].st_name;
-                       if (name[0] == '.')
-                               memmove(name, name+1, strlen(name));
+                       if (name[0] == '.') {
+                               if (strcmp(name+1, "TOC.") == 0)
+                                       syms[i].st_shndx = SHN_ABS;
+                               syms[i].st_name++;
+                       }
                }
        }
 }
@@ -351,7 +357,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
        numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
 
        for (i = 1; i < numsyms; i++) {
-               if (syms[i].st_shndx == SHN_UNDEF
+               if (syms[i].st_shndx == SHN_ABS
                    && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
                        return &syms[i];
        }
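Replacing the memmove() with st_name++ works because st_name is an offset into the string table: bumping it by one "strips" the leading dot in place without moving any bytes, and marking the symbol SHN_ABS lets find_dot_toc() treat .TOC. as defined. A self-contained sketch of the offset trick (layout is illustrative):

        #include <elf.h>

        static const char strtab[] = "\0.TOC.\0";   /* offset 1 -> ".TOC." */

        static const char *dedotify_toc(Elf64_Sym *sym)
        {
                sym->st_name++;            /* offset now points at "TOC." */
                sym->st_shndx = SHN_ABS;   /* looks defined; value set later */
                return strtab + sym->st_name;
        }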
index dccc87e8fee5e6544de0d8fc732a97aa14f45907..3c5736e52a14b7d42dd0c28425e709b0a7147c3c 100644 (file)
@@ -1768,9 +1768,9 @@ static inline unsigned long brk_rnd(void)
 
        /* 8MB for 32bit, 1GB for 64bit */
        if (is_32bit_task())
-               rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+               rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
        else
-               rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+               rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
 
        return rnd << PAGE_SHIFT;
 }
index 774a253ca4e1eaa2ab9c8277d22cbcae0d85f676..9bf7031a67ffc0aedf742e7a9da6afebeb8fe64b 100644 (file)
@@ -377,15 +377,12 @@ no_seg_found:
 
 static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 {
-       struct kvmppc_vcpu_book3s *vcpu_book3s;
        u64 esid, esid_1t;
        int slb_nr;
        struct kvmppc_slb *slbe;
 
        dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
 
-       vcpu_book3s = to_book3s(vcpu);
-
        esid = GET_ESID(rb);
        esid_1t = GET_ESID_1T(rb);
        slb_nr = rb & 0xfff;
index cff207b72c46ae0b54cf48db5e4a3aa828f2fc29..baeddb06811d738a6ea8be4ce920e5ea39a9645a 100644 (file)
@@ -833,6 +833,24 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        vcpu->stat.sum_exits++;
 
+       /*
+        * This can happen if an interrupt occurs in the last stages
+        * of guest entry or the first stages of guest exit (i.e. after
+        * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+        * and before setting it to KVM_GUEST_MODE_HOST_HV).
+        * That can happen due to a bug, or due to a machine check
+        * occurring at just the wrong time.
+        */
+       if (vcpu->arch.shregs.msr & MSR_HV) {
+               printk(KERN_EMERG "KVM trap in HV mode!\n");
+               printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+                       vcpu->arch.trap, kvmppc_get_pc(vcpu),
+                       vcpu->arch.shregs.msr);
+               kvmppc_dump_regs(vcpu);
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               run->hw.hardware_exit_reason = vcpu->arch.trap;
+               return RESUME_HOST;
+       }
        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
        switch (vcpu->arch.trap) {
index 3c6badcd53efced649af69ff9a4adf823b28d9a0..6ee26de9a1ded02e0e08f376fd612049eedee0b4 100644 (file)
@@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 2:     rlwimi  r5, r4, 5, DAWRX_DR | DAWRX_DW
-       rlwimi  r5, r4, 1, DAWRX_WT
+       rlwimi  r5, r4, 2, DAWRX_WT
        clrrdi  r4, r4, 3
        std     r4, VCPU_DAWR(r3)
        std     r5, VCPU_DAWRX(r3)
@@ -2404,6 +2404,8 @@ machine_check_realmode:
         * guest as machine check causing guest to crash.
         */
        ld      r11, VCPU_MSR(r9)
+       rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+       bne     mc_cont                 /* if so, exit to host */
        andi.   r10, r11, MSR_RI        /* check for unrecoverable exception */
        beq     1f                      /* Deliver a machine check to guest */
        ld      r10, VCPU_PC(r9)
index 6fd2405c7f4aa2e5bdc5ae417a409b33b6c5f7bd..a3b182dcb823640540fcaeac38bc7d89550fddd6 100644 (file)
@@ -919,21 +919,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                                r = -ENXIO;
                                break;
                        }
-                       vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+                       val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
                        break;
                case KVM_REG_PPC_VSCR:
                        if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                                r = -ENXIO;
                                break;
                        }
-                       vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+                       val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
                        break;
                case KVM_REG_PPC_VRSAVE:
-                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-                               r = -ENXIO;
-                               break;
-                       }
-                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
+                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
                        break;
 #endif /* CONFIG_ALTIVEC */
                default:
@@ -974,17 +970,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                                r = -ENXIO;
                                break;
                        }
-                       val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+                       vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
                        break;
                case KVM_REG_PPC_VSCR:
                        if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                                r = -ENXIO;
                                break;
                        }
-                       val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+                       vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
                        break;
                case KVM_REG_PPC_VRSAVE:
-                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
+                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+                               r = -ENXIO;
+                               break;
+                       }
+                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
                        break;
 #endif /* CONFIG_ALTIVEC */
                default:
index 0762c1e08c88644f35cc8a3df5ba50c52c1801a2..edb09912f0c9b5ca316216bf3d912c7efe8d920a 100644 (file)
@@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
         */
        if (!(old_pte & _PAGE_COMBO)) {
                flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
-               old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
+               /*
+                * Clear the old slot details from both the old and new pte.
+                * On hash insert failure we fall back to the old pte value,
+                * and we don't want stale slot information there if the
+                * insert fails.
+                */
+               old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+               new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
                goto htab_insert_hpte;
        }
        /*
index 49b152b0f926289e1dcaa595988932a036df5325..eb2accdd76fd8bb70a762c4a57df8bc2c425ffad 100644 (file)
@@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
                 * base page size. This is because demote_segment won't flush
                 * hash page table entries.
                 */
-               if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+               if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
                        flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
                                            ssize, flags);
+                       /*
+                        * With THP, we also clear the slot information for
+                        * all the 64K hash ptes mapping the 16MB page. They
+                        * are all invalid now. This makes sure we don't find
+                        * the slot valid when we fault with a 4K base page
+                        * size.
+                        */
+                       memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+               }
        }
 
        valid = hpte_valid(hpte_slot_array, index);
index 22d94c3e6fc43bf7afbd63bfd483074a9e18589a..d0f0a514b04ed66da35d1ae6b27b964285ef2aec 100644 (file)
@@ -560,12 +560,12 @@ subsys_initcall(add_system_ram_resources);
  */
 int devmem_is_allowed(unsigned long pfn)
 {
+       if (page_is_rtas_user_buf(pfn))
+               return 1;
        if (iomem_is_exclusive(PFN_PHYS(pfn)))
                return 0;
        if (!page_is_ram(pfn))
                return 1;
-       if (page_is_rtas_user_buf(pfn))
-               return 1;
        return 0;
 }
 #endif /* CONFIG_STRICT_DEVMEM */
index 0f0502e12f6c4c8accbe7fc28eb4db08158decfb..4087705ba90f34241200e2f30765794ea6b74b55 100644 (file)
@@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void)
 
        /* 8MB for 32bit, 1GB for 64bit */
        if (is_32bit_task())
-               rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+               rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
        else
-               rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
+               rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
 
        return rnd << PAGE_SHIFT;
 }
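The switch to get_random_long() keeps the arithmetic in unsigned long throughout. As a worked example with 4K pages (PAGE_SHIFT == 12), the 64-bit case draws one of 2^18 slots, which after the final page shift spans a 1 GiB randomization window:

        unsigned long slots = 1UL << (30 - 12);     /* 262144 possible offsets */
        unsigned long span  = slots << 12;          /* 0x40000000 == 1 GiB */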
index 3124a20d0fab7a66b3a170356037da0d18c9da85..cdf2123d46db4813a4e87f30d29f8da359c313d9 100644 (file)
@@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
        return pgtable;
 }
 
+void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+                            unsigned long address, pmd_t *pmdp)
+{
+       VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+       VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+
+       /*
+        * We can't mark the pmd none here, because that would race
+        * against exit_mmap. We need to keep the pmd marked TRANS HUGE
+        * while we split, but at the same time we want the rest of the
+        * ppc64 code not to insert a hash pte for it, because we will be
+        * modifying the deposited pgtable in the caller of this function.
+        * Hence clear _PAGE_USER so that fault handling moves to a
+        * higher level function, which will serialize against the ptl.
+        * We need to flush the existing hash pte entries here even though
+        * the translation is still valid, because we will withdraw the
+        * pgtable_t after this.
+        */
+       pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
+}
+
+
 /*
  * set a new huge pmd. We should not be called for updating
  * an existing pmd entry. That should go via pmd_hugepage_update.
@@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
        return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
 
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                     pmd_t *pmdp)
 {
        pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+
+       /*
+        * This ensures that generic code that relies on IRQ disabling
+        * to prevent a parallel THP split works as expected.
+        */
+       kick_all_cpus_sync();
 }
 
 /*
index 7d5e295255b7b25934d51792fb6876c16462d420..9958ba8bf0d27fca74a182546170a487edb88346 100644 (file)
@@ -816,7 +816,7 @@ static struct power_pmu power8_pmu = {
        .get_constraint         = power8_get_constraint,
        .get_alternatives       = power8_get_alternatives,
        .disable_pmc            = power8_disable_pmc,
-       .flags                  = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+       .flags                  = PPMU_HAS_SIER | PPMU_ARCH_207S,
        .n_generic              = ARRAY_SIZE(power8_generic_events),
        .generic_events         = power8_generic_events,
        .cache_events           = &power8_cache_events,
index 5038fd578e65acaeeb15127d3af1aa7a75068635..2936a0044c049681d6eeeb80eddbb3ab4ad9295a 100644 (file)
@@ -1799,9 +1799,9 @@ static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int data
        struct inode *inode = file_inode(file);
        int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (!err) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                err = spufs_mfc_flush(file, NULL);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return err;
 }
index ad4840f86be1f53a127ac829b62d2cc41c95ce49..dfa863876778144baa5cb99ac65e61c4ca60ec70 100644 (file)
@@ -163,7 +163,7 @@ static void spufs_prune_dir(struct dentry *dir)
 {
        struct dentry *dentry, *tmp;
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
                spin_lock(&dentry->d_lock);
                if (simple_positive(dentry)) {
@@ -180,7 +180,7 @@ static void spufs_prune_dir(struct dentry *dir)
                }
        }
        shrink_dcache_parent(dir);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 }
 
 /* Caller must hold parent->i_mutex */
@@ -225,9 +225,9 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
        parent = d_inode(dir->d_parent);
        ctx = SPUFS_I(d_inode(dir))->i_ctx;
 
-       mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(parent, I_MUTEX_PARENT);
        ret = spufs_rmdir(parent, dir);
-       mutex_unlock(&parent->i_mutex);
+       inode_unlock(parent);
        WARN_ON(ret);
 
        return dcache_dir_close(inode, file);
@@ -270,7 +270,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        dget(dentry);
        inc_nlink(dir);
@@ -291,7 +291,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
        if (ret)
                spufs_rmdir(dir, dentry);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index 5f152b95ca0c8493536d787a51d741ae628adb8a..87f47e55aab65ac234df1d67c926ca17b1517f06 100644 (file)
@@ -444,9 +444,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
         * PCI devices of the PE are expected to be removed prior
         * to PE reset.
         */
-       if (!edev->pe->bus)
+       if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
                edev->pe->bus = pci_find_bus(hose->global_number,
                                             pdn->busno);
+               if (edev->pe->bus)
+                       edev->pe->state |= EEH_PE_PRI_BUS;
+       }
 
        /*
         * Enable EEH explicitly so that we will do EEH check
index 573ae1994097fb91e15e3f7f6351fe1e73b35c59..f90dc04395bf47bcc0e662a7a1c17526220ccda2 100644 (file)
@@ -3180,6 +3180,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
 
 static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
        .dma_dev_setup = pnv_pci_dma_dev_setup,
+       .dma_bus_setup = pnv_pci_dma_bus_setup,
 #ifdef CONFIG_PCI_MSI
        .setup_msi_irqs = pnv_setup_msi_irqs,
        .teardown_msi_irqs = pnv_teardown_msi_irqs,
index 2f55c86df703554bfd9541a44f8ba26bec072729..b1ef84a6c9d13cff03c2d4a5234e57265ef5bc75 100644 (file)
@@ -599,6 +599,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
        u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
        long i;
 
+       if (proto_tce & TCE_PCI_WRITE)
+               proto_tce |= TCE_PCI_READ;
+
        for (i = 0; i < npages; i++) {
                unsigned long newtce = proto_tce |
                        ((rpn + i) << tbl->it_page_shift);
@@ -620,6 +623,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index,
 
        BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
 
+       if (newtce & TCE_PCI_WRITE)
+               newtce |= TCE_PCI_READ;
+
        oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
        *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
        *direction = iommu_tce_direction(oldtce);
@@ -760,6 +766,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
                phb->dma_dev_setup(phb, pdev);
 }
 
+void pnv_pci_dma_bus_setup(struct pci_bus *bus)
+{
+       struct pci_controller *hose = bus->sysdata;
+       struct pnv_phb *phb = hose->private_data;
+       struct pnv_ioda_pe *pe;
+
+       list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+               if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
+                       continue;
+
+               if (!pe->pbus)
+                       continue;
+
+               if (bus->number == ((pe->rid >> 8) & 0xFF)) {
+                       pe->pbus = bus;
+                       break;
+               }
+       }
+}
+
 void pnv_pci_shutdown(void)
 {
        struct pci_controller *hose;
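
pnv_pci_dma_bus_setup() above reconnects a (re)scanned bus to its PE by matching the bus number embedded in the PE's RID. A PCIe requester ID packs bus, device and function into 8+5+3 bits; a standalone sketch of the decode used in that comparison:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint16_t rid = 0x42a1;          /* example RID: bus 0x42, devfn 0xa1 */

            printf("bus = 0x%02x\n", (rid >> 8) & 0xff);   /* the field compared above */
            printf("dev = 0x%02x\n", (rid >> 3) & 0x1f);
            printf("fn  = 0x%x\n",   rid & 0x7);
            return 0;
    }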
index 7f56313e8d7223dfd9f22b927c807b20516cf941..00691a9b99af67b09967c73b39b652b973392f06 100644 (file)
@@ -242,6 +242,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
 extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
+extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
 extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
 extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
 
index b2e5902bd8f4d8e5f4f53cc3e6fad1cb183db7be..0f3da2cb2bd63c0d26649776fd53d844f3153d18 100644 (file)
@@ -67,7 +67,7 @@ static void hypfs_remove(struct dentry *dentry)
        struct dentry *parent;
 
        parent = dentry->d_parent;
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        if (simple_positive(dentry)) {
                if (d_is_dir(dentry))
                        simple_rmdir(d_inode(parent), dentry);
@@ -76,7 +76,7 @@ static void hypfs_remove(struct dentry *dentry)
        }
        d_delete(dentry);
        dput(dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 }
 
 static void hypfs_delete_tree(struct dentry *root)
@@ -331,7 +331,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(dentry)) {
                dentry = ERR_PTR(-ENOMEM);
@@ -359,7 +359,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
        d_instantiate(dentry, inode);
        dget(dentry);
 fail:
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return dentry;
 }
 
index ea91ddfe54ebb901007bb625f833f7457eff0253..629c90865a073174f61fcc47268d3311db4f3a9e 100644 (file)
@@ -40,6 +40,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
 static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
        fpregs->pad = 0;
+       fpregs->fpc = fpu->fpc;
        if (MACHINE_HAS_VX)
                convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
        else
@@ -49,6 +50,7 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
 
 static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
+       fpu->fpc = fpregs->fpc;
        if (MACHINE_HAS_VX)
                convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
        else
index 16aa0c779e0762e210fe6652a0a5efda24771381..595a275c36f87d5ea4fa58d0a580eb36c601ef34 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define ARCH_IRQ_ENABLED       (3UL << (BITS_PER_LONG - 8))
+
 /* store then OR system mask. */
 #define __arch_local_irq_stosm(__or)                                   \
 ({                                                                     \
@@ -54,14 +56,17 @@ static inline notrace void arch_local_irq_enable(void)
        __arch_local_irq_stosm(0x03);
 }
 
+/* This only restores external and I/O interrupt state */
 static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
-       __arch_local_irq_ssm(flags);
+       /* only disabled->disabled and disabled->enabled transitions are valid */
+       if (flags & ARCH_IRQ_ENABLED)
+               arch_local_irq_enable();
 }
 
 static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
-       return !(flags & (3UL << (BITS_PER_LONG - 8)));
+       return !(flags & ARCH_IRQ_ENABLED);
 }
 
 static inline notrace bool arch_irqs_disabled(void)
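
For readers decoding the new constant: on s390 the saved flags word carries the PSW system-mask byte in its most significant byte, so the two interrupt-enable bits (I/O and external) land at word bits 56 and 57, which is exactly 3UL << (BITS_PER_LONG - 8). A standalone check, assuming a 64-bit host:

    #include <stdio.h>

    #define BITS_PER_LONG (sizeof(long) * 8)              /* 64 on a 64-bit host */
    #define ARCH_IRQ_ENABLED (3UL << (BITS_PER_LONG - 8))

    int main(void)
    {
            /* flags with the I/O and external mask bits set */
            unsigned long enabled = 0x0300000000000000UL;

            printf("ARCH_IRQ_ENABLED = 0x%016lx\n", ARCH_IRQ_ENABLED);
            printf("enabled flags?   %d\n", !!(enabled & ARCH_IRQ_ENABLED));
            printf("disabled flags?  %d\n", !!(0UL & ARCH_IRQ_ENABLED));
            return 0;
    }

Since arch_local_irq_save() always leaves interrupts off, the only transitions restore can see are disabled to disabled and disabled to enabled, so a conditional enable replaces the full ssm reload of the saved mask.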
index 6742414dbd6f30b66451f04602cb55de0d20f41c..8959ebb6d2c9eb35538b835a39522585321f9af4 100644 (file)
@@ -546,7 +546,6 @@ struct kvm_vcpu_arch {
        struct kvm_s390_sie_block *sie_block;
        unsigned int      host_acrs[NUM_ACRS];
        struct fpu        host_fpregs;
-       struct fpu        guest_fpregs;
        struct kvm_s390_local_interrupt local_int;
        struct hrtimer    ckc_timer;
        struct kvm_s390_pgm_info pgm;
index 7aa799134a11754b09ad523a00e54c92a6e1c7a9..a52b6cca873d380adc9bec726cc31816dba5cd6b 100644 (file)
@@ -37,7 +37,7 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
        regs->psw.addr = ip;
 }
 #else
-#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#error Include linux/livepatch.h, not asm/livepatch.h
 #endif
 
 #endif
index 1a9a98de5bdebc346c469f55818f08ed33b576e7..69aa18be61afcfc1e764c5c51ec5cbe5d0fe2043 100644 (file)
@@ -8,10 +8,13 @@
 #include <asm/pci_insn.h>
 
 /* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES         0x7fff
-#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000ULL
-#define ZPCI_IOMAP_ADDR_IDX_MASK       0x7fff000000000000ULL
-#define ZPCI_IOMAP_ADDR_OFF_MASK       0x0000ffffffffffffULL
+#define ZPCI_IOMAP_SHIFT               48
+#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK       ((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES                                                 \
+       ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK                                               \
+       (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
 
 struct zpci_iomap_entry {
        u32 fh;
@@ -21,8 +24,9 @@ struct zpci_iomap_entry {
 
 extern struct zpci_iomap_entry *zpci_iomap_start;
 
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
 #define ZPCI_IDX(addr)                                                         \
-       (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+       (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
 #define ZPCI_OFFSET(addr)                                                      \
        ((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
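
The rewritten macros derive everything from ZPCI_IOMAP_SHIFT instead of hard-coding 0x7fff and 48. An ioremapped s390 PCI "address" is really a cookie: bit 63 tags it, the iomap slot index sits above the shift, and the BAR offset fills the low bits. A standalone round trip using the same arithmetic:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    #define ZPCI_IOMAP_SHIFT         48
    #define ZPCI_IOMAP_ADDR_BASE     UINT64_C(0x8000000000000000)
    #define ZPCI_IOMAP_ADDR_OFF_MASK ((UINT64_C(1) << ZPCI_IOMAP_SHIFT) - 1)
    #define ZPCI_IOMAP_ADDR_IDX_MASK (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)

    #define ZPCI_ADDR(idx)    (ZPCI_IOMAP_ADDR_BASE | ((uint64_t)(idx) << ZPCI_IOMAP_SHIFT))
    #define ZPCI_IDX(addr)    (((uint64_t)(addr) & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
    #define ZPCI_OFFSET(addr) ((uint64_t)(addr) & ZPCI_IOMAP_ADDR_OFF_MASK)

    int main(void)
    {
            uint64_t cookie = ZPCI_ADDR(42) + 0x1000;  /* slot 42, BAR offset 0x1000 */

            printf("cookie = 0x%016" PRIx64 "\n", cookie);
            printf("idx    = %" PRIu64 "\n", ZPCI_IDX(cookie));
            printf("offset = 0x%" PRIx64 "\n", ZPCI_OFFSET(cookie));
            return 0;
    }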
 
index f16debf6a612932a7312428465d9ceb1bee41915..1c4fe129486d2e0a9282dc72a3735c4727277b52 100644 (file)
@@ -166,14 +166,14 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS];
  */
 #define start_thread(regs, new_psw, new_stackp) do {                   \
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;    \
-       regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
+       regs->psw.addr  = new_psw;                                      \
        regs->gprs[15]  = new_stackp;                                   \
        execve_tail();                                                  \
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {                 \
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_BA;                  \
-       regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
+       regs->psw.addr  = new_psw;                                      \
        regs->gprs[15]  = new_stackp;                                   \
        crst_table_downgrade(current->mm, 1UL << 31);                   \
        execve_tail();                                                  \
index f00cd35c8ac4634e9270ff81e73e603ccc5573f0..99bc456cc26a3087cbec7f35ca82959a94cf2ab3 100644 (file)
@@ -149,7 +149,7 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
 #define arch_has_block_step()  (1)
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
-#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define instruction_pointer(regs) ((regs)->psw.addr)
 #define user_stack_pointer(regs)((regs)->gprs[15])
 #define profile_pc(regs) instruction_pointer(regs)
 
@@ -161,7 +161,7 @@ static inline long regs_return_value(struct pt_regs *regs)
 static inline void instruction_pointer_set(struct pt_regs *regs,
                                           unsigned long val)
 {
-       regs->psw.addr = val | PSW_ADDR_AMODE;
+       regs->psw.addr = val;
 }
 
 int regs_query_register_offset(const char *name);
@@ -171,7 +171,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
-       return regs->gprs[15] & PSW_ADDR_INSN;
+       return regs->gprs[15];
 }
 
 #endif /* __ASSEMBLY__ */
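
The long run of s390 hunks in this commit deletes "& PSW_ADDR_INSN" and "| PSW_ADDR_AMODE" everywhere. On 31-bit kernels the top bit of the PSW address word was the addressing-mode flag rather than part of the address, so every read had to mask it off and every write had to OR it back in; a 64-bit PSW carries a full 64-bit address, which makes both operations no-ops that can simply go. A sketch assuming the historical 31-bit layout:

    #include <stdio.h>

    #define PSW31_ADDR_INSN  0x7fffffffUL   /* low 31 bits: instruction address */
    #define PSW31_ADDR_AMODE 0x80000000UL   /* top bit: 31-bit addressing mode */

    int main(void)
    {
            /* Storing an address meant OR-ing the mode bit in ... */
            unsigned long psw_word = 0x0001a000UL | PSW31_ADDR_AMODE;

            /* ... and reading one back meant masking it off again. */
            printf("raw PSW word: 0x%08lx\n", psw_word);
            printf("instruction:  0x%08lx\n", psw_word & PSW31_ADDR_INSN);
            return 0;
    }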
index 34ec202472c6cadb5b973b54c6c995bb6d2e897c..ab3aa6875a59ca4dc3760663d79189090286d7ab 100644 (file)
 #define __NR_recvmsg           372
 #define __NR_shutdown          373
 #define __NR_mlock2            374
-#define NR_syscalls 375
+#define __NR_copy_file_range   375
+#define NR_syscalls 376
 
 /* 
  * There are some system calls that are not present on 64 bit, some
index 66c94417c0ba09a471244a629c78d2457f38b17d..4af60374eba01fba89dc8b170a41428b094160b5 100644 (file)
@@ -271,7 +271,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
 
        /* Restore high gprs from signal stack */
        if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
-                            sizeof(&sregs_ext->gprs_high)))
+                            sizeof(sregs_ext->gprs_high)))
                return -EFAULT;
        for (i = 0; i < NUM_GPRS; i++)
                *(__u32 *)&regs->gprs[i] = gprs_high[i];
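
The one-character fix above deserves a spelled-out example: sizeof(&sregs_ext->gprs_high) measures a pointer, not the array it points at, so the old code copied only 8 of the intended 64 bytes of high GPR halves. A standalone demonstration, assuming gprs_high is a 16-entry 32-bit array as in the compat sigframe:

    #include <stdio.h>

    int main(void)
    {
            unsigned int gprs_high[16];     /* __u32 gprs_high[16] in the sigframe */

            /* The array is 64 bytes; its address is just a pointer.
             * The broken copy_from_user() length was the second one. */
            printf("sizeof(gprs_high)  = %zu\n", sizeof(gprs_high));
            printf("sizeof(&gprs_high) = %zu\n", sizeof(&gprs_high));
            return 0;
    }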
index fac4eeddef91fe3acb21e0108069f3f3a70ef72a..ae2cda5eee5a99b35b73e5b7868edd44cba1c6d2 100644 (file)
@@ -177,3 +177,4 @@ COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
index a92b39fd0e63d953db2d94bbfb090d9f257e867f..3986c9f6219174bf7462b88579b5b030d830185e 100644 (file)
@@ -59,8 +59,6 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
        struct save_area *sa;
 
        sa = (void *) memblock_alloc(sizeof(*sa), 8);
-       if (!sa)
-               return NULL;
        if (is_boot_cpu)
                list_add(&sa->list, &dump_save_areas);
        else
index 6fca0e46464e01842f613584e4c82e5ca1fe4270..c890a5589e59583bb06b1d7dffe7226069e522a0 100644 (file)
@@ -1470,7 +1470,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
                except_str = "*";
        else
                except_str = "-";
-       caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+       caller = (unsigned long) entry->caller;
        rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p  ",
                      area, (long long)time_spec.tv_sec,
                      time_spec.tv_nsec / 1000, level, except_str,
index dc8e20473484ce32f037f1e9115779a8a9d7d0cd..02bd02ff648b5ab570c5ffe920b27bbfbc3d5fd3 100644 (file)
@@ -34,22 +34,21 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
        unsigned long addr;
 
        while (1) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
-               addr = sf->gprs[8] & PSW_ADDR_INSN;
+               addr = sf->gprs[8];
                printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
                /* Follow the backchain. */
                while (1) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
-                       addr = sf->gprs[8] & PSW_ADDR_INSN;
+                       addr = sf->gprs[8];
                        printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
                }
                /* Zero backchain detected, check for interrupt frame. */
@@ -57,7 +56,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *) sp;
-               addr = regs->psw.addr & PSW_ADDR_INSN;
+               addr = regs->psw.addr;
                printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
                low = sp;
                sp = regs->gprs[15];
index 20a5caf6d981d9f81852b015765662c745ed6b3b..c55576bbaa1f7555479994d6848c9aa5b60f189b 100644 (file)
@@ -252,14 +252,14 @@ static void early_pgm_check_handler(void)
        unsigned long addr;
 
        addr = S390_lowcore.program_old_psw.addr;
-       fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+       fixup = search_exception_tables(addr);
        if (!fixup)
                disabled_wait(0);
        /* Disable low address protection before storing into lowcore. */
        __ctl_store(cr0, 0, 0);
        cr0_new = cr0 & ~(1UL << 28);
        __ctl_load(cr0_new, 0, 0);
-       S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+       S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
        __ctl_load(cr0, 0, 0);
 }
 
@@ -268,9 +268,9 @@ static noinline __init void setup_lowcore_early(void)
        psw_t psw;
 
        psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+       psw.addr = (unsigned long) s390_base_ext_handler;
        S390_lowcore.external_new_psw = psw;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+       psw.addr = (unsigned long) s390_base_pgm_handler;
        S390_lowcore.program_new_psw = psw;
        s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
index e0eaf11134b44706e1223daca7bd66e938bf37f0..0f7bfeba6da6a3632f2705ebbaccc065ffd356a2 100644 (file)
@@ -203,7 +203,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
                goto out;
        if (unlikely(atomic_read(&current->tracing_graph_pause)))
                goto out;
-       ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+       ip -= MCOUNT_INSN_SIZE;
        trace.func = ip;
        trace.depth = current->curr_ret_stack + 1;
        /* Only trace if the calling function expects to. */
index 0a5a6b661b938743684a6aec8d1d5ebcca537be1..f20abdb5630adceaa75690907ddf7cdedc098146 100644 (file)
@@ -2057,12 +2057,12 @@ void s390_reset_system(void)
        /* Set new machine check handler */
        S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
        S390_lowcore.mcck_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+               (unsigned long) s390_base_mcck_handler;
 
        /* Set new program check handler */
        S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
        S390_lowcore.program_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+               (unsigned long) s390_base_pgm_handler;
 
        /*
         * Clear subchannel ID and number to signal new kernel that no CCW or
index 389db56a2208bfa118d6f31aee9b52d090031960..250f5972536a8948a916e56a4514e942e03bde4a 100644 (file)
@@ -226,7 +226,7 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
        __ctl_load(per_kprobe, 9, 11);
        regs->psw.mask |= PSW_MASK_PER;
        regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-       regs->psw.addr = ip | PSW_ADDR_AMODE;
+       regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(enable_singlestep);
 
@@ -238,7 +238,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
        __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
        regs->psw.mask &= ~PSW_MASK_PER;
        regs->psw.mask |= kcb->kprobe_saved_imask;
-       regs->psw.addr = ip | PSW_ADDR_AMODE;
+       regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(disable_singlestep);
 
@@ -310,7 +310,7 @@ static int kprobe_handler(struct pt_regs *regs)
         */
        preempt_disable();
        kcb = get_kprobe_ctlblk();
-       p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+       p = get_kprobe((void *)(regs->psw.addr - 2));
 
        if (p) {
                if (kprobe_running()) {
@@ -460,7 +460,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
                        break;
        }
 
-       regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+       regs->psw.addr = orig_ret_address;
 
        pop_kprobe(get_kprobe_ctlblk());
        kretprobe_hash_unlock(current, &flags);
@@ -490,7 +490,7 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-       unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+       unsigned long ip = regs->psw.addr;
        int fixup = probe_get_fixup_type(p->ainsn.insn);
 
        /* Check if the kprobes location is an enabled ftrace caller */
@@ -605,9 +605,9 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
                 * In case the user-specified fault handler returned
                 * zero, try to fix up.
                 */
-               entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+               entry = search_exception_tables(regs->psw.addr);
                if (entry) {
-                       regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+                       regs->psw.addr = extable_fixup(entry);
                        return 1;
                }
 
@@ -683,7 +683,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
        memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
 
        /* setup return addr to the jprobe handler routine */
-       regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) jp->entry;
        regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 
        /* r15 is the stack pointer */
index 61595c1f0a0fe7d502bb6c213519929effe48b02..0943b11a2f6e22c088dc13d3b8a0de9b4e6ef92c 100644 (file)
@@ -74,7 +74,7 @@ static unsigned long guest_is_user_mode(struct pt_regs *regs)
 
 static unsigned long instruction_pointer_guest(struct pt_regs *regs)
 {
-       return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+       return sie_block(regs)->gpsw.addr;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
@@ -231,29 +231,27 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry,
        struct pt_regs *regs;
 
        while (1) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
-               perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+               perf_callchain_store(entry, sf->gprs[8]);
                /* Follow the backchain. */
                while (1) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
-                       perf_callchain_store(entry,
-                                            sf->gprs[8] & PSW_ADDR_INSN);
+                       perf_callchain_store(entry, sf->gprs[8]);
                }
                /* Zero backchain detected, check for interrupt frame. */
                sp = (unsigned long) (sf + 1);
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *) sp;
-               perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+               perf_callchain_store(entry, sf->gprs[8]);
                low = sp;
                sp = regs->gprs[15];
        }
@@ -262,12 +260,13 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry,
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
                           struct pt_regs *regs)
 {
-       unsigned long head;
+       unsigned long head, frame_size;
        struct stack_frame *head_sf;
 
        if (user_mode(regs))
                return;
 
+       frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
        head = regs->gprs[15];
        head_sf = (struct stack_frame *) head;
 
@@ -275,8 +274,9 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
                return;
 
        head = head_sf->back_chain;
-       head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE,
-                            S390_lowcore.async_stack);
+       head = __store_trace(entry, head,
+                            S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+                            S390_lowcore.async_stack + frame_size);
 
        __store_trace(entry, head, S390_lowcore.thread_info,
                      S390_lowcore.thread_info + THREAD_SIZE);
index 114ee8b96f17d31d5dba657c704bc34a2e1b9a9e..2bba7df4ac519fdc773551600b8bab2b9fef4789 100644 (file)
@@ -56,10 +56,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
                return 0;
        low = task_stack_page(tsk);
        high = (struct stack_frame *) task_pt_regs(tsk);
-       sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) tsk->thread.ksp;
        if (sf <= low || sf > high)
                return 0;
-       sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) sf->back_chain;
        if (sf <= low || sf > high)
                return 0;
        return sf->gprs[8];
@@ -154,7 +154,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
                memset(&frame->childregs, 0, sizeof(struct pt_regs));
                frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
                                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-               frame->childregs.psw.addr = PSW_ADDR_AMODE |
+               frame->childregs.psw.addr =
                                (unsigned long) kernel_thread_starter;
                frame->childregs.gprs[9] = new_stackp; /* function */
                frame->childregs.gprs[10] = arg;
@@ -220,14 +220,14 @@ unsigned long get_wchan(struct task_struct *p)
                return 0;
        low = task_stack_page(p);
        high = (struct stack_frame *) task_pt_regs(p);
-       sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) p->thread.ksp;
        if (sf <= low || sf > high)
                return 0;
        for (count = 0; count < 16; count++) {
-               sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+               sf = (struct stack_frame *) sf->back_chain;
                if (sf <= low || sf > high)
                        return 0;
-               return_address = sf->gprs[8] & PSW_ADDR_INSN;
+               return_address = sf->gprs[8];
                if (!in_sched_functions(return_address))
                        return return_address;
        }
index 01c37b36caf964ec4616048c00d6602607a9abca..49b1c13bf6c96372d60815e8549c10b15b40914f 100644 (file)
@@ -84,7 +84,7 @@ void update_cr_regs(struct task_struct *task)
                if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
                        new.control |= PER_EVENT_IFETCH;
                new.start = 0;
-               new.end = PSW_ADDR_INSN;
+               new.end = -1UL;
        }
 
        /* Take care of the PER enablement bit in the PSW. */
@@ -148,7 +148,7 @@ static inline unsigned long __peek_user_per(struct task_struct *child,
        else if (addr == (addr_t) &dummy->cr11)
                /* End address of the active per set. */
                return test_thread_flag(TIF_SINGLE_STEP) ?
-                       PSW_ADDR_INSN : child->thread.per_user.end;
+                       -1UL : child->thread.per_user.end;
        else if (addr == (addr_t) &dummy->bits)
                /* Single-step bit. */
                return test_thread_flag(TIF_SINGLE_STEP) ?
@@ -495,8 +495,6 @@ long arch_ptrace(struct task_struct *child, long request,
                }
                return 0;
        default:
-               /* Removing high order bit from addr (only for 31 bit). */
-               addr &= PSW_ADDR_INSN;
                return ptrace_request(child, request, addr, data);
        }
 }
index c6878fbbcf1324661ac52f4f55ba1e40840fbd89..9220db5c996aa5250ca4d904361c51cec33d7827 100644 (file)
@@ -301,25 +301,21 @@ static void __init setup_lowcore(void)
        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
        lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
        lc->restart_psw.mask = PSW_KERNEL_BITS;
-       lc->restart_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+       lc->restart_psw.addr = (unsigned long) restart_int_handler;
        lc->external_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->external_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+       lc->external_new_psw.addr = (unsigned long) ext_int_handler;
        lc->svc_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-       lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+       lc->svc_new_psw.addr = (unsigned long) system_call;
        lc->program_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->program_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+       lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
-       lc->mcck_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+       lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
        lc->io_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+       lc->io_new_psw.addr = (unsigned long) io_int_handler;
        lc->clock_comparator = -1ULL;
        lc->kernel_stack = ((unsigned long) &init_thread_union)
                + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
index 028cc46cb82ad77ac21a7c9b9ea897e2312480d4..d82562cf0a0e1f5057987f59f6d541ea374ea683 100644 (file)
@@ -331,13 +331,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        /* Set up to return from userspace.  If provided, use a stub
           already in userspace.  */
        if (ka->sa.sa_flags & SA_RESTORER) {
-               restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+               restorer = (unsigned long) ka->sa.sa_restorer;
        } else {
                /* Signal frames without vector registers are short! */
                __u16 __user *svc = (void __user *) frame + frame_size - 2;
                if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
                        return -EFAULT;
-               restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+               restorer = (unsigned long) svc;
        }
 
        /* Set up registers for signal handler */
@@ -347,7 +347,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
                (PSW_USER_BITS & PSW_MASK_ASC) |
                (regs->psw.mask & ~PSW_MASK_ASC);
-       regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) ka->sa.sa_handler;
 
        regs->gprs[2] = sig;
        regs->gprs[3] = (unsigned long) &frame->sc;
@@ -394,13 +394,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        /* Set up to return from userspace.  If provided, use a stub
           already in userspace.  */
        if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-               restorer = (unsigned long)
-                       ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+               restorer = (unsigned long) ksig->ka.sa.sa_restorer;
        } else {
                __u16 __user *svc = &frame->svc_insn;
                if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
                        return -EFAULT;
-               restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+               restorer = (unsigned long) svc;
        }
 
        /* Create siginfo on the signal stack */
@@ -426,7 +425,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
                (PSW_USER_BITS & PSW_MASK_ASC) |
                (regs->psw.mask & ~PSW_MASK_ASC);
-       regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
 
        regs->gprs[2] = ksig->sig;
        regs->gprs[3] = (unsigned long) &frame->info;
index a13468b9a9133b9f46d8c90575ea0b5ffcd41e04..3c65a8eae34d35b6a670066db2bc80c8fa3ccf67 100644 (file)
@@ -623,8 +623,6 @@ void __init smp_save_dump_cpus(void)
                return;
        /* Allocate a page as dumping area for the store status sigps */
        page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
-       if (!page)
-               panic("could not allocate memory for save area\n");
        /* Set multi-threading state to the previous system. */
        pcpu_set_smt(sclp.mtid_prev);
        boot_cpu_addr = stap();
index 1785cd82253caec869abbb7f5d7804b42effad04..8f64ebd63767c7a1a3e434b994b65110e890f741 100644 (file)
@@ -21,12 +21,11 @@ static unsigned long save_context_stack(struct stack_trace *trace,
        unsigned long addr;
 
        while(1) {
-               sp &= PSW_ADDR_INSN;
                if (sp < low || sp > high)
                        return sp;
                sf = (struct stack_frame *)sp;
                while(1) {
-                       addr = sf->gprs[8] & PSW_ADDR_INSN;
+                       addr = sf->gprs[8];
                        if (!trace->skip)
                                trace->entries[trace->nr_entries++] = addr;
                        else
@@ -34,7 +33,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
                        if (trace->nr_entries >= trace->max_entries)
                                return sp;
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
@@ -46,7 +45,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *)sp;
-               addr = regs->psw.addr & PSW_ADDR_INSN;
+               addr = regs->psw.addr;
                if (savesched || !in_sched_functions(addr)) {
                        if (!trace->skip)
                                trace->entries[trace->nr_entries++] = addr;
@@ -60,33 +59,43 @@ static unsigned long save_context_stack(struct stack_trace *trace,
        }
 }
 
-void save_stack_trace(struct stack_trace *trace)
+static void __save_stack_trace(struct stack_trace *trace, unsigned long sp)
 {
-       register unsigned long sp asm ("15");
-       unsigned long orig_sp, new_sp;
+       unsigned long new_sp, frame_size;
 
-       orig_sp = sp & PSW_ADDR_INSN;
-       new_sp = save_context_stack(trace, orig_sp,
-                                   S390_lowcore.panic_stack - PAGE_SIZE,
-                                   S390_lowcore.panic_stack, 1);
-       if (new_sp != orig_sp)
-               return;
+       frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+       new_sp = save_context_stack(trace, sp,
+                       S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
+                       S390_lowcore.panic_stack + frame_size, 1);
        new_sp = save_context_stack(trace, new_sp,
-                                   S390_lowcore.async_stack - ASYNC_SIZE,
-                                   S390_lowcore.async_stack, 1);
-       if (new_sp != orig_sp)
-               return;
+                       S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+                       S390_lowcore.async_stack + frame_size, 1);
        save_context_stack(trace, new_sp,
                           S390_lowcore.thread_info,
                           S390_lowcore.thread_info + THREAD_SIZE, 1);
 }
+
+void save_stack_trace(struct stack_trace *trace)
+{
+       register unsigned long r15 asm ("15");
+       unsigned long sp;
+
+       sp = r15;
+       __save_stack_trace(trace, sp);
+       if (trace->nr_entries < trace->max_entries)
+               trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
        unsigned long sp, low, high;
 
-       sp = tsk->thread.ksp & PSW_ADDR_INSN;
+       sp = tsk->thread.ksp;
+       if (tsk == current) {
+               /* Get current stack pointer. */
+               asm volatile("la %0,0(15)" : "=a" (sp));
+       }
        low = (unsigned long) task_stack_page(tsk);
        high = (unsigned long) task_pt_regs(tsk);
        save_context_stack(trace, sp, low, high, 0);
@@ -94,3 +103,14 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+       unsigned long sp;
+
+       sp = kernel_stack_pointer(regs);
+       __save_stack_trace(trace, sp);
+       if (trace->nr_entries < trace->max_entries)
+               trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_regs);
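
The new save_stack_trace_regs() mirrors the other entry points: start the walk from the stack pointer of the interrupted context, then append a ULONG_MAX terminator if space remains. A hedged usage sketch with a hypothetical caller (report_from_irq is invented, the stack_trace bookkeeping follows the generic <linux/stacktrace.h> API of this era):

    /* Kernel-context sketch: capture a trace for the context whose
     * pt_regs were delivered to an interrupt/exception handler. */
    static void report_from_irq(struct pt_regs *regs)
    {
            unsigned long entries[32];
            struct stack_trace trace = {
                    .max_entries = ARRAY_SIZE(entries),
                    .entries     = entries,
            };

            save_stack_trace_regs(regs, &trace);
            /* trace.nr_entries frames captured, with a ULONG_MAX
             * sentinel appended when space allowed. */
    }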
index 5378c3ea1b984918ed3719c8a9cf8d1c8c86574a..293d8b98fd525c992146fa4f5c85f1287d90bc62 100644 (file)
@@ -383,3 +383,4 @@ SYSCALL(sys_recvfrom,compat_sys_recvfrom)
 SYSCALL(sys_recvmsg,compat_sys_recvmsg)
 SYSCALL(sys_shutdown,sys_shutdown)
 SYSCALL(sys_mlock2,compat_sys_mlock2)
+SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
index 21a5df99552bce6e4f84c872acdda43e6c5732c3..dde7654f5c68e5d117d3493d9c0f130f2e24e830 100644 (file)
@@ -18,6 +18,9 @@ void trace_s390_diagnose_norecursion(int diag_nr)
        unsigned long flags;
        unsigned int *depth;
 
+       /* Avoid lockdep recursion. */
+       if (IS_ENABLED(CONFIG_LOCKDEP))
+               return;
        local_irq_save(flags);
        depth = this_cpu_ptr(&diagnose_trace_depth);
        if (*depth == 0) {
index d69d648759c9f926ae91771cef6a8cc291ee403e..017eb03daee2e724a603420f65b6e93dde98ac6a 100644 (file)
@@ -32,8 +32,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
                address = *(unsigned long *)(current->thread.trap_tdb + 24);
        else
                address = regs->psw.addr;
-       return (void __user *)
-               ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+       return (void __user *) (address - (regs->int_code >> 16));
 }
 
 static inline void report_user_fault(struct pt_regs *regs, int signr)
@@ -46,7 +45,7 @@ static inline void report_user_fault(struct pt_regs *regs, int signr)
                return;
        printk("User process fault: interruption code %04x ilc:%d ",
               regs->int_code & 0xffff, regs->int_code >> 17);
-       print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+       print_vma_addr("in ", regs->psw.addr);
        printk("\n");
        show_regs(regs);
 }
@@ -69,13 +68,13 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
                report_user_fault(regs, si_signo);
         } else {
                 const struct exception_table_entry *fixup;
-                fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+               fixup = search_exception_tables(regs->psw.addr);
                 if (fixup)
-                       regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+                       regs->psw.addr = extable_fixup(fixup);
                else {
                        enum bug_trap_type btt;
 
-                       btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+                       btt = report_bug(regs->psw.addr, regs);
                        if (btt == BUG_TRAP_TYPE_WARN)
                                return;
                        die(regs, str);
index 5fce52cf0e57dc4831d0737ddd3c3a2cfbc386d7..5ea5af3c7db72479a69710a6606d8dddd6143b7a 100644 (file)
@@ -29,6 +29,7 @@ config KVM
        select HAVE_KVM_IRQFD
        select HAVE_KVM_IRQ_ROUTING
        select SRCU
+       select KVM_VFIO
        ---help---
          Support hosting paravirtualized guest machines using the SIE
          virtualization capability on the mainframe. This should work
index b3b553469650888fd31df5d975fe4b93b2399f70..d42fa38c24292157c0da0f015d2063bc1c5e5723 100644 (file)
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
index 47518a324d752286d4b0cb80784c43c316f31c45..d697312ce9ee325571f25c74fb0d963f50943537 100644 (file)
@@ -116,7 +116,7 @@ static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
        if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
                *cr9 &= ~PER_CONTROL_ALTERATION;
                *cr10 = 0;
-               *cr11 = PSW_ADDR_INSN;
+               *cr11 = -1UL;
        } else {
                *cr9 &= ~PER_CONTROL_ALTERATION;
                *cr9 |= PER_EVENT_STORE;
@@ -159,7 +159,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
                vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
                vcpu->arch.sie_block->gcr[10] = 0;
-               vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+               vcpu->arch.sie_block->gcr[11] = -1UL;
        }
 
        if (guestdbg_hw_bp_enabled(vcpu)) {
index 835d60bedb545fa1873b6b37e57dffbec3bb9600..4af21c771f9b3925bf860d0dc86cebcc803cf965 100644 (file)
@@ -1423,44 +1423,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination.  Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
-       dst->fpc = current->thread.fpu.fpc;
-       dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
-       current->thread.fpu.fpc = from->fpc;
-       current->thread.fpu.regs = from->regs;
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        /* Save host register state */
        save_fpu_regs();
-       save_fpu_to(&vcpu->arch.host_fpregs);
-
-       if (test_kvm_facility(vcpu->kvm, 129)) {
-               current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-               /*
-                * Use the register save area in the SIE-control block
-                * for register restore and save in kvm_arch_vcpu_put()
-                */
-               current->thread.fpu.vxrs =
-                       (__vector128 *)&vcpu->run->s.regs.vrs;
-       } else
-               load_fpu_from(&vcpu->arch.guest_fpregs);
+       vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+       vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
 
+       /* Depending on MACHINE_HAS_VX, data stored to vrs either
+        * has vector register or floating point register format.
+        */
+       current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+       current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;
@@ -1476,19 +1450,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
 
+       /* Save guest register state */
        save_fpu_regs();
+       vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 
-       if (test_kvm_facility(vcpu->kvm, 129))
-               /*
-                * kvm_arch_vcpu_load() set up the register save area to
-                * the &vcpu->run->s.regs.vrs and, thus, the vector registers
-                * are already saved.  Only the floating-point control must be
-                * copied.
-                */
-               vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-       else
-               save_fpu_to(&vcpu->arch.guest_fpregs);
-       load_fpu_from(&vcpu->arch.host_fpregs);
+       /* Restore host register state */
+       current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+       current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
 
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
@@ -1506,8 +1474,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
-       vcpu->arch.guest_fpregs.fpc = 0;
-       asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+       /* make sure the new fpc will be lazily loaded */
+       save_fpu_regs();
+       current->thread.fpu.fpc = 0;
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1648,17 +1617,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
-       /*
-        * Allocate a save area for floating-point registers.  If the vector
-        * extension is available, register contents are saved in the SIE
-        * control block.  The allocated save area is still required in
-        * particular places, for example, in kvm_s390_vcpu_store_status().
-        */
-       vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-                                              GFP_KERNEL);
-       if (!vcpu->arch.guest_fpregs.fprs)
-               goto out_free_sie_block;
-
        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
@@ -1879,19 +1837,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
+       /* make sure the new values will be lazily loaded */
+       save_fpu_regs();
        if (test_fp_ctl(fpu->fpc))
                return -EINVAL;
-       memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-       vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-       save_fpu_regs();
-       load_fpu_from(&vcpu->arch.guest_fpregs);
+       current->thread.fpu.fpc = fpu->fpc;
+       if (MACHINE_HAS_VX)
+               convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+       else
+               memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
        return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-       memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
-       fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+       /* make sure we have the latest values */
+       save_fpu_regs();
+       if (MACHINE_HAS_VX)
+               convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+       else
+               memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+       fpu->fpc = current->thread.fpu.fpc;
        return 0;
 }
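
The reworked FPU ioctls above lean on the vector-facility register layout: floating-point register n overlays the leftmost 64 bits of vector register n for n = 0..15, so convert_vx_to_fp()/convert_fp_to_vx() reduce to strided 8-byte copies. A standalone sketch of that overlay:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned char vxrs[16][16];     /* sixteen 128-bit vector registers */
            unsigned long long fprs[16];    /* sixteen 64-bit FP registers */
            unsigned char *b;
            int i;

            memset(vxrs, 0, sizeof(vxrs));
            vxrs[3][0] = 0x40;              /* plant a pattern in V3's high half */
            vxrs[3][1] = 0x09;

            /* FPR n is the leftmost 64 bits of VR n: a strided copy. */
            for (i = 0; i < 16; i++)
                    memcpy(&fprs[i], vxrs[i], 8);

            b = (unsigned char *) &fprs[3];
            printf("fpr3 begins 0x%02x 0x%02x\n", b[0], b[1]);  /* 0x40 0x09 */
            return 0;
    }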
 
@@ -2396,6 +2362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
        unsigned char archmode = 1;
+       freg_t fprs[NUM_FPRS];
        unsigned int px;
        u64 clkcomp;
        int rc;
@@ -2411,8 +2378,16 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
                gpa = px;
        } else
                gpa -= __LC_FPREGS_SAVE_AREA;
-       rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
-                            vcpu->arch.guest_fpregs.fprs, 128);
+
+       /* manually convert vector registers if necessary */
+       if (MACHINE_HAS_VX) {
+               convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+               rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+                                    fprs, 128);
+       } else {
+               rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+                                    vcpu->run->s.regs.vrs, 128);
+       }
        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
                              vcpu->run->s.regs.gprs, 128);
        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2420,7 +2395,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
                              &px, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
-                             &vcpu->arch.guest_fpregs.fpc, 4);
+                             &vcpu->run->s.regs.fpc, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
                              &vcpu->arch.sie_block->todpr, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2443,19 +2418,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
         * it into the save area
         */
        save_fpu_regs();
-       if (test_kvm_facility(vcpu->kvm, 129)) {
-               /*
-                * If the vector extension is available, the vector registers
-                * which overlaps with floating-point registers are saved in
-                * the SIE-control block.  Hence, extract the floating-point
-                * registers and the FPC value and store them in the
-                * guest_fpregs structure.
-                */
-               vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
-               convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
-                                current->thread.fpu.vxrs);
-       } else
-               save_fpu_to(&vcpu->arch.guest_fpregs);
+       vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        save_access_regs(vcpu->run->s.regs.acrs);
 
        return kvm_s390_store_status_unloaded(vcpu, addr);
index 1b903f6ad54a327ca73ee88bdecee0a73e1eaf42..791a4146052c7a48fdb72a6ef3b9e0d1abc06eeb 100644 (file)
@@ -228,7 +228,7 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
                return;
        printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
               regs->int_code & 0xffff, regs->int_code >> 17);
-       print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+       print_vma_addr(KERN_CONT "in ", regs->psw.addr);
        printk(KERN_CONT "\n");
        printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
               regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
@@ -256,9 +256,9 @@ static noinline void do_no_context(struct pt_regs *regs)
        const struct exception_table_entry *fixup;
 
        /* Are we prepared to handle this kernel fault?  */
-       fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+       fixup = search_exception_tables(regs->psw.addr);
        if (fixup) {
-               regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+               regs->psw.addr = extable_fixup(fixup);
                return;
        }
 
index c722400c769784d8df6f13507693e117ad942497..73e29033709285b44ceb5609919c07aff86e9de9 100644 (file)
@@ -98,7 +98,7 @@ void __init paging_init(void)
        __ctl_load(S390_lowcore.kernel_asce, 1, 1);
        __ctl_load(S390_lowcore.kernel_asce, 7, 7);
        __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-       arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+       __arch_local_irq_stosm(0x04);
 
        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();
index fec59c067d0dddf8ecb4a550124691d0838cda46..792f9c63fbca5b97e03d1251bfff1facc987f9c4 100644 (file)
@@ -93,15 +93,19 @@ static int __memcpy_real(void *dest, void *src, size_t count)
  */
 int memcpy_real(void *dest, void *src, size_t count)
 {
+       int irqs_disabled, rc;
        unsigned long flags;
-       int rc;
 
        if (!count)
                return 0;
-       local_irq_save(flags);
-       __arch_local_irq_stnsm(0xfbUL);
+       flags = __arch_local_irq_stnsm(0xf8UL);
+       irqs_disabled = arch_irqs_disabled_flags(flags);
+       if (!irqs_disabled)
+               trace_hardirqs_off();
        rc = __memcpy_real(dest, src, count);
-       local_irq_restore(flags);
+       if (!irqs_disabled)
+               trace_hardirqs_on();
+       __arch_local_irq_ssm(flags);
        return rc;
 }
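
The reworked memcpy_real() now masks DAT and all interrupts with a single stnsm and, because it flips the interrupt state without going through the traced local_irq_* wrappers, it has to notify lockdep by hand. A hedged sketch of the pattern (kernel context; helpers ending in _sketch are invented stand-ins for the stnsm/ssm primitives used above):

    static int guarded_copy_sketch(void *dest, void *src, size_t count)
    {
            unsigned long flags;
            int was_disabled, rc;

            flags = disable_dat_and_irqs_sketch();      /* returns the old mask */
            was_disabled = arch_irqs_disabled_flags(flags);
            if (!was_disabled)
                    trace_hardirqs_off();               /* keep lockdep in sync */
            rc = do_real_copy_sketch(dest, src, count);
            if (!was_disabled)
                    trace_hardirqs_on();                /* announce before enabling */
            restore_mask_sketch(flags);
            return rc;
    }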
 
index ea01477b4aa67155f87b88e8c6a87f5374eda247..45c4daa49930f9a2bdfaf1d07a6ab5e002aa99c2 100644 (file)
@@ -169,12 +169,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
-       if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+       if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
                return 0;
        if (!(flags & MAP_FIXED))
                addr = 0;
        if ((addr + len) >= TASK_SIZE)
-               return crst_table_upgrade(current->mm, 1UL << 53);
+               return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
        return 0;
 }
 
@@ -189,9 +189,9 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
        area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
        if (!(area & ~PAGE_MASK))
                return area;
-       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
                /* Upgrade the page table to 4 levels and retry. */
-               rc = crst_table_upgrade(mm, 1UL << 53);
+               rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
                if (rc)
                        return (unsigned long) rc;
                area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -211,9 +211,9 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
        area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
        if (!(area & ~PAGE_MASK))
                return area;
-       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
                /* Upgrade the page table to 4 levels and retry. */
-               rc = crst_table_upgrade(mm, 1UL << 53);
+               rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
                if (rc)
                        return (unsigned long) rc;
                area = arch_get_unmapped_area_topdown(filp, addr, len,
index a809fa8e6f8bd01d2c0ad90891060d6aabd36a80..5109827883acaefc6afc0ac913956fcb79f8f8a0 100644 (file)
@@ -55,7 +55,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
        unsigned long entry;
        int flush;
 
-       BUG_ON(limit > (1UL << 53));
+       BUG_ON(limit > TASK_MAX_SIZE);
        flush = 0;
 repeat:
        table = crst_table_alloc(mm);
index 43f32ce60aa3d98af0b7665090fa3eb080d12fa7..2794845061c66fe3605c0b2e97b591b6cf176d32 100644 (file)
@@ -57,9 +57,7 @@ static __init pg_data_t *alloc_node_data(void)
 {
        pg_data_t *res;
 
-       res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
-       if (!res)
-               panic("Could not allocate memory for node data!\n");
+       res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
        memset(res, 0, sizeof(pg_data_t));
        return res;
 }
@@ -162,7 +160,7 @@ static int __init numa_init_late(void)
                register_one_node(nid);
        return 0;
 }
-device_initcall(numa_init_late);
+arch_initcall(numa_init_late);
 
 static int __init parse_debug(char *parm)
 {
index 8a6811b2cdb9523a24cd32341dee0fcef317f661..1884e17595294bbfafdcaf8e183fd80e40d8ea48 100644 (file)
@@ -16,24 +16,23 @@ __show_trace(unsigned int *depth, unsigned long sp,
        struct pt_regs *regs;
 
        while (*depth) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
                (*depth)--;
-               oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+               oprofile_add_trace(sf->gprs[8]);
 
                /* Follow the backchain.  */
                while (*depth) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
                        (*depth)--;
-                       oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+                       oprofile_add_trace(sf->gprs[8]);
 
                }
 
@@ -46,7 +45,7 @@ __show_trace(unsigned int *depth, unsigned long sp,
                        return sp;
                regs = (struct pt_regs *) sp;
                (*depth)--;
-               oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+               oprofile_add_trace(sf->gprs[8]);
                low = sp;
                sp = regs->gprs[15];
        }
@@ -55,12 +54,13 @@ __show_trace(unsigned int *depth, unsigned long sp,
 
 void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
-       unsigned long head;
+       unsigned long head, frame_size;
        struct stack_frame* head_sf;
 
        if (user_mode(regs))
                return;
 
+       frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
        head = regs->gprs[15];
        head_sf = (struct stack_frame*)head;
 
@@ -69,8 +69,9 @@ void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
 
        head = head_sf->back_chain;
 
-       head = __show_trace(&depth, head, S390_lowcore.async_stack - ASYNC_SIZE,
-                           S390_lowcore.async_stack);
+       head = __show_trace(&depth, head,
+                           S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+                           S390_lowcore.async_stack + frame_size);
 
        __show_trace(&depth, head, S390_lowcore.thread_info,
                     S390_lowcore.thread_info + THREAD_SIZE);
index 11d4f277e9f69ac3d266e14ca9804e4b6c844fac..8f19c8f9d660570839a69f60477f855181ba4e7c 100644 (file)
@@ -68,9 +68,12 @@ static struct airq_struct zpci_airq = {
        .isc = PCI_ISC,
 };
 
-/* I/O Map */
+#define ZPCI_IOMAP_ENTRIES                                             \
+       min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT),  \
+           ZPCI_IOMAP_MAX_ENTRIES)
+
 static DEFINE_SPINLOCK(zpci_iomap_lock);
-static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+static unsigned long *zpci_iomap_bitmap;
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
@@ -265,27 +268,20 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
                              unsigned long max)
 {
        struct zpci_dev *zdev = to_zpci(pdev);
-       u64 addr;
        int idx;
 
-       if ((bar & 7) != bar)
+       if (!pci_resource_len(pdev, bar))
                return NULL;
 
        idx = zdev->bars[bar].map_idx;
        spin_lock(&zpci_iomap_lock);
-       if (zpci_iomap_start[idx].count++) {
-               BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
-                      zpci_iomap_start[idx].bar != bar);
-       } else {
-               zpci_iomap_start[idx].fh = zdev->fh;
-               zpci_iomap_start[idx].bar = bar;
-       }
        /* Detect overrun */
-       BUG_ON(!zpci_iomap_start[idx].count);
+       WARN_ON(!++zpci_iomap_start[idx].count);
+       zpci_iomap_start[idx].fh = zdev->fh;
+       zpci_iomap_start[idx].bar = bar;
        spin_unlock(&zpci_iomap_lock);
 
-       addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
-       return (void __iomem *) addr + offset;
+       return (void __iomem *) ZPCI_ADDR(idx) + offset;
 }
 EXPORT_SYMBOL(pci_iomap_range);
 
@@ -297,12 +293,11 @@ EXPORT_SYMBOL(pci_iomap);
 
 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 {
-       unsigned int idx;
+       unsigned int idx = ZPCI_IDX(addr);
 
-       idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
        spin_lock(&zpci_iomap_lock);
        /* Detect underrun */
-       BUG_ON(!zpci_iomap_start[idx].count);
+       WARN_ON(!zpci_iomap_start[idx].count);
        if (!--zpci_iomap_start[idx].count) {
                zpci_iomap_start[idx].fh = 0;
                zpci_iomap_start[idx].bar = 0;
@@ -544,15 +539,15 @@ static void zpci_irq_exit(void)
 
 static int zpci_alloc_iomap(struct zpci_dev *zdev)
 {
-       int entry;
+       unsigned long entry;
 
        spin_lock(&zpci_iomap_lock);
-       entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
-       if (entry == ZPCI_IOMAP_MAX_ENTRIES) {
+       entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+       if (entry == ZPCI_IOMAP_ENTRIES) {
                spin_unlock(&zpci_iomap_lock);
                return -ENOSPC;
        }
-       set_bit(entry, zpci_iomap);
+       set_bit(entry, zpci_iomap_bitmap);
        spin_unlock(&zpci_iomap_lock);
        return entry;
 }
@@ -561,7 +556,7 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
 {
        spin_lock(&zpci_iomap_lock);
        memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
-       clear_bit(entry, zpci_iomap);
+       clear_bit(entry, zpci_iomap_bitmap);
        spin_unlock(&zpci_iomap_lock);
 }
 
@@ -611,8 +606,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
                if (zdev->bars[i].val & 4)
                        flags |= IORESOURCE_MEM_64;
 
-               addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
-
+               addr = ZPCI_ADDR(entry);
                size = 1UL << zdev->bars[i].size;
 
                res = __alloc_res(zdev, addr, size, flags);
@@ -873,23 +867,30 @@ static int zpci_mem_init(void)
        zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
                                16, 0, NULL);
        if (!zdev_fmb_cache)
-               goto error_zdev;
+               goto error_fmb;
 
-       /* TODO: use realloc */
-       zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
-                                  GFP_KERNEL);
+       zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+                                  sizeof(*zpci_iomap_start), GFP_KERNEL);
        if (!zpci_iomap_start)
                goto error_iomap;
-       return 0;
 
+       zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+                                   sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+       if (!zpci_iomap_bitmap)
+               goto error_iomap_bitmap;
+
+       return 0;
+error_iomap_bitmap:
+       kfree(zpci_iomap_start);
 error_iomap:
        kmem_cache_destroy(zdev_fmb_cache);
-error_zdev:
+error_fmb:
        return -ENOMEM;
 }
 
 static void zpci_mem_exit(void)
 {
+       kfree(zpci_iomap_bitmap);
        kfree(zpci_iomap_start);
        kmem_cache_destroy(zdev_fmb_cache);
 }
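
The patch replaces the open-coded shifts with ZPCI_ADDR()/ZPCI_IDX() helpers that pack the iomap index into the high bits of the fake ioremap address and recover it again in pci_iounmap(). A stand-alone sketch of that round trip; the base constant here is illustrative, not the kernel's value:

    #include <stdint.h>
    #include <stdio.h>

    #define IOMAP_ADDR_BASE 0x8000000000000000ULL  /* illustrative base */

    static uint64_t iomap_addr(unsigned int idx)   /* cf. ZPCI_ADDR(idx) */
    {
            return IOMAP_ADDR_BASE | ((uint64_t)idx << 48);
    }

    static unsigned int iomap_idx(uint64_t addr)   /* cf. ZPCI_IDX(addr) */
    {
            return (unsigned int)((addr & ~IOMAP_ADDR_BASE) >> 48);
    }

    int main(void)
    {
            uint64_t a = iomap_addr(42) + 0x100;   /* BAR offset in low bits */

            printf("idx=%u\n", iomap_idx(a));      /* 42 survives the trip */
            return 0;
    }
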
index 369a3e05d468dbf1973df24c77e4ae07e580c3b6..b0e04751c5d599fbdbb4bdb68fdbed5dcb4039d5 100644 (file)
@@ -53,6 +53,11 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 
        pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
               pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+       if (!pdev)
+               return;
+
+       pdev->error_state = pci_channel_io_perm_failure;
 }
 
 void zpci_event_error(void *data)
index f887c6465a821b7b96a2a300268f077afd70ae5a..8a84e05adb2e122fa0d39a1c2acdfadf63202266 100644 (file)
@@ -33,7 +33,6 @@
 #endif
 
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
 
 #include <asm-generic/barrier.h>
 
index c690c8e16a96ef2758fca4e9af8080ec7af6c17a..b489e9759518182b6a3884935e5a1c22b1af3524 100644 (file)
@@ -264,7 +264,7 @@ static unsigned long mmap_rnd(void)
        unsigned long rnd = 0UL;
 
        if (current->flags & PF_RANDOMIZE) {
-               unsigned long val = get_random_int();
+               unsigned long val = get_random_long();
                if (test_thread_flag(TIF_32BIT))
                        rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
                else
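
The switch from get_random_int() to get_random_long() matters whenever the randomization window is wider than 32 bits: a 32-bit source leaves every bit above bit 31 permanently clear. A small demonstration on a 64-bit host; the 40-bit mask is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t best32 = UINT32_MAX;          /* ceiling of a 32-bit source */
            uint64_t mask   = (1UL << 40) - 1;     /* illustrative wide window */

            /* Bits the mask asks for that a 32-bit source can never set: */
            printf("always-zero bits: %#llx\n",
                   (unsigned long long)(mask & ~best32));
            return 0;
    }
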
index e13d41c392ae4f4940a41e6c8049099c71d28a04..f878bec23576c54c619b633ce0c81508519ddb56 100644 (file)
@@ -34,21 +34,18 @@ struct page;
 
 #if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT)
 
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
+typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
-#define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32))
-
-#define pte_get_bits(pte, bits) ((pte).pte_low & (bits))
-#define pte_set_bits(pte, bits) ((pte).pte_low |= (bits))
-#define pte_clear_bits(pte, bits) ((pte).pte_low &= ~(bits))
-#define pte_copy(to, from) ({ (to).pte_high = (from).pte_high; \
-                             smp_wmb(); \
-                             (to).pte_low = (from).pte_low; })
-#define pte_is_zero(pte) (!((pte).pte_low & ~_PAGE_NEWPAGE) && !(pte).pte_high)
-#define pte_set_val(pte, phys, prot) \
-       ({ (pte).pte_high = (phys) >> 32; \
-          (pte).pte_low = (phys) | pgprot_val(prot); })
+#define pte_val(p) ((p).pte)
+
+#define pte_get_bits(p, bits) ((p).pte & (bits))
+#define pte_set_bits(p, bits) ((p).pte |= (bits))
+#define pte_clear_bits(p, bits) ((p).pte &= ~(bits))
+#define pte_copy(to, from) ({ (to).pte = (from).pte; })
+#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE))
+#define pte_set_val(p, phys, prot) \
+       ({ (p).pte = (phys) | pgprot_val(prot); })
 
 #define pmd_val(x)     ((x).pmd)
 #define __pmd(x) ((pmd_t) { (x) } )
index 330e738ccfc13633954a02677e46bb1b12605fa9..c46662f64c392050d1c45b722e3de7c01f87e250 100644 (file)
@@ -475,6 +475,7 @@ config X86_UV
        depends on X86_64
        depends on X86_EXTENDED_PLATFORM
        depends on NUMA
+       depends on EFI
        depends on X86_X2APIC
        depends on PCI
        ---help---
@@ -509,11 +510,10 @@ config X86_INTEL_CE
 
 config X86_INTEL_MID
        bool "Intel MID platform support"
-       depends on X86_32
        depends on X86_EXTENDED_PLATFORM
        depends on X86_PLATFORM_DEVICES
        depends on PCI
-       depends on PCI_GOANY
+       depends on X86_64 || (PCI_GOANY && X86_32)
        depends on X86_IO_APIC
        select SFI
        select I2C
@@ -778,8 +778,8 @@ config HPET_TIMER
          HPET is the next generation timer replacing legacy 8254s.
          The HPET provides a stable time base on SMP
          systems, unlike the TSC, but it is more expensive to access,
-         as it is off-chip.  You can find the HPET spec at
-         <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>.
+         as it is off-chip.  The interface used is documented
+         in the HPET spec, revision 1.
 
          You can safely choose Y here.  However, HPET will only be
          activated if the platform and the BIOS support this feature.
index 712b13047b41e9cdf7116048d0ffce510d3aa283..3a33124e91129e85c3f34fc6dc85ca43c8e35cff 100644 (file)
@@ -157,7 +157,9 @@ ENTRY(chacha20_4block_xor_ssse3)
        # done with the slightly better performing SSSE3 byte shuffling,
        # 7/12-bit word rotation uses traditional shift+OR.
 
-       sub             $0x40,%rsp
+       mov             %rsp,%r11
+       sub             $0x80,%rsp
+       and             $~63,%rsp
 
        # x0..15[0-3] = s0..3[0..3]
        movq            0x00(%rdi),%xmm1
@@ -620,6 +622,6 @@ ENTRY(chacha20_4block_xor_ssse3)
        pxor            %xmm1,%xmm15
        movdqu          %xmm15,0xf0(%rsi)
 
-       add             $0x40,%rsp
+       mov             %r11,%rsp
        ret
 ENDPROC(chacha20_4block_xor_ssse3)
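
The new prologue saves %rsp in %r11, reserves extra room, and rounds the stack pointer down to a 64-byte boundary so the scratch area suits aligned SSE accesses. The same round-to-boundary arithmetic, sketched in C with an over-allocated buffer (rounding up into the slack instead of down):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned char raw[64 + 64];    /* slack guarantees a boundary inside */
            uintptr_t p = ((uintptr_t)raw + 63) & ~(uintptr_t)63;
            unsigned char *aligned = (unsigned char *)p;

            /* the asm keeps the old %rsp in %r11; here 'raw' keeps the original */
            printf("64-byte aligned: %d\n", (int)(((uintptr_t)aligned & 63) == 0));
            return 0;
    }
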
index 77d8c5112900e0edb73741af9c7cc4a60b0bb8f5..bb3e376d0f33c8dda5a9cf7b4e2b3f2d39f6389a 100644 (file)
@@ -294,6 +294,7 @@ sysenter_past_esp:
        pushl   $__USER_DS              /* pt_regs->ss */
        pushl   %ebp                    /* pt_regs->sp (stashed in bp) */
        pushfl                          /* pt_regs->flags (except IF = 0) */
+       ASM_CLAC                        /* Clear AC after saving FLAGS */
        orl     $X86_EFLAGS_IF, (%esp)  /* Fix IF */
        pushl   $__USER_CS              /* pt_regs->cs */
        pushl   $0                      /* pt_regs->ip = 0 (placeholder) */
index ff1c6d61f332d22e6f95f54a782b150b39bf75dc..3c990eeee40bd6e83b4d830654fdf966250bb0e1 100644 (file)
@@ -261,6 +261,7 @@ ENTRY(entry_INT80_compat)
         * Interrupts are off on entry.
         */
        PARAVIRT_ADJUST_EXCEPTION_FRAME
+       ASM_CLAC                        /* Do this early to minimize exposure */
        SWAPGS
 
        /*
index 881b4768644aad3537364286e468146aba73ad56..e7de5c9a4fbdd2c5d99b78d82d2c58019d20dc04 100644 (file)
@@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu);
 
 #define __ARCH_HAS_DO_SOFTIRQ
 
+struct irq_desc;
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(int);
+extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM
@@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
 extern void (*x86_platform_ipi_callback)(void);
 extern void native_init_IRQ(void);
 
-struct irq_desc;
 extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
 
 extern __visible unsigned int do_IRQ(struct pt_regs *regs);
index 19c099afa8613ead6565917f5787deb3df12c9e7..e795f5274217a47cf1713063399675a175d5a4ea 100644 (file)
@@ -41,7 +41,7 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
        regs->ip = ip;
 }
 #else
-#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#error Include linux/livepatch.h, not asm/livepatch.h
 #endif
 
 #endif /* _ASM_X86_LIVEPATCH_H */
index 46873fbd44e1b522b5d40ad5ebf963714df566a1..d08eacd298c2f4b2436b9841203dd5c90f295697 100644 (file)
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
        int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
                                                int reg, int len, u32 *val);
index 04c27a0131656db29e97805432e56d3a1aae5992..4432ab7f407ccc5f47ea9c19a850c29af821d8c9 100644 (file)
@@ -366,20 +366,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
 }
 static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
 {
+       pgprotval_t val = pgprot_val(pgprot);
        pgprot_t new;
-       unsigned long val;
 
-       val = pgprot_val(pgprot);
        pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
                ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
        return new;
 }
 static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
 {
+       pgprotval_t val = pgprot_val(pgprot);
        pgprot_t new;
-       unsigned long val;
 
-       val = pgprot_val(pgprot);
        pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
                          ((val & _PAGE_PAT_LARGE) >>
                           (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
index 1544fabcd7f9b7428a5d3ec7429f00f03b545945..c57fd1ea968957cd600ea1032caf1a9f379b38e5 100644 (file)
@@ -67,18 +67,19 @@ static inline void arch_wmb_pmem(void)
 }
 
 /**
- * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr:     virtual start address
  * @size:      number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
  * instruction.  This function requires explicit ordering with an
- * arch_wmb_pmem() call.  This API is internal to the x86 PMEM implementation.
+ * arch_wmb_pmem() call.
  */
-static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
 {
        u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
        unsigned long clflush_mask = x86_clflush_size - 1;
+       void *vaddr = (void __force *)addr;
        void *vend = vaddr + size;
        void *p;
 
@@ -115,7 +116,7 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
        len = copy_from_iter_nocache(vaddr, bytes, i);
 
        if (__iter_needs_pmem_wb(i))
-               __arch_wb_cache_pmem(vaddr, bytes);
+               arch_wb_cache_pmem(addr, bytes);
 
        return len;
 }
@@ -133,7 +134,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
        void *vaddr = (void __force *)addr;
 
        memset(vaddr, 0, size);
-       __arch_wb_cache_pmem(vaddr, size);
+       arch_wb_cache_pmem(addr, size);
 }
 
 static inline bool __arch_has_wmb_pmem(void)
index 2d5a50cb61a2d6ad5c68d5563636edcc112ff4f9..20c11d1aa4ccce11b0709c3fe56092a5505151cf 100644 (file)
@@ -766,7 +766,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
  * Return saved PC of a blocked thread.
  * What is this good for? it will be always the scheduler or ret_from_fork.
  */
-#define thread_saved_pc(t)     (*(unsigned long *)((t)->thread.sp - 8))
+#define thread_saved_pc(t)     READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8))
 
 #define task_pt_regs(tsk)      ((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
index f5dcb5204dcd5b27e8b8e9a1b87612a28cda10c6..3fe0eac59462b39fc12ce28a6e22d6e347b8719a 100644 (file)
@@ -48,20 +48,28 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 
                switch (n) {
                case 1:
+                       __uaccess_begin();
                        __put_user_size(*(u8 *)from, (u8 __user *)to,
                                        1, ret, 1);
+                       __uaccess_end();
                        return ret;
                case 2:
+                       __uaccess_begin();
                        __put_user_size(*(u16 *)from, (u16 __user *)to,
                                        2, ret, 2);
+                       __uaccess_end();
                        return ret;
                case 4:
+                       __uaccess_begin();
                        __put_user_size(*(u32 *)from, (u32 __user *)to,
                                        4, ret, 4);
+                       __uaccess_end();
                        return ret;
                case 8:
+                       __uaccess_begin();
                        __put_user_size(*(u64 *)from, (u64 __user *)to,
                                        8, ret, 8);
+                       __uaccess_end();
                        return ret;
                }
        }
@@ -103,13 +111,19 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 
                switch (n) {
                case 1:
+                       __uaccess_begin();
                        __get_user_size(*(u8 *)to, from, 1, ret, 1);
+                       __uaccess_end();
                        return ret;
                case 2:
+                       __uaccess_begin();
                        __get_user_size(*(u16 *)to, from, 2, ret, 2);
+                       __uaccess_end();
                        return ret;
                case 4:
+                       __uaccess_begin();
                        __get_user_size(*(u32 *)to, from, 4, ret, 4);
+                       __uaccess_end();
                        return ret;
                }
        }
@@ -148,13 +162,19 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 
                switch (n) {
                case 1:
+                       __uaccess_begin();
                        __get_user_size(*(u8 *)to, from, 1, ret, 1);
+                       __uaccess_end();
                        return ret;
                case 2:
+                       __uaccess_begin();
                        __get_user_size(*(u16 *)to, from, 2, ret, 2);
+                       __uaccess_end();
                        return ret;
                case 4:
+                       __uaccess_begin();
                        __get_user_size(*(u32 *)to, from, 4, ret, 4);
+                       __uaccess_end();
                        return ret;
                }
        }
@@ -170,13 +190,19 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
 
                switch (n) {
                case 1:
+                       __uaccess_begin();
                        __get_user_size(*(u8 *)to, from, 1, ret, 1);
+                       __uaccess_end();
                        return ret;
                case 2:
+                       __uaccess_begin();
                        __get_user_size(*(u16 *)to, from, 2, ret, 2);
+                       __uaccess_end();
                        return ret;
                case 4:
+                       __uaccess_begin();
                        __get_user_size(*(u32 *)to, from, 4, ret, 4);
+                       __uaccess_end();
                        return ret;
                }
        }
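
Each __put_user_size()/__get_user_size() is now bracketed by __uaccess_begin()/__uaccess_end(), which on SMAP hardware map to STAC/CLAC, so the user-access window is open only around the access itself. A hedged sketch of the bracketing pattern with no-op stand-ins for the two primitives:

    #include <stdio.h>
    #include <string.h>

    #define uaccess_begin() do { /* kernel: STAC opens the window */ } while (0)
    #define uaccess_end()   do { /* kernel: CLAC closes it again  */ } while (0)

    static int copy_small(void *to, const void *from, size_t n)
    {
            uaccess_begin();        /* open just before the access... */
            memcpy(to, from, n);    /* stands in for __get_user_size() */
            uaccess_end();          /* ...and close before returning */
            return 0;
    }

    int main(void)
    {
            char dst[4];

            copy_small(dst, "abc", 4);
            puts(dst);
            return 0;
    }
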
index 968d57dd54c9d19ed0a93cb3be6633d3328dc4e7..f320ee32d5a139a522c1b26f75e1076c9606f563 100644 (file)
@@ -57,7 +57,7 @@ static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
 {
        if (xen_pci_frontend && xen_pci_frontend->enable_msi)
                return xen_pci_frontend->enable_msi(dev, vectors);
-       return -ENODEV;
+       return -ENOSYS;
 }
 static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
 {
@@ -69,7 +69,7 @@ static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
 {
        if (xen_pci_frontend && xen_pci_frontend->enable_msix)
                return xen_pci_frontend->enable_msix(dev, vectors, nvec);
-       return -ENODEV;
+       return -ENOSYS;
 }
 static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev)
 {
index f25321894ad294278c20d5ba7b4076ce2353a8fb..fdb0fbfb1197a4cf4e485c4f330b3a399b3a6d5f 100644 (file)
@@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
        const struct cpumask *mask;
+       struct irq_desc *desc;
        struct irq_data *idata;
        struct irq_chip *chip;
 
@@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void)
                if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
                        continue;
 
-               idata = irq_get_irq_data(irq);
+               desc = irq_to_desc(irq);
+               raw_spin_lock_irq(&desc->lock);
+               idata = irq_desc_get_irq_data(desc);
 
                /*
                 * Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void)
                /* Might be lapic_chip for irq 0 */
                if (chip->irq_set_affinity)
                        chip->irq_set_affinity(idata, mask, false);
+               raw_spin_unlock_irq(&desc->lock);
        }
 }
 #endif
index 908cb37da171ebed1a361203dc520c15e4e42736..3b670df4ba7b429f48102b1401c9133957ecb6a9 100644 (file)
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
         */
        static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
        static int current_offset = VECTOR_OFFSET_START % 16;
-       int cpu, err;
+       int cpu, vector;
 
-       if (d->move_in_progress)
+       /*
+        * If there is still a move in progress or the previous move has not
+        * been cleaned up completely, tell the caller to come back later.
+        */
+       if (d->move_in_progress ||
+           cpumask_intersects(d->old_domain, cpu_online_mask))
                return -EBUSY;
 
        /* Only try and allocate irqs on cpus that are present */
-       err = -ENOSPC;
        cpumask_clear(d->old_domain);
+       cpumask_clear(searched_cpumask);
        cpu = cpumask_first_and(mask, cpu_online_mask);
        while (cpu < nr_cpu_ids) {
-               int new_cpu, vector, offset;
+               int new_cpu, offset;
 
+               /* Get the possible target cpus for @mask/@cpu from the apic */
                apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+               /*
+                * Clear the offline cpus from @vector_cpumask for searching
+                * and verify whether the result overlaps with @mask. If true,
+                * then the call to apic->cpu_mask_to_apicid_and() will
+                * succeed as well. If not, no point in trying to find a
+                * vector in this mask.
+                */
+               cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+               if (!cpumask_intersects(vector_searchmask, mask))
+                       goto next_cpu;
+
                if (cpumask_subset(vector_cpumask, d->domain)) {
-                       err = 0;
                        if (cpumask_equal(vector_cpumask, d->domain))
-                               break;
+                               goto success;
                        /*
-                        * New cpumask using the vector is a proper subset of
-                        * the current in use mask. So cleanup the vector
-                        * allocation for the members that are not used anymore.
+                        * Mark the cpus which are no longer in the mask for
+                        * cleanup.
                         */
-                       cpumask_andnot(d->old_domain, d->domain,
-                                      vector_cpumask);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-                       cpumask_and(d->domain, d->domain, vector_cpumask);
-                       break;
+                       cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+                       vector = d->cfg.vector;
+                       goto update;
                }
 
                vector = current_vector;
@@ -158,45 +170,60 @@ next:
                        vector = FIRST_EXTERNAL_VECTOR + offset;
                }
 
-               if (unlikely(current_vector == vector)) {
-                       cpumask_or(d->old_domain, d->old_domain,
-                                  vector_cpumask);
-                       cpumask_andnot(vector_cpumask, mask, d->old_domain);
-                       cpu = cpumask_first_and(vector_cpumask,
-                                               cpu_online_mask);
-                       continue;
-               }
+               /* If the search wrapped around, try the next cpu */
+               if (unlikely(current_vector == vector))
+                       goto next_cpu;
 
                if (test_bit(vector, used_vectors))
                        goto next;
 
-               for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+               for_each_cpu(new_cpu, vector_searchmask) {
                        if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
                                goto next;
                }
                /* Found one! */
                current_vector = vector;
                current_offset = offset;
-               if (d->cfg.vector) {
+               /* Schedule the old vector for cleanup on all cpus */
+               if (d->cfg.vector)
                        cpumask_copy(d->old_domain, d->domain);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-               }
-               for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+               for_each_cpu(new_cpu, vector_searchmask)
                        per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-               d->cfg.vector = vector;
-               cpumask_copy(d->domain, vector_cpumask);
-               err = 0;
-               break;
-       }
+               goto update;
 
-       if (!err) {
-               /* cache destination APIC IDs into cfg->dest_apicid */
-               err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-                                                  &d->cfg.dest_apicid);
+next_cpu:
+               /*
+                * We exclude the current @vector_cpumask from the requested
+                * @mask and try again with the next online cpu in the
+                * result. We cannot modify @mask, so we use @vector_cpumask
+                * as a temporary buffer here, since it is reassigned anyway by
+                * the apic->vector_allocation_domain() call above.
+                */
+               cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+               cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+               cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+               continue;
        }
+       return -ENOSPC;
 
-       return err;
+update:
+       /*
+        * Exclude offline cpus from the cleanup mask and set the
+        * move_in_progress flag when the result is not empty.
+        */
+       cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+       d->move_in_progress = !cpumask_empty(d->old_domain);
+       d->cfg.vector = vector;
+       cpumask_copy(d->domain, vector_cpumask);
+success:
+       /*
+        * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+        * as we already established that mask & d->domain & cpu_online_mask
+        * is not empty.
+        */
+       BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+                                           &d->cfg.dest_apicid));
+       return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
        struct irq_desc *desc;
-       unsigned long flags;
        int cpu, vector;
 
-       raw_spin_lock_irqsave(&vector_lock, flags);
        BUG_ON(!data->cfg.vector);
 
        vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
        data->cfg.vector = 0;
        cpumask_clear(data->domain);
 
-       if (likely(!data->move_in_progress)) {
-               raw_spin_unlock_irqrestore(&vector_lock, flags);
+       /*
+        * If move is in progress or the old_domain mask is not empty,
+        * i.e. the cleanup IPI has not been processed yet, we need to remove
+        * the old references to desc from all cpus' vector tables.
+        */
+       if (!data->move_in_progress && cpumask_empty(data->old_domain))
                return;
-       }
 
        desc = irq_to_desc(irq);
        for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
                }
        }
        data->move_in_progress = 0;
-       raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
                                 unsigned int virq, unsigned int nr_irqs)
 {
+       struct apic_chip_data *apic_data;
        struct irq_data *irq_data;
+       unsigned long flags;
        int i;
 
        for (i = 0; i < nr_irqs; i++) {
                irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
                if (irq_data && irq_data->chip_data) {
+                       raw_spin_lock_irqsave(&vector_lock, flags);
                        clear_irq_vector(virq + i, irq_data->chip_data);
-                       free_apic_chip_data(irq_data->chip_data);
+                       apic_data = irq_data->chip_data;
+                       irq_domain_reset_irq_data(irq_data);
+                       raw_spin_unlock_irqrestore(&vector_lock, flags);
+                       free_apic_chip_data(apic_data);
 #ifdef CONFIG_X86_IO_APIC
                        if (virq + i < nr_legacy_irqs())
                                legacy_irq_data[virq + i] = NULL;
 #endif
-                       irq_domain_reset_irq_data(irq_data);
                }
        }
 }
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
        arch_init_htirq_domain(x86_vector_domain);
 
        BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+       BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+       BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
        return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
                return -EINVAL;
 
        err = assign_irq_vector(irq, data, dest);
-       if (err) {
-               if (assign_irq_vector(irq, data,
-                                     irq_data_get_affinity_mask(irq_data)))
-                       pr_err("Failed to recover vector for irq %d\n", irq);
-               return err;
-       }
-
-       return IRQ_SET_MASK_OK;
+       return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-       cpumask_var_t cleanup_mask;
-
-       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-               unsigned int i;
-
-               for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                       apic->send_IPI_mask(cpumask_of(i),
-                                           IRQ_MOVE_CLEANUP_VECTOR);
-       } else {
-               cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               free_cpumask_var(cleanup_mask);
-       }
+       raw_spin_lock(&vector_lock);
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
        data->move_in_progress = 0;
+       if (!cpumask_empty(data->old_domain))
+               apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+       raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
 
                /*
-                * Check if the irq migration is in progress. If so, we
-                * haven't received the cleanup request yet for this irq.
+                * Nothing to clean up if irq migration is in progress
+                * or this cpu is not set in the cleanup mask.
                 */
-               if (data->move_in_progress)
+               if (data->move_in_progress ||
+                   !cpumask_test_cpu(me, data->old_domain))
                        goto unlock;
 
+               /*
+                * We have two cases to handle here:
+                * 1) vector is unchanged but the target mask got reduced
+                * 2) vector and the target mask have changed
+                *
+                * #1 is obvious, but in #2 we have two vectors with the same
+                * irq descriptor: the old and the new vector. So we need to
+                * make sure that we only clean up the old vector. The new
+                * vector has the current @vector number in the config and
+                * this cpu is part of the target mask. We'd better leave that
+                * one alone.
+                */
                if (vector == data->cfg.vector &&
                    cpumask_test_cpu(me, data->domain))
                        goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
                }
                __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+               cpumask_clear_cpu(me, data->old_domain);
 unlock:
                raw_spin_unlock(&desc->lock);
        }
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
        __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+       struct apic_chip_data *data = apic_chip_data(irqdata);
+       struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-       if (cfg)
-               __irq_complete_move(cfg, cfg->vector);
+       if (!cfg)
+               return;
+
+       __irq_complete_move(cfg, cfg->vector);
+
+       /*
+        * This is tricky. If the cleanup of @data->old_domain has not been
+        * done yet, then the following setaffinity call will fail with
+        * -EBUSY. This can leave the interrupt in a stale state.
+        *
+        * The cleanup cannot make progress because we hold @desc->lock. So in
+        * case @data->old_domain is not yet cleaned up, we need to drop the
+        * lock and acquire it again. @desc cannot go away, because the
+        * hotplug code holds the sparse irq lock.
+        */
+       raw_spin_lock(&vector_lock);
+       /* Clean out all offline cpus (including ourself) first. */
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+       while (!cpumask_empty(data->old_domain)) {
+               raw_spin_unlock(&vector_lock);
+               raw_spin_unlock(&desc->lock);
+               cpu_relax();
+               raw_spin_lock(&desc->lock);
+               /*
+                * Reevaluate apic_chip_data. It might have been cleared after
+                * we dropped @desc->lock.
+                */
+               data = apic_chip_data(irqdata);
+               if (!data)
+                       return;
+               raw_spin_lock(&vector_lock);
+       }
+       raw_spin_unlock(&vector_lock);
 }
 #endif
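
irq_force_complete_move() now spins until the cleanup of @data->old_domain finishes, dropping both locks on every pass because the cleanup IPI handler needs desc->lock to make progress. A user-space sketch of that drop-retry-revalidate idiom using pthread mutexes (build with -pthread); the countdown merely simulates the cleanup completing:

    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static pthread_mutex_t desc_lock   = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;
    static int cleanup_pending = 3;    /* stand-in for a non-empty old_domain */

    /* Entered with desc_lock held; returns with desc_lock held. */
    static void force_complete(void)
    {
            pthread_mutex_lock(&vector_lock);
            while (cleanup_pending) {
                    /* Waiting with desc_lock held would stall the cleanup
                     * side, so drop both locks, retake them, revalidate. */
                    pthread_mutex_unlock(&vector_lock);
                    pthread_mutex_unlock(&desc_lock);
                    sched_yield();                  /* cpu_relax() stand-in */
                    pthread_mutex_lock(&desc_lock);
                    pthread_mutex_lock(&vector_lock);
                    cleanup_pending--;              /* simulated progress */
            }
            pthread_mutex_unlock(&vector_lock);
    }

    int main(void)
    {
            pthread_mutex_lock(&desc_lock);
            force_complete();
            pthread_mutex_unlock(&desc_lock);
            puts("cleanup drained");
            return 0;
    }
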
 
index d760c6bb37b53c25931bef3bc47c903def86c8ee..624db00583f44a76283c4d8d5e5377a8342f6517 100644 (file)
@@ -889,7 +889,10 @@ void __init uv_system_init(void)
                return;
        }
        pr_info("UV: Found %s hub\n", hub);
-       map_low_mmrs();
+
+       /* We now only need to map the MMRs on UV1 */
+       if (is_uv1_hub())
+               map_low_mmrs();
 
        m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
        m_val = m_n_config.s.m_skt;
index 49742746a6c963c4a86c08c773430087225fec53..8836fc9fa84b08de4e35ddd60ac7b0f828fec231 100644 (file)
@@ -323,6 +323,8 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
        return 0;
 
 fail:
+       if (amd_uncore_nb)
+               *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
        kfree(uncore_nb);
        return -ENOMEM;
 }
index a667078a51807bb5bd1ca2e5d5c4ff205e9dd42b..fed2ab1f1065dfdd5a286822102857a013298dfc 100644 (file)
@@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event)
 
 static int intel_alt_er(int idx, u64 config)
 {
-       int alt_idx;
+       int alt_idx = idx;
+
        if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
                return idx;
 
@@ -2897,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu)
                return;
 
        if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-               void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
                for_each_cpu(i, topology_sibling_cpumask(cpu)) {
                        struct intel_shared_regs *pc;
 
                        pc = per_cpu(cpu_hw_events, i).shared_regs;
                        if (pc && pc->core_id == core_id) {
-                               *onln = cpuc->shared_regs;
+                               cpuc->kfree_on_online[0] = cpuc->shared_regs;
                                cpuc->shared_regs = pc;
                                break;
                        }
index f97f8075bf04c9d6efed980370feca46227afc5a..3bf41d41377505ef28d90ed3363f08e83df70db3 100644 (file)
@@ -995,6 +995,9 @@ static int __init uncore_pci_init(void)
        case 87: /* Knights Landing */
                ret = knl_uncore_pci_init();
                break;
+       case 94: /* SkyLake */
+               ret = skl_uncore_pci_init();
+               break;
        default:
                return 0;
        }
index 07aa2d6bd71094e411a9f98535fc4c65208a66d4..a7086b862156627ebe7ca22996aed6126e61e770 100644 (file)
@@ -336,6 +336,7 @@ int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
 int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
index 0b934820fafd1f64747640e09f1544b862eb8c01..2bd030ddd0db3bf6f5ada42830bd9be9ccc4c1b4 100644 (file)
@@ -8,6 +8,7 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
@@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
        { /* end: all zeroes */ },
 };
 
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
        .name           = "snb_uncore",
        .id_table       = snb_uncore_pci_ids,
@@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = {
        .id_table       = bdw_uncore_pci_ids,
 };
 
+static struct pci_driver skl_uncore_pci_driver = {
+       .name           = "skl_uncore",
+       .id_table       = skl_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
        __u32 pci_id;
        struct pci_driver *driver;
@@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
        IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
        IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
        IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
        {  /* end marker */ }
 };
 
@@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void)
        return imc_uncore_pci_init();
 }
 
+int skl_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
index f129a9af635747ce616e354a43f30297f3fecabf..2c0f3407bd1f1ae8adcf43cacdb479c91c096669 100644 (file)
@@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
        reserve_ebda_region();
 
+       switch (boot_params.hdr.hardware_subarch) {
+       case X86_SUBARCH_INTEL_MID:
+               x86_intel_mid_early_setup();
+               break;
+       default:
+               break;
+       }
+
        start_kernel();
 }
index f8062aaf5df9c830ec287f6774ad270e13d34b32..61521dc19c102114e177cbc21a4f5da9d94c20cd 100644 (file)
@@ -462,7 +462,7 @@ void fixup_irqs(void)
                 * non intr-remapping case, we can't wait till this interrupt
                 * arrives at this cpu before completing the irq move.
                 */
-               irq_force_complete_move(irq);
+               irq_force_complete_move(desc);
 
                if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
                        break_affinity = 1;
@@ -470,6 +470,15 @@ void fixup_irqs(void)
                }
 
                chip = irq_data_get_irq_chip(data);
+               /*
+                * The interrupt descriptor might have been cleaned up
+                * already, but it has not yet been removed from the radix tree.
+                */
+               if (!chip) {
+                       raw_spin_unlock(&desc->lock);
+                       continue;
+               }
+
                if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
                        chip->irq_mask(data);
 
index 1505587d06e97826703b21aba540ea8403bd8f2c..b9b09fec173bf2fedb0b9ee122b3c4668eb6e471 100644 (file)
@@ -650,10 +650,10 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
        u16 sel;
 
        la = seg_base(ctxt, addr.seg) + addr.ea;
-       *linear = la;
        *max_size = 0;
        switch (mode) {
        case X86EMUL_MODE_PROT64:
+               *linear = la;
                if (is_noncanonical_address(la))
                        goto bad;
 
@@ -662,6 +662,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
                        goto bad;
                break;
        default:
+               *linear = la = (u32)la;
                usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
                                                addr.seg);
                if (!usable)
@@ -689,7 +690,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
                        if (size > *max_size)
                                goto bad;
                }
-               la &= (u32)-1;
                break;
        }
        if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
index 6c9fed957cce1c897f0a490ec74c3f4e4f76d4dc..2ce4f05e81d3804cef4ded542a105a5a917b8bed 100644 (file)
@@ -249,7 +249,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
                        return ret;
 
                kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
-               walker->ptes[level] = pte;
+               walker->ptes[level - 1] = pte;
        }
        return 0;
 }
index 4244c2baf57da55aa5dd266b63781642dcc8af12..f4891f2ece2330ed9d84bb48725a8ac9d377430d 100644 (file)
@@ -2752,6 +2752,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        }
 
        kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+       vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
index 982ce34f4a9bf66011fc2652b45466d9c2b276f9..27f89c79a44b7da6fc6aef54e3961426e542e7ed 100644 (file)
@@ -232,17 +232,31 @@ ENDPROC(copy_user_enhanced_fast_string)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force the destination out of cache for better performance.
+ *
+ * Note: A cached memory copy is used when the destination or size is not
+ * naturally aligned, i.e. the non-temporal path requires:
+ *  - 8-byte alignment of the destination when size is 8 bytes or larger;
+ *  - 4-byte alignment of the destination when size is 4 bytes.
  */
 ENTRY(__copy_user_nocache)
        ASM_STAC
+
+       /* If size is less than 8 bytes, go to 4-byte copy */
        cmpl $8,%edx
-       jb 20f          /* less then 8 bytes, go to byte copy loop */
+       jb .L_4b_nocache_copy_entry
+
+       /* If destination is not 8-byte aligned, "cache" copy to align it */
        ALIGN_DESTINATION
+
+       /* Set 4x8-byte copy count and remainder */
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
-       jz 17f
+       jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
+
+       /* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:     movq (%rsi),%r8
 2:     movq 1*8(%rsi),%r9
 3:     movq 2*8(%rsi),%r10
@@ -262,60 +276,106 @@ ENTRY(__copy_user_nocache)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
-       jnz 1b
-17:    movl %edx,%ecx
+       jnz .L_4x8b_nocache_copy_loop
+
+       /* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+       movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
-       jz 20f
-18:    movq (%rsi),%r8
-19:    movnti %r8,(%rdi)
+       jz .L_4b_nocache_copy_entry     /* jump if count is 0 */
+
+       /* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:    movq (%rsi),%r8
+21:    movnti %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
-       jnz 18b
-20:    andl %edx,%edx
-       jz 23f
+       jnz .L_8b_nocache_copy_loop
+
+       /* If no bytes left, we're done */
+.L_4b_nocache_copy_entry:
+       andl %edx,%edx
+       jz .L_finish_copy
+
+       /* If destination is not 4-byte aligned, go to byte copy: */
+       movl %edi,%ecx
+       andl $3,%ecx
+       jnz .L_1b_cache_copy_entry
+
+       /* Set 4-byte copy count (1 or 0) and remainder */
        movl %edx,%ecx
-21:    movb (%rsi),%al
-22:    movb %al,(%rdi)
+       andl $3,%edx
+       shrl $2,%ecx
+       jz .L_1b_cache_copy_entry       /* jump if count is 0 */
+
+       /* Perform 4-byte nocache copy: */
+30:    movl (%rsi),%r8d
+31:    movnti %r8d,(%rdi)
+       leaq 4(%rsi),%rsi
+       leaq 4(%rdi),%rdi
+
+       /* If no bytes left, we're done: */
+       andl %edx,%edx
+       jz .L_finish_copy
+
+       /* Perform byte "cache" loop-copy for the remainder */
+.L_1b_cache_copy_entry:
+       movl %edx,%ecx
+.L_1b_cache_copy_loop:
+40:    movb (%rsi),%al
+41:    movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
-       jnz 21b
-23:    xorl %eax,%eax
+       jnz .L_1b_cache_copy_loop
+
+       /* Finished copying; fence the prior stores */
+.L_finish_copy:
+       xorl %eax,%eax
        ASM_CLAC
        sfence
        ret
 
        .section .fixup,"ax"
-30:    shll $6,%ecx
+.L_fixup_4x8b_copy:
+       shll $6,%ecx
        addl %ecx,%edx
-       jmp 60f
-40:    lea (%rdx,%rcx,8),%rdx
-       jmp 60f
-50:    movl %ecx,%edx
-60:    sfence
+       jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+       lea (%rdx,%rcx,8),%rdx
+       jmp .L_fixup_handle_tail
+.L_fixup_4b_copy:
+       lea (%rdx,%rcx,4),%rdx
+       jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+       movl %ecx,%edx
+.L_fixup_handle_tail:
+       sfence
        jmp copy_user_handle_tail
        .previous
 
-       _ASM_EXTABLE(1b,30b)
-       _ASM_EXTABLE(2b,30b)
-       _ASM_EXTABLE(3b,30b)
-       _ASM_EXTABLE(4b,30b)
-       _ASM_EXTABLE(5b,30b)
-       _ASM_EXTABLE(6b,30b)
-       _ASM_EXTABLE(7b,30b)
-       _ASM_EXTABLE(8b,30b)
-       _ASM_EXTABLE(9b,30b)
-       _ASM_EXTABLE(10b,30b)
-       _ASM_EXTABLE(11b,30b)
-       _ASM_EXTABLE(12b,30b)
-       _ASM_EXTABLE(13b,30b)
-       _ASM_EXTABLE(14b,30b)
-       _ASM_EXTABLE(15b,30b)
-       _ASM_EXTABLE(16b,30b)
-       _ASM_EXTABLE(18b,40b)
-       _ASM_EXTABLE(19b,40b)
-       _ASM_EXTABLE(21b,50b)
-       _ASM_EXTABLE(22b,50b)
+       _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+       _ASM_EXTABLE(20b,.L_fixup_8b_copy)
+       _ASM_EXTABLE(21b,.L_fixup_8b_copy)
+       _ASM_EXTABLE(30b,.L_fixup_4b_copy)
+       _ASM_EXTABLE(31b,.L_fixup_4b_copy)
+       _ASM_EXTABLE(40b,.L_fixup_1b_copy)
+       _ASM_EXTABLE(41b,.L_fixup_1b_copy)
 ENDPROC(__copy_user_nocache)
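
The rewritten __copy_user_nocache dispatches on destination alignment: 8-byte movnti when the destination is 8-aligned, the new 4-byte movnti when it is only 4-aligned, and plain byte stores for the tail, all fenced at the end. A hedged x86-64 user-space sketch of the same dispatch using the SSE2 non-temporal-store intrinsics:

    #include <emmintrin.h>  /* _mm_stream_si32/_mm_stream_si64, _mm_sfence */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static void copy_nocache(void *dst, const void *src, size_t n)
    {
            unsigned char *d = dst;
            const unsigned char *s = src;

            if (((uintptr_t)d & 7) == 0)            /* 8-aligned: 8B movnti */
                    for (; n >= 8; d += 8, s += 8, n -= 8) {
                            long long v;

                            memcpy(&v, s, 8);       /* unaligned-safe load */
                            _mm_stream_si64((long long *)d, v);
                    }
            if (((uintptr_t)d & 3) == 0)            /* 4-aligned: 4B movnti */
                    for (; n >= 4; d += 4, s += 4, n -= 4) {
                            int v;

                            memcpy(&v, s, 4);
                            _mm_stream_si32((int *)d, v);
                    }
            while (n--)                             /* cached byte-copy tail */
                    *d++ = *s++;
            _mm_sfence();                           /* order the NT stores */
    }

    int main(void)
    {
            char out[16];

            copy_nocache(out, "hello, movnti!!", 16);
            puts(out);
            return 0;
    }
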
index eef44d9a3f77e2acc88baa424683388f67985e24..e830c71a13232f4305adf9ef1eb8ce6ffe201d5b 100644 (file)
@@ -287,6 +287,9 @@ static noinline int vmalloc_fault(unsigned long address)
        if (!pmd_k)
                return -1;
 
+       if (pmd_huge(*pmd_k))
+               return 0;
+
        pte_k = pte_offset_kernel(pmd_k, address);
        if (!pte_present(*pte_k))
                return -1;
@@ -360,8 +363,6 @@ void vmalloc_sync_all(void)
  * 64-bit:
  *
  *   Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
  */
 static noinline int vmalloc_fault(unsigned long address)
 {
@@ -403,17 +404,23 @@ static noinline int vmalloc_fault(unsigned long address)
        if (pud_none(*pud_ref))
                return -1;
 
-       if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+       if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
                BUG();
 
+       if (pud_huge(*pud))
+               return 0;
+
        pmd = pmd_offset(pud, address);
        pmd_ref = pmd_offset(pud_ref, address);
        if (pmd_none(*pmd_ref))
                return -1;
 
-       if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+       if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
                BUG();
 
+       if (pmd_huge(*pmd))
+               return 0;
+
        pte_ref = pte_offset_kernel(pmd_ref, address);
        if (!pte_present(*pte_ref))
                return -1;
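
The fault path now treats a huge pud/pmd as a leaf and stops descending, instead of dereferencing a lower-level table that does not exist under a large mapping. The idea in a toy two-level walk, with invented PRESENT/HUGE bits standing in for the real page-table flags:

    #include <stdint.h>
    #include <stdio.h>

    #define PRESENT 0x1u
    #define HUGE    0x2u    /* leaf at the upper level, cf. pud_huge()/pmd_huge() */

    /* Returns 0 if mapped, -1 if not; sketch only. */
    static int toy_walk(const uint32_t *upper, const uint32_t *lower,
                        unsigned int ui, unsigned int li)
    {
            if (!(upper[ui] & PRESENT))
                    return -1;
            if (upper[ui] & HUGE)
                    return 0;       /* large mapping: no lower table to consult */
            return (lower[li] & PRESENT) ? 0 : -1;
    }

    int main(void)
    {
            uint32_t upper[2] = { PRESENT | HUGE, PRESENT };
            uint32_t lower[1] = { 0 };

            printf("%d %d\n", toy_walk(upper, lower, 0, 0),  /* 0: huge leaf */
                   toy_walk(upper, lower, 1, 0));            /* -1: no pte   */
            return 0;
    }
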
index 6d5eb59003721b79213a7661994be1a339bc5590..d8a798d8bf50ac037fa2ac4854085bd78759c249 100644 (file)
@@ -102,7 +102,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                        return 0;
                }
 
-               page = pte_page(pte);
                if (pte_devmap(pte)) {
                        pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
                        if (unlikely(!pgmap)) {
@@ -115,6 +114,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                        return 0;
                }
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+               page = pte_page(pte);
                get_page(page);
                put_dev_pagemap(pgmap);
                SetPageReferenced(page);
index 42982b26e32be693713d90e8fa4e18b8f771eb4a..740d7ac03a552bc4937edfc8ff7e8b9d044a61b6 100644 (file)
@@ -173,10 +173,10 @@ static __init int setup_hugepagesz(char *opt)
 }
 __setup("hugepagesz=", setup_hugepagesz);
 
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 static __init int gigantic_pages_init(void)
 {
-       /* With CMA we can allocate gigantic pages at runtime */
+       /* With compaction or CMA we can allocate gigantic pages at runtime */
        if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
        return 0;
index 96bd1e2bffafb3b8ffbc28906a2998ad992d50e8..72bb52f93c3d619d0dc6c0355b7ab293a24c3199 100644 (file)
@@ -71,12 +71,12 @@ unsigned long arch_mmap_rnd(void)
 
        if (mmap_is_ia32())
 #ifdef CONFIG_COMPAT
-               rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
 #else
-               rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 #endif
        else
-               rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 
        return rnd << PAGE_SHIFT;
 }
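
Beyond the wider random source, the 1UL literal matters on its own: (1 << bits) shifts a 32-bit int and is undefined once bits reaches 31, while 1UL keeps the shift 64-bit wide on x86-64. A two-line illustration; the bit count is hypothetical:

    #include <stdio.h>

    int main(void)
    {
            unsigned long bits = 32;                /* hypothetical rnd-bits value */
            unsigned long mask = (1UL << bits) - 1; /* (1 << 32) would be UB */

            printf("mask=%#lx\n", mask);
            return 0;
    }
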
index b2fd67da1701433d9168ebf1d0a908330544bd7d..ef05755a190063cdf79210f7ac164aef1a0f4ec5 100644 (file)
@@ -123,7 +123,7 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
                break;
        }
 
-       if (regno > nr_registers) {
+       if (regno >= nr_registers) {
                WARN_ONCE(1, "decoded an instruction with an invalid register");
                return -EINVAL;
        }
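
The one-character fix above turns an off-by-one into a correct range check: with a table of nr_registers entries, regno == nr_registers must already be rejected. A minimal sketch of the corrected check:

    #include <stdio.h>

    #define NR_REGS 16

    static long regs[NR_REGS];

    static long *reg_slot(unsigned int regno)
    {
            if (regno >= NR_REGS)   /* '>' would let regno == NR_REGS through */
                    return NULL;
            return &regs[regno];
    }

    int main(void)
    {
            printf("%s\n", reg_slot(NR_REGS) ? "in range" : "rejected");
            return 0;
    }
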
index c3b3f653ed0c6c9112297164c5cf535494f19461..d04f8094bc2323bbaa76095ab09efa2e8357cc7e 100644 (file)
@@ -469,7 +469,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
 {
        int i, nid;
        nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
-       unsigned long start, end;
+       phys_addr_t start, end;
        struct memblock_region *r;
 
        /*
index fc6a4c8f6e2abc79c29803264ad16a48067904e8..9cf96d82147a8da38ce4f57413d75075003cb244 100644 (file)
@@ -33,7 +33,7 @@ struct cpa_data {
        pgd_t           *pgd;
        pgprot_t        mask_set;
        pgprot_t        mask_clr;
-       int             numpages;
+       unsigned long   numpages;
        int             flags;
        unsigned long   pfn;
        unsigned        force_split : 1;
@@ -419,24 +419,30 @@ pmd_t *lookup_pmd_address(unsigned long address)
 phys_addr_t slow_virt_to_phys(void *__virt_addr)
 {
        unsigned long virt_addr = (unsigned long)__virt_addr;
-       unsigned long phys_addr, offset;
+       phys_addr_t phys_addr;
+       unsigned long offset;
        enum pg_level level;
        pte_t *pte;
 
        pte = lookup_address(virt_addr, &level);
        BUG_ON(!pte);
 
+       /*
+        * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
+        * before being left-shifted PAGE_SHIFT bits -- this keeps a 32-bit
+        * PAE kernel from truncating the physical address.
+        */
        switch (level) {
        case PG_LEVEL_1G:
-               phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+               phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
                offset = virt_addr & ~PUD_PAGE_MASK;
                break;
        case PG_LEVEL_2M:
-               phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+               phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
                offset = virt_addr & ~PMD_PAGE_MASK;
                break;
        default:
-               phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+               phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
                offset = virt_addr & ~PAGE_MASK;
        }
 
@@ -1350,7 +1356,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 * CPA operation. Either a large page has been
                 * preserved or a single page update happened.
                 */
-               BUG_ON(cpa->numpages > numpages);
+               BUG_ON(cpa->numpages > numpages || !cpa->numpages);
                numpages -= cpa->numpages;
                if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
                        cpa->curpage++;
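
The casts above matter on 32-bit PAE, where pfn << PAGE_SHIFT is computed in 32 bits and silently drops the high address bits; widening the pfn first preserves them. A small demonstration of the truncation (the pfn value is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
            uint32_t pfn = 0x110000;        /* a frame above 4 GiB */
            uint32_t bad  = pfn << PAGE_SHIFT;              /* wraps mod 2^32 */
            uint64_t good = (uint64_t)pfn << PAGE_SHIFT;    /* widen, then shift */

            printf("bad=%#x good=%#llx\n", bad, (unsigned long long)good);
            return 0;
    }
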
index 2879efc73a967bca70606547014decd3703c84bf..d34b5118b4e8e7a30c55d722733d03b00bade013 100644 (file)
@@ -711,28 +711,22 @@ int pcibios_add_device(struct pci_dev *dev)
        return 0;
 }
 
-int pcibios_alloc_irq(struct pci_dev *dev)
+int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
-       /*
-        * If the PCI device was already claimed by core code and has
-        * MSI enabled, probing of the pcibios IRQ will overwrite
-        * dev->irq.  So bail out if MSI is already enabled.
-        */
-       if (pci_dev_msi_enabled(dev))
-               return -EBUSY;
+       int err;
 
-       return pcibios_enable_irq(dev);
-}
+       if ((err = pci_enable_resources(dev, mask)) < 0)
+               return err;
 
-void pcibios_free_irq(struct pci_dev *dev)
-{
-       if (pcibios_disable_irq)
-               pcibios_disable_irq(dev);
+       if (!pci_dev_msi_enabled(dev))
+               return pcibios_enable_irq(dev);
+       return 0;
 }
 
-int pcibios_enable_device(struct pci_dev *dev, int mask)
+void pcibios_disable_device (struct pci_dev *dev)
 {
-       return pci_enable_resources(dev, mask);
+       if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+               pcibios_disable_irq(dev);
 }
 
 int pci_ext_cfg_avail(void)
index 0d24e7c101454057f432dd976ea7e3d537e0a38c..8b93e634af84c4698e6f7c6ab718a018e2cdc9db 100644 (file)
@@ -215,7 +215,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
        int polarity;
        int ret;
 
-       if (pci_has_managed_irq(dev))
+       if (dev->irq_managed && dev->irq > 0)
                return 0;
 
        switch (intel_mid_identify_cpu()) {
@@ -256,13 +256,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-       if (pci_has_managed_irq(dev)) {
+       if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+           dev->irq > 0) {
                mp_unmap_irq(dev->irq);
                dev->irq_managed = 0;
-               /*
-                * Don't reset dev->irq here, otherwise
-                * intel_mid_pci_irq_enable() will fail on next call.
-                */
        }
 }
 
index 32e70343e6fdd0e58ab80644284dd01000dccd82..9bd115484745703791c6515b289938546d2478ab 100644 (file)
@@ -1202,7 +1202,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
                        struct pci_dev *temp_dev;
                        int irq;
 
-                       if (pci_has_managed_irq(dev))
+                       if (dev->irq_managed && dev->irq > 0)
                                return 0;
 
                        irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
@@ -1230,7 +1230,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
                        }
                        dev = temp_dev;
                        if (irq >= 0) {
-                               pci_set_managed_irq(dev, irq);
+                               dev->irq_managed = 1;
+                               dev->irq = irq;
                                dev_info(&dev->dev, "PCI->APIC IRQ transform: "
                                         "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
                                return 0;
@@ -1256,10 +1257,24 @@ static int pirq_enable_irq(struct pci_dev *dev)
        return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+       if (dev->power.is_prepared)
+               return true;
+#ifdef CONFIG_PM
+       if (dev->power.runtime_status == RPM_SUSPENDING)
+               return true;
+#endif
+
+       return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-       if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) {
+       if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+           dev->irq_managed && dev->irq) {
                mp_unmap_irq(dev->irq);
-               pci_reset_managed_irq(dev);
+               dev->irq = 0;
+               dev->irq_managed = 0;
        }
 }
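
mp_should_keep_irq() above declines to tear down an IRQ while the device is in a power-management transition that may still need it. A toy model of the same guard, with a simplified stand-in for the kernel's struct dev_pm_info:

#include <stdbool.h>
#include <stdio.h>

enum rpm_status { RPM_ACTIVE, RPM_SUSPENDING, RPM_SUSPENDED };

struct device {
        bool is_prepared;               /* system sleep transition started */
        enum rpm_status runtime_status;
};

/* Keep the IRQ mapped if a PM transition could still need it. */
static bool should_keep_irq(const struct device *dev)
{
        return dev->is_prepared || dev->runtime_status == RPM_SUSPENDING;
}

int main(void)
{
        struct device d = { .is_prepared = false,
                            .runtime_status = RPM_SUSPENDING };

        printf("keep=%d\n", should_keep_irq(&d));   /* keep=1 */
        return 0;
}
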
index ff31ab464213fc49f21d0dde5bdcb14cc0763020..beac4dfdade6c05c02591e924fd913d5db53abcd 100644 (file)
@@ -196,7 +196,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
        return 0;
 
 error:
-       dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+       if (ret == -ENOSYS)
+               dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+       else if (ret)
+               dev_err(&dev->dev, "Xen PCI frontend error: %d!\n", ret);
 free:
        kfree(v);
        return ret;
index 1c7380da65ffab641f0cb0aedc88cb7ef7e8f6cc..2d66db8f80f992d3b0609373502031aacf91f161 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 
@@ -248,6 +249,16 @@ out:
        return ret;
 }
 
+static const struct dmi_system_id sgi_uv1_dmi[] = {
+       { NULL, "SGI UV1",
+               {       DMI_MATCH(DMI_PRODUCT_NAME,     "Stoutland Platform"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION,  "1.0"),
+                       DMI_MATCH(DMI_BIOS_VENDOR,      "SGI.COM"),
+               }
+       },
+       { } /* NULL entry stops DMI scanning */
+};
+
 void __init efi_apply_memmap_quirks(void)
 {
        /*
@@ -260,10 +271,8 @@ void __init efi_apply_memmap_quirks(void)
                efi_unmap_memmap();
        }
 
-       /*
-        * UV doesn't support the new EFI pagetable mapping yet.
-        */
-       if (is_uv_system())
+       /* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
+       if (dmi_check_system(sgi_uv1_dmi))
                set_bit(EFI_OLD_MEMMAP, &efi.flags);
 }
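
dmi_check_system() walks a table like sgi_uv1_dmi above and reports a hit only when every DMI_MATCH field of some entry matches the firmware strings. A standalone sketch of that matching shape (the kernel's default match is a substring test, which strstr mimics here; names and values are reused only as sample data):

#include <stdio.h>
#include <string.h>

enum dmi_field { DMI_PRODUCT_NAME, DMI_PRODUCT_VERSION, DMI_BIOS_VENDOR,
                 DMI_NFIELDS };

struct dmi_match { enum dmi_field field; const char *value; };

/* All listed (field, value) pairs must match for the entry to match. */
static int dmi_entry_matches(const char *sys[], const struct dmi_match *m,
                             int n)
{
        for (int i = 0; i < n; i++)
                if (!strstr(sys[m[i].field], m[i].value))
                        return 0;
        return 1;
}

int main(void)
{
        const char *sys[DMI_NFIELDS] = {
                [DMI_PRODUCT_NAME]    = "Stoutland Platform",
                [DMI_PRODUCT_VERSION] = "1.0",
                [DMI_BIOS_VENDOR]     = "SGI.COM",
        };
        const struct dmi_match uv1[] = {
                { DMI_PRODUCT_NAME,    "Stoutland Platform" },
                { DMI_PRODUCT_VERSION, "1.0" },
                { DMI_BIOS_VENDOR,     "SGI.COM" },
        };

        printf("uv1 match=%d\n", dmi_entry_matches(sys, uv1, 3));  /* 1 */
        return 0;
}
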
 
index 1bbc21e2e4aeab9fadaaa1f21856ab0ab9a4cea1..90bb997ed0a21889fd95482bfe298862b46eaacf 100644 (file)
@@ -138,7 +138,7 @@ static void intel_mid_arch_setup(void)
                intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
        else {
                intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
-               pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
+               pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
        }
 
 out:
@@ -214,12 +214,10 @@ static inline int __init setup_x86_intel_mid_timer(char *arg)
        else if (strcmp("lapic_and_apbt", arg) == 0)
                intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
        else {
-               pr_warn("X86 INTEL_MID timer option %s not recognised"
-                          " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
-                          arg);
+               pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+                       arg);
                return -EINVAL;
        }
        return 0;
 }
 __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
-
index c1bdafaac3ca828cec43af9e43d12933e4de7ea9..bfadcd0f4944276e39999cd00b86d6c786d1a085 100644 (file)
@@ -220,11 +220,12 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
                if (imr_is_enabled(&imr)) {
                        base = imr_to_phys(imr.addr_lo);
                        end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+                       size = end - base + 1;
                } else {
                        base = 0;
                        end = 0;
+                       size = 0;
                }
-               size = end - base;
                seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
                           "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
                           &base, &end, size, imr.rmask, imr.wmask,
@@ -579,6 +580,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 {
        phys_addr_t base = virt_to_phys(&_text);
        size_t size = virt_to_phys(&__end_rodata) - base;
+       unsigned long start, end;
        int i;
        int ret;
 
@@ -586,18 +588,24 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
        for (i = 0; i < idev->max_imr; i++)
                imr_clear(i);
 
+       start = (unsigned long)_text;
+       end = (unsigned long)__end_rodata - 1;
+
        /*
-        * Setup a locked IMR around the physical extent of the kernel
+        * Setup an unlocked IMR around the physical extent of the kernel
         * from the beginning of the .text section to the end of the
         * .rodata section as one physically contiguous block.
+        *
+        * We don't round up @size since it is already PAGE_SIZE aligned.
+        * See vmlinux.lds.S for details.
         */
-       ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
+       ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
        if (ret < 0) {
-               pr_err("unable to setup IMR for kernel: (%p - %p)\n",
-                       &_text, &__end_rodata);
+               pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+                       size / 1024, start, end);
        } else {
-               pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
-                       size / 1024, &_text, &__end_rodata);
+               pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+                       size / 1024, start, end);
        }
 
 }
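
The debugfs fix above reflects that IMR bounds are inclusive, so an enabled region spans end - base + 1 bytes, and a disabled one must report zero rather than a stale difference. The arithmetic, standalone with hypothetical values:

#include <stdio.h>

int main(void)
{
        unsigned long base = 0x1000, end = 0x1fff;   /* inclusive bounds */

        /* end - base would report 0xfff for a 0x1000-byte region. */
        unsigned long size = end - base + 1;

        printf("size=%#lx\n", size);   /* size=0x1000 */
        return 0;
}
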
index 161491d0a879ed11ce2276a1923a60bc1d92479e..0363cd731320d8d9d93cdcd8e3fb5351682e6464 100644 (file)
@@ -88,6 +88,19 @@ config BLK_DEV_INTEGRITY
        T10/SCSI Data Integrity Field or the T13/ATA External Path
        Protection.  If in doubt, say N.
 
+config BLK_DEV_DAX
+       bool "Block device DAX support"
+       depends on FS_DAX
+       depends on BROKEN
+       help
+         When DAX support is available (CONFIG_FS_DAX) raw block
+         devices can also support direct userspace access to the
+         storage capacity via MMAP(2) similar to a file on a
+         DAX-enabled filesystem.  However, the DAX I/O-path disables
+         some standard I/O-statistics, and the MMAP(2) path has some
+         operational differences due to bypassing the page
+         cache.  If in doubt, say N.
+
 config BLK_DEV_THROTTLING
        bool "Block layer bio throttling support"
        depends on BLK_CGROUP=y
index db5f622c9d67e05a4e2a15778c922b367e9cd82b..9eda2322b2d40acfb308a8afa245de04c9b84989 100644 (file)
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                        blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
                        blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-                       blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
+                       blk-lib.o blk-mq.o blk-mq-tag.o \
                        blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
                        genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
                        badblocks.o partitions/
index f6325d573c10a42c673f844e4fb9585a0d9bd292..711e4d8de6fa06020432da0f37a9edcdd67a6151 100644 (file)
@@ -66,7 +66,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
        }
 
        if (unlikely(!bip))
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        memset(bip, 0, sizeof(*bip));
 
@@ -89,7 +89,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
        return bip;
 err:
        mempool_free(bip, bs->bio_integrity_pool);
-       return NULL;
+       return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
 
@@ -298,10 +298,10 @@ int bio_integrity_prep(struct bio *bio)
 
        /* Allocate bio integrity payload and integrity vectors */
        bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
-       if (unlikely(bip == NULL)) {
+       if (IS_ERR(bip)) {
                printk(KERN_ERR "could not allocate data integrity bioset\n");
                kfree(buf);
-               return -EIO;
+               return PTR_ERR(bip);
        }
 
        bip->bip_flags |= BIP_BLOCK_INTEGRITY;
@@ -465,9 +465,8 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
        BUG_ON(bip_src == NULL);
 
        bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
-
-       if (bip == NULL)
-               return -EIO;
+       if (IS_ERR(bip))
+               return PTR_ERR(bip);
 
        memcpy(bip->bip_vec, bip_src->bip_vec,
               bip_src->bip_vcnt * sizeof(struct bio_vec));
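
The conversion above moves bio_integrity_alloc() to the kernel's ERR_PTR convention, so callers can propagate the real errno instead of collapsing every failure to NULL and -EIO. The idiom, re-created standalone (these macros mirror include/linux/err.h in spirit, not verbatim):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Encode a small negative errno in an invalid pointer value. */
#define ERR_PTR(err)  ((void *)(long)(err))
#define PTR_ERR(ptr)  ((long)(ptr))
#define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-4095)

static void *alloc_thing(size_t n)
{
        void *p = malloc(n);

        return p ? p : ERR_PTR(-ENOMEM);
}

int main(void)
{
        void *p = alloc_thing(64);

        if (IS_ERR(p)) {
                printf("failed: %ld\n", PTR_ERR(p));
                return 1;
        }
        free(p);
        return 0;
}
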
index dbabd48b1934739bba125827e2019414cf34adee..cf7591551b1716b74fa3765cb1c271a554e8ef56 100644 (file)
@@ -874,7 +874,7 @@ int submit_bio_wait(int rw, struct bio *bio)
        bio->bi_private = &ret;
        bio->bi_end_io = submit_bio_wait_endio;
        submit_bio(rw, bio);
-       wait_for_completion(&ret.event);
+       wait_for_completion_io(&ret.event);
 
        return ret.error;
 }
@@ -1090,9 +1090,12 @@ int bio_uncopy_user(struct bio *bio)
        if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
                /*
                 * if we're in a workqueue, the request is orphaned, so
-                * don't copy into a random user address space, just free.
+                * don't copy into a random user address space, just free
+                * and return -EINTR so user space doesn't expect any data.
                 */
-               if (current->mm && bio_data_dir(bio) == READ)
+               if (!current->mm)
+                       ret = -EINTR;
+               else if (bio_data_dir(bio) == READ)
                        ret = bio_copy_to_iter(bio, bmd->iter);
                if (bmd->is_our_pages)
                        bio_free_pages(bio);
index 5a37188b559fba8feb1da48b00d5290c3a6a09b6..66e6f1aae02eeb14de30ada1e6696622082729c1 100644 (file)
@@ -788,6 +788,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 {
        struct gendisk *disk;
        struct blkcg_gq *blkg;
+       struct module *owner;
        unsigned int major, minor;
        int key_len, part, ret;
        char *body;
@@ -804,7 +805,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
        if (!disk)
                return -ENODEV;
        if (part) {
+               owner = disk->fops->owner;
                put_disk(disk);
+               module_put(owner);
                return -ENODEV;
        }
 
@@ -820,7 +823,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                ret = PTR_ERR(blkg);
                rcu_read_unlock();
                spin_unlock_irq(disk->queue->queue_lock);
+               owner = disk->fops->owner;
                put_disk(disk);
+               module_put(owner);
                /*
                 * If queue was bypassing, we should retry.  Do so after a
                 * short msleep().  It isn't strictly necessary but queue
@@ -851,9 +856,13 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
        __releases(ctx->disk->queue->queue_lock) __releases(rcu)
 {
+       struct module *owner;
+
        spin_unlock_irq(ctx->disk->queue->queue_lock);
        rcu_read_unlock();
+       owner = ctx->disk->fops->owner;
        put_disk(ctx->disk);
+       module_put(owner);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
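
The blkg_conf changes pin the disk driver's module across the disk reference; note that the owner pointer is read before put_disk(), since the disk (and its fops) may be gone afterwards. A toy refcount model of that ordering, with hypothetical types standing in for gendisk and module:

#include <stdio.h>

struct module { int refs; };
struct ops    { struct module *owner; };
struct disk   { struct ops *fops; int refs; };

static void module_put(struct module *m) { m->refs--; }

static void put_disk(struct disk *d)
{
        if (--d->refs == 0)
                d->fops = NULL;   /* stand-in for the disk going away */
}

static void release(struct disk *d)
{
        /* Read the owner BEFORE put_disk(): d->fops may be gone after. */
        struct module *owner = d->fops->owner;

        put_disk(d);
        module_put(owner);
}

int main(void)
{
        struct module m = { .refs = 1 };
        struct ops ops = { .owner = &m };
        struct disk d = { .fops = &ops, .refs = 1 };

        release(&d);
        printf("module refs=%d\n", m.refs);   /* 0 */
        return 0;
}
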
 
index 476244d5930917401dd6d41a314962a9edc6e6b3..b83d29755b5a04509edd3ebfe70f89f354a09932 100644 (file)
@@ -680,6 +680,13 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
        wake_up_all(&q->mq_freeze_wq);
 }
 
+static void blk_rq_timed_out_timer(unsigned long data)
+{
+       struct request_queue *q = (struct request_queue *)data;
+
+       kblockd_schedule_work(&q->timeout_work);
+}
+
 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
        struct request_queue *q;
@@ -841,6 +848,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
        if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
                goto fail;
 
+       INIT_WORK(&q->timeout_work, blk_timeout_work);
        q->request_fn           = rfn;
        q->prep_rq_fn           = NULL;
        q->unprep_rq_fn         = NULL;
@@ -2447,14 +2455,16 @@ struct request *blk_peek_request(struct request_queue *q)
 
                        rq = NULL;
                        break;
-               } else if (ret == BLKPREP_KILL) {
+               } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
+                       int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
+
                        rq->cmd_flags |= REQ_QUIET;
                        /*
                         * Mark this request as started so we don't trigger
                         * any debug logic in the end I/O path.
                         */
                        blk_start_request(rq);
-                       __blk_end_request_all(rq, -EIO);
+                       __blk_end_request_all(rq, err);
                } else {
                        printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
                        break;
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
deleted file mode 100644 (file)
index 0736729..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Functions related to interrupt-poll handling in the block layer. This
- * is similar to NAPI for network devices.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/blk-iopoll.h>
-#include <linux/delay.h>
-
-#include "blk.h"
-
-static unsigned int blk_iopoll_budget __read_mostly = 256;
-
-static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
-
-/**
- * blk_iopoll_sched - Schedule a run of the iopoll handler
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Add this blk_iopoll structure to the pending poll list and trigger the
- *     raise of the blk iopoll softirq. The driver must already have gotten a
- *     successful return from blk_iopoll_sched_prep() before calling this.
- **/
-void blk_iopoll_sched(struct blk_iopoll *iop)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
-       __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_iopoll_sched);
-
-/**
- * __blk_iopoll_complete - Mark this @iop as un-polled again
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     See blk_iopoll_complete(). This function must be called with interrupts
- *     disabled.
- **/
-void __blk_iopoll_complete(struct blk_iopoll *iop)
-{
-       list_del(&iop->list);
-       smp_mb__before_atomic();
-       clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(__blk_iopoll_complete);
-
-/**
- * blk_iopoll_complete - Mark this @iop as un-polled again
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     If a driver consumes less than the assigned budget in its run of the
- *     iopoll handler, it'll end the polled mode by calling this function. The
- *     iopoll handler will not be invoked again before blk_iopoll_sched_prep()
- *     is called.
- **/
-void blk_iopoll_complete(struct blk_iopoll *iop)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __blk_iopoll_complete(iop);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_iopoll_complete);
-
-static void blk_iopoll_softirq(struct softirq_action *h)
-{
-       struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
-       int rearm = 0, budget = blk_iopoll_budget;
-       unsigned long start_time = jiffies;
-
-       local_irq_disable();
-
-       while (!list_empty(list)) {
-               struct blk_iopoll *iop;
-               int work, weight;
-
-               /*
-                * If softirq window is exhausted then punt.
-                */
-               if (budget <= 0 || time_after(jiffies, start_time)) {
-                       rearm = 1;
-                       break;
-               }
-
-               local_irq_enable();
-
-               /* Even though interrupts have been re-enabled, this
-                * access is safe because interrupts can only add new
-                * entries to the tail of this list, and only ->poll()
-                * calls can remove this head entry from the list.
-                */
-               iop = list_entry(list->next, struct blk_iopoll, list);
-
-               weight = iop->weight;
-               work = 0;
-               if (test_bit(IOPOLL_F_SCHED, &iop->state))
-                       work = iop->poll(iop, weight);
-
-               budget -= work;
-
-               local_irq_disable();
-
-               /*
-                * Drivers must not modify the iopoll state, if they
-                * consume their assigned weight (or more, some drivers can't
-                * easily just stop processing, they have to complete an
-                * entire mask of commands). In such cases this code
-                * still "owns" the iopoll instance and therefore can
-                * move the instance around on the list at-will.
-                */
-               if (work >= weight) {
-                       if (blk_iopoll_disable_pending(iop))
-                               __blk_iopoll_complete(iop);
-                       else
-                               list_move_tail(&iop->list, list);
-               }
-       }
-
-       if (rearm)
-               __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-
-       local_irq_enable();
-}
-
-/**
- * blk_iopoll_disable - Disable iopoll on this @iop
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Disable io polling and wait for any pending callbacks to have completed.
- **/
-void blk_iopoll_disable(struct blk_iopoll *iop)
-{
-       set_bit(IOPOLL_F_DISABLE, &iop->state);
-       while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
-               msleep(1);
-       clear_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_disable);
-
-/**
- * blk_iopoll_enable - Enable iopoll on this @iop
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Enable iopoll on this @iop. Note that the handler run will not be
- *     scheduled, it will only mark it as active.
- **/
-void blk_iopoll_enable(struct blk_iopoll *iop)
-{
-       BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
-       smp_mb__before_atomic();
-       clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_enable);
-
-/**
- * blk_iopoll_init - Initialize this @iop
- * @iop:      The parent iopoll structure
- * @weight:   The default weight (or command completion budget)
- * @poll_fn:  The handler to invoke
- *
- * Description:
- *     Initialize this blk_iopoll structure. Before being actively used, the
- *     driver must call blk_iopoll_enable().
- **/
-void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
-{
-       memset(iop, 0, sizeof(*iop));
-       INIT_LIST_HEAD(&iop->list);
-       iop->weight = weight;
-       iop->poll = poll_fn;
-       set_bit(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_init);
-
-static int blk_iopoll_cpu_notify(struct notifier_block *self,
-                                unsigned long action, void *hcpu)
-{
-       /*
-        * If a CPU goes away, splice its entries to the current CPU
-        * and trigger a run of the softirq
-        */
-       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-               int cpu = (unsigned long) hcpu;
-
-               local_irq_disable();
-               list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
-                                this_cpu_ptr(&blk_cpu_iopoll));
-               __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-               local_irq_enable();
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block blk_iopoll_cpu_notifier = {
-       .notifier_call  = blk_iopoll_cpu_notify,
-};
-
-static __init int blk_iopoll_setup(void)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
-
-       open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
-       register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
-       return 0;
-}
-subsys_initcall(blk_iopoll_setup);
index 1699df5b0493a721a5ee1945a07c8ec516856164..888a7fec81f715199c3050576516e55b692f2b9b 100644 (file)
@@ -70,6 +70,18 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
        return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
 }
 
+static inline unsigned get_max_io_size(struct request_queue *q,
+                                      struct bio *bio)
+{
+       unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+       unsigned mask = queue_logical_block_size(q) - 1;
+
+       /* aligned to logical block size */
+       sectors &= ~(mask >> 9);
+
+       return sectors;
+}
+
 static struct bio *blk_bio_segment_split(struct request_queue *q,
                                         struct bio *bio,
                                         struct bio_set *bs,
@@ -81,6 +93,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
        unsigned front_seg_size = bio->bi_seg_front_size;
        bool do_split = true;
        struct bio *new = NULL;
+       const unsigned max_sectors = get_max_io_size(q, bio);
 
        bio_for_each_segment(bv, bio, iter) {
                /*
@@ -90,20 +103,19 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
                if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
                        goto split;
 
-               if (sectors + (bv.bv_len >> 9) >
-                               blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+               if (sectors + (bv.bv_len >> 9) > max_sectors) {
                        /*
                         * Consider this a new segment if we're splitting in
                         * the middle of this vector.
                         */
                        if (nsegs < queue_max_segments(q) &&
-                           sectors < blk_max_size_offset(q,
-                                               bio->bi_iter.bi_sector)) {
+                           sectors < max_sectors) {
                                nsegs++;
-                               sectors = blk_max_size_offset(q,
-                                               bio->bi_iter.bi_sector);
+                               sectors = max_sectors;
                        }
-                       goto split;
+                       if (sectors)
+                               goto split;
+                       /* Make this single bvec the first segment */
                }
 
                if (bvprvp && blk_queue_cluster(q)) {
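
get_max_io_size() above rounds the per-offset sector limit down to the logical block size so a split never ends mid logical block. The mask arithmetic, standalone, assuming 512-byte sectors (hence the >> 9) and a 4096-byte logical block:

#include <stdio.h>

int main(void)
{
        unsigned lbs = 4096;                 /* logical block size, bytes */
        unsigned mask = lbs - 1;
        unsigned sectors = 1021;             /* raw limit in 512 B sectors */

        /* Clear the low bits so sectors is a multiple of lbs/512. */
        sectors &= ~(mask >> 9);

        printf("sectors=%u\n", sectors);     /* 1016 = 127 * 8 */
        return 0;
}
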
index 6889d7183a2ac1ec31d4132b03647191f8e4ed3a..56c0a726b619374ec9f8cf4b60649a14919f0625 100644 (file)
@@ -599,12 +599,12 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
                 * If a request wasn't started before the queue was
                 * marked dying, kill it here or it'll go unnoticed.
                 */
-               if (unlikely(blk_queue_dying(rq->q)))
-                       blk_mq_complete_request(rq, -EIO);
+               if (unlikely(blk_queue_dying(rq->q))) {
+                       rq->errors = -EIO;
+                       blk_mq_end_request(rq, rq->errors);
+               }
                return;
        }
-       if (rq->cmd_flags & REQ_NO_TIMEOUT)
-               return;
 
        if (time_after_eq(jiffies, rq->deadline)) {
                if (!blk_mark_rq_complete(rq))
@@ -615,15 +615,19 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
        }
 }
 
-static void blk_mq_rq_timer(unsigned long priv)
+static void blk_mq_timeout_work(struct work_struct *work)
 {
-       struct request_queue *q = (struct request_queue *)priv;
+       struct request_queue *q =
+               container_of(work, struct request_queue, timeout_work);
        struct blk_mq_timeout_data data = {
                .next           = 0,
                .next_set       = 0,
        };
        int i;
 
+       if (blk_queue_enter(q, true))
+               return;
+
        blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
 
        if (data.next_set) {
@@ -638,6 +642,7 @@ static void blk_mq_rq_timer(unsigned long priv)
                                blk_mq_tag_idle(hctx);
                }
        }
+       blk_queue_exit(q);
 }
 
 /*
@@ -2008,7 +2013,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
                hctxs[i]->queue_num = i;
        }
 
-       setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
+       INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
        blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
 
        q->nr_queues = nr_cpu_ids;
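
Both timeout paths above move from a timer to a work item so the handler may sleep (blk_queue_enter() can block), and they recover the queue from the embedded work item via container_of(). The pattern, modeled standalone with simplified stand-in types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };

struct request_queue {
        int id;
        struct work_struct timeout_work;   /* embedded, like the kernel's */
};

static void timeout_work_fn(struct work_struct *work)
{
        struct request_queue *q =
                container_of(work, struct request_queue, timeout_work);

        printf("timeout on queue %d\n", q->id);
}

int main(void)
{
        struct request_queue q = { .id = 7 };

        timeout_work_fn(&q.timeout_work);   /* prints: timeout on queue 7 */
        return 0;
}
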
index dd49735839789167d427dc8b64da71c6b21f08f5..c7bb666aafd100a329c67b97e616c9f9037c508e 100644 (file)
@@ -91,8 +91,8 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
        lim->virt_boundary_mask = 0;
        lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-       lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors =
-               BLK_SAFE_MAX_SECTORS;
+       lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+       lim->max_dev_sectors = 0;
        lim->chunk_sectors = 0;
        lim->max_write_same_sectors = 0;
        lim->max_discard_sectors = 0;
index e140cc487ce11349ff1917e4e866eccf0a18106c..dd93763057ce0187dcc1b81602b75a43310d451d 100644 (file)
@@ -147,10 +147,9 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
 
 static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
 {
-       unsigned long long val;
 
-       val = q->limits.max_hw_discard_sectors << 9;
-       return sprintf(page, "%llu\n", val);
+       return sprintf(page, "%llu\n",
+               (unsigned long long)q->limits.max_hw_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
index 3610af5617488a9dbfec02511972c8f52d75f20f..a30441a200c0952eec31f6694dd05641a54227fa 100644 (file)
@@ -127,13 +127,16 @@ static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout
        }
 }
 
-void blk_rq_timed_out_timer(unsigned long data)
+void blk_timeout_work(struct work_struct *work)
 {
-       struct request_queue *q = (struct request_queue *) data;
+       struct request_queue *q =
+               container_of(work, struct request_queue, timeout_work);
        unsigned long flags, next = 0;
        struct request *rq, *tmp;
        int next_set = 0;
 
+       if (blk_queue_enter(q, true))
+               return;
        spin_lock_irqsave(q->queue_lock, flags);
 
        list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
@@ -143,6 +146,7 @@ void blk_rq_timed_out_timer(unsigned long data)
                mod_timer(&q->timeout, round_jiffies_up(next));
 
        spin_unlock_irqrestore(q->queue_lock, flags);
+       blk_queue_exit(q);
 }
 
 /**
@@ -193,9 +197,6 @@ void blk_add_timer(struct request *req)
        struct request_queue *q = req->q;
        unsigned long expiry;
 
-       if (req->cmd_flags & REQ_NO_TIMEOUT)
-               return;
-
        /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
        if (!q->mq_ops && !q->rq_timed_out_fn)
                return;
index c43926d3d74d0578216d1a699b1dfefff2a899df..70e4aee9cdcb6ead5974dce6a5abf454344b5b24 100644 (file)
@@ -93,7 +93,7 @@ static inline void blk_flush_integrity(void)
 }
 #endif
 
-void blk_rq_timed_out_timer(unsigned long data);
+void blk_timeout_work(struct work_struct *work);
 unsigned long blk_rq_timeout(unsigned long timeout);
 void blk_add_timer(struct request *req);
 void blk_delete_timer(struct request *);
index a753df2b3fc29324a850442966b15199bc932508..d0dd7882d8c7fa7ffe80ea2e9a6ad3c0f5e97a68 100644 (file)
@@ -39,7 +39,6 @@ struct deadline_data {
         */
        struct request *next_rq[2];
        unsigned int batching;          /* number of sequential requests made */
-       sector_t last_sector;           /* head position */
        unsigned int starved;           /* times reads have starved writes */
 
        /*
@@ -210,8 +209,6 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
        dd->next_rq[WRITE] = NULL;
        dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-       dd->last_sector = rq_end_sector(rq);
-
        /*
         * take it off the sort and fifo list, move
         * to dispatch queue
index 2c84683aada56e4b52c9080b88ce99eb1d7b68c8..d8996bbd7f12805b9c74429813f5b3ee3d744bdc 100644 (file)
@@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
 
        return true;
 }
-
-static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
-{
-       unsigned long arg;
-       int rc = 0;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       if (get_user(arg, (int __user *)(argp)))
-               return -EFAULT;
-       arg = !!arg;
-       if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
-               return 0;
-
-       if (arg)
-               arg = S_DAX;
-
-       if (arg && !blkdev_dax_capable(bdev))
-               return -ENOTTY;
-
-       mutex_lock(&bdev->bd_inode->i_mutex);
-       if (bdev->bd_map_count == 0)
-               inode_set_flags(bdev->bd_inode, arg, S_DAX);
-       else
-               rc = -EBUSY;
-       mutex_unlock(&bdev->bd_inode->i_mutex);
-       return rc;
-}
-#else
-static int blkdev_daxset(struct block_device *bdev, int arg)
-{
-       if (arg)
-               return -ENOTTY;
-       return 0;
-}
 #endif
 
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
@@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
        case BLKTRACESETUP:
        case BLKTRACETEARDOWN:
                return blk_trace_ioctl(bdev, cmd, argp);
-       case BLKDAXSET:
-               return blkdev_daxset(bdev, arg);
        case BLKDAXGET:
                return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
                break;
index 746935a5973ca6c76b8f66cfff8d58ce50566994..fefd01b496a0852754d9a586c9b0dd6d940ff746 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
+#include <linux/dax.h>
 #include <linux/blktrace_api.h>
 
 #include "partitions/check.h"
@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
        return 0;
 }
 
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
 {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+       return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
+                       NULL);
+}
+
+unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+{
        struct page *page;
 
-       page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-                                NULL);
+       /* don't populate page cache for dax capable devices */
+       if (IS_DAX(bdev->bd_inode))
+               page = read_dax_sector(bdev, n);
+       else
+               page = read_pagecache_sector(bdev, n);
+
        if (!IS_ERR(page)) {
                if (PageError(page))
                        goto fail;
index 7240821137fde371f0e32d9ae8ac25b64ab58862..3be07ad1d80dc835789c92ba020b58809010473a 100644 (file)
@@ -472,11 +472,13 @@ config CRYPTO_CRCT10DIF_PCLMUL
 config CRYPTO_GHASH
        tristate "GHASH digest algorithm"
        select CRYPTO_GF128MUL
+       select CRYPTO_HASH
        help
          GHASH is a message digest algorithm for GCM (Galois/Counter Mode).
 
 config CRYPTO_POLY1305
        tristate "Poly1305 authenticator algorithm"
+       select CRYPTO_HASH
        help
          Poly1305 authenticator algorithm, RFC7539.
 
index a8e7aa3e257bbc3c6254b82d2966b9fde3031184..f5e18c2a48527bb3f5bbdc5202b37577689710b3 100644 (file)
@@ -76,6 +76,8 @@ int af_alg_register_type(const struct af_alg_type *type)
                goto unlock;
 
        type->ops->owner = THIS_MODULE;
+       if (type->ops_nokey)
+               type->ops_nokey->owner = THIS_MODULE;
        node->type = type;
        list_add(&node->list, &alg_types);
        err = 0;
@@ -125,6 +127,26 @@ int af_alg_release(struct socket *sock)
 }
 EXPORT_SYMBOL_GPL(af_alg_release);
 
+void af_alg_release_parent(struct sock *sk)
+{
+       struct alg_sock *ask = alg_sk(sk);
+       unsigned int nokey = ask->nokey_refcnt;
+       bool last = nokey && !ask->refcnt;
+
+       sk = ask->parent;
+       ask = alg_sk(sk);
+
+       lock_sock(sk);
+       ask->nokey_refcnt -= nokey;
+       if (!last)
+               last = !--ask->refcnt;
+       release_sock(sk);
+
+       if (last)
+               sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_release_parent);
+
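
af_alg_release_parent() balances two counters: refcnt holds a single shared parent reference for all key-authorized children, while each nokey child pins the parent socket individually. A simplified, lock-free toy model of that accounting (it ignores the nokey-to-keyed upgrade path):

#include <stdbool.h>
#include <stdio.h>

struct parent {
        int sock_refs;      /* sock_hold()/sock_put() balance */
        int refcnt;         /* keyed children, share ONE sock ref */
        int nokey_refcnt;   /* keyless children, one sock ref each */
};

static void child_attach(struct parent *p, bool nokey)
{
        if (nokey || p->refcnt++ == 0)
                p->sock_refs++;               /* sock_hold() */
        p->nokey_refcnt += nokey;
}

static void child_release(struct parent *p, bool nokey)
{
        p->nokey_refcnt -= nokey;
        if (nokey || --p->refcnt == 0)
                p->sock_refs--;               /* sock_put() */
}

int main(void)
{
        struct parent p = { .sock_refs = 1 };  /* creator's reference */

        child_attach(&p, false);  /* 1st keyed child pins the parent */
        child_attach(&p, false);  /* 2nd keyed child shares that pin */
        child_attach(&p, true);   /* nokey child takes its own pin   */

        child_release(&p, true);
        child_release(&p, false);
        child_release(&p, false);
        printf("sock_refs=%d\n", p.sock_refs);   /* back to 1 */
        return 0;
}
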
 static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        const u32 forbidden = CRYPTO_ALG_INTERNAL;
@@ -133,6 +155,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct sockaddr_alg *sa = (void *)uaddr;
        const struct af_alg_type *type;
        void *private;
+       int err;
 
        if (sock->state == SS_CONNECTED)
                return -EINVAL;
@@ -160,16 +183,22 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                return PTR_ERR(private);
        }
 
+       err = -EBUSY;
        lock_sock(sk);
+       if (ask->refcnt | ask->nokey_refcnt)
+               goto unlock;
 
        swap(ask->type, type);
        swap(ask->private, private);
 
+       err = 0;
+
+unlock:
        release_sock(sk);
 
        alg_do_release(type, private);
 
-       return 0;
+       return err;
 }
 
 static int alg_setkey(struct sock *sk, char __user *ukey,
@@ -202,11 +231,15 @@ static int alg_setsockopt(struct socket *sock, int level, int optname,
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
        const struct af_alg_type *type;
-       int err = -ENOPROTOOPT;
+       int err = -EBUSY;
 
        lock_sock(sk);
+       if (ask->refcnt)
+               goto unlock;
+
        type = ask->type;
 
+       err = -ENOPROTOOPT;
        if (level != SOL_ALG || !type)
                goto unlock;
 
@@ -238,6 +271,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
        struct alg_sock *ask = alg_sk(sk);
        const struct af_alg_type *type;
        struct sock *sk2;
+       unsigned int nokey;
        int err;
 
        lock_sock(sk);
@@ -257,20 +291,29 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
        security_sk_clone(sk, sk2);
 
        err = type->accept(ask->private, sk2);
-       if (err) {
-               sk_free(sk2);
+
+       nokey = err == -ENOKEY;
+       if (nokey && type->accept_nokey)
+               err = type->accept_nokey(ask->private, sk2);
+
+       if (err)
                goto unlock;
-       }
 
        sk2->sk_family = PF_ALG;
 
-       sock_hold(sk);
+       if (nokey || !ask->refcnt++)
+               sock_hold(sk);
+       ask->nokey_refcnt += nokey;
        alg_sk(sk2)->parent = sk;
        alg_sk(sk2)->type = type;
+       alg_sk(sk2)->nokey_refcnt = nokey;
 
        newsock->ops = type->ops;
        newsock->state = SS_CONNECTED;
 
+       if (nokey)
+               newsock->ops = type->ops_nokey;
+
        err = 0;
 
 unlock:
index 9c1dc8d6106a89a0f853271c1dfc49cd301ec983..d19b52324cf520ee777743ee895efb2f537f5e62 100644 (file)
@@ -451,6 +451,7 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
        struct ahash_alg *alg = crypto_ahash_alg(hash);
 
        hash->setkey = ahash_nosetkey;
+       hash->has_setkey = false;
        hash->export = ahash_no_export;
        hash->import = ahash_no_import;
 
@@ -463,8 +464,10 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
        hash->finup = alg->finup ?: ahash_def_finup;
        hash->digest = alg->digest;
 
-       if (alg->setkey)
+       if (alg->setkey) {
                hash->setkey = alg->setkey;
+               hash->has_setkey = true;
+       }
        if (alg->export)
                hash->export = alg->export;
        if (alg->import)
index b4c24fe3dcfb5ed575350406c9dd7ece3c006aef..68a5ceaa04c81072f453a7b2ca2605bed39bd5a1 100644 (file)
@@ -34,6 +34,11 @@ struct hash_ctx {
        struct ahash_request req;
 };
 
+struct algif_hash_tfm {
+       struct crypto_ahash *hash;
+       bool has_key;
+};
+
 static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
                        size_t ignored)
 {
@@ -49,7 +54,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
        if (!ctx->more) {
-               err = crypto_ahash_init(&ctx->req);
+               err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req),
+                                               &ctx->completion);
                if (err)
                        goto unlock;
        }
@@ -120,6 +126,7 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
        } else {
                if (!ctx->more) {
                        err = crypto_ahash_init(&ctx->req);
+                       err = af_alg_wait_for_completion(err, &ctx->completion);
                        if (err)
                                goto unlock;
                }
@@ -235,19 +242,151 @@ static struct proto_ops algif_hash_ops = {
        .accept         =       hash_accept,
 };
 
+static int hash_check_key(struct socket *sock)
+{
+       int err = 0;
+       struct sock *psk;
+       struct alg_sock *pask;
+       struct algif_hash_tfm *tfm;
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+
+       lock_sock(sk);
+       if (ask->refcnt)
+               goto unlock_child;
+
+       psk = ask->parent;
+       pask = alg_sk(ask->parent);
+       tfm = pask->private;
+
+       err = -ENOKEY;
+       lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
+       if (!tfm->has_key)
+               goto unlock;
+
+       if (!pask->refcnt++)
+               sock_hold(psk);
+
+       ask->refcnt = 1;
+       sock_put(psk);
+
+       err = 0;
+
+unlock:
+       release_sock(psk);
+unlock_child:
+       release_sock(sk);
+
+       return err;
+}
+
+static int hash_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
+                             size_t size)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_sendmsg(sock, msg, size);
+}
+
+static ssize_t hash_sendpage_nokey(struct socket *sock, struct page *page,
+                                  int offset, size_t size, int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_sendpage(sock, page, offset, size, flags);
+}
+
+static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
+                             size_t ignored, int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_recvmsg(sock, msg, ignored, flags);
+}
+
+static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
+                            int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_accept(sock, newsock, flags);
+}
+
+static struct proto_ops algif_hash_ops_nokey = {
+       .family         =       PF_ALG,
+
+       .connect        =       sock_no_connect,
+       .socketpair     =       sock_no_socketpair,
+       .getname        =       sock_no_getname,
+       .ioctl          =       sock_no_ioctl,
+       .listen         =       sock_no_listen,
+       .shutdown       =       sock_no_shutdown,
+       .getsockopt     =       sock_no_getsockopt,
+       .mmap           =       sock_no_mmap,
+       .bind           =       sock_no_bind,
+       .setsockopt     =       sock_no_setsockopt,
+       .poll           =       sock_no_poll,
+
+       .release        =       af_alg_release,
+       .sendmsg        =       hash_sendmsg_nokey,
+       .sendpage       =       hash_sendpage_nokey,
+       .recvmsg        =       hash_recvmsg_nokey,
+       .accept         =       hash_accept_nokey,
+};
+
 static void *hash_bind(const char *name, u32 type, u32 mask)
 {
-       return crypto_alloc_ahash(name, type, mask);
+       struct algif_hash_tfm *tfm;
+       struct crypto_ahash *hash;
+
+       tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+       if (!tfm)
+               return ERR_PTR(-ENOMEM);
+
+       hash = crypto_alloc_ahash(name, type, mask);
+       if (IS_ERR(hash)) {
+               kfree(tfm);
+               return ERR_CAST(hash);
+       }
+
+       tfm->hash = hash;
+
+       return tfm;
 }
 
 static void hash_release(void *private)
 {
-       crypto_free_ahash(private);
+       struct algif_hash_tfm *tfm = private;
+
+       crypto_free_ahash(tfm->hash);
+       kfree(tfm);
 }
 
 static int hash_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-       return crypto_ahash_setkey(private, key, keylen);
+       struct algif_hash_tfm *tfm = private;
+       int err;
+
+       err = crypto_ahash_setkey(tfm->hash, key, keylen);
+       tfm->has_key = !err;
+
+       return err;
 }
 
 static void hash_sock_destruct(struct sock *sk)
@@ -261,12 +400,14 @@ static void hash_sock_destruct(struct sock *sk)
        af_alg_release_parent(sk);
 }
 
-static int hash_accept_parent(void *private, struct sock *sk)
+static int hash_accept_parent_nokey(void *private, struct sock *sk)
 {
        struct hash_ctx *ctx;
        struct alg_sock *ask = alg_sk(sk);
-       unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private);
-       unsigned ds = crypto_ahash_digestsize(private);
+       struct algif_hash_tfm *tfm = private;
+       struct crypto_ahash *hash = tfm->hash;
+       unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
+       unsigned ds = crypto_ahash_digestsize(hash);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
@@ -286,7 +427,7 @@ static int hash_accept_parent(void *private, struct sock *sk)
 
        ask->private = ctx;
 
-       ahash_request_set_tfm(&ctx->req, private);
+       ahash_request_set_tfm(&ctx->req, hash);
        ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                   af_alg_complete, &ctx->completion);
 
@@ -295,12 +436,24 @@ static int hash_accept_parent(void *private, struct sock *sk)
        return 0;
 }
 
+static int hash_accept_parent(void *private, struct sock *sk)
+{
+       struct algif_hash_tfm *tfm = private;
+
+       if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash))
+               return -ENOKEY;
+
+       return hash_accept_parent_nokey(private, sk);
+}
+
 static const struct af_alg_type algif_type_hash = {
        .bind           =       hash_bind,
        .release        =       hash_release,
        .setkey         =       hash_setkey,
        .accept         =       hash_accept_parent,
+       .accept_nokey   =       hash_accept_parent_nokey,
        .ops            =       &algif_hash_ops,
+       .ops_nokey      =       &algif_hash_ops_nokey,
        .name           =       "hash",
        .owner          =       THIS_MODULE
 };
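
algif_hash now registers two proto_ops tables and installs ops_nokey at accept() time when no key has been set, so every entry point fails fast until hash_check_key() succeeds. The vtable-selection shape, as a standalone sketch with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

struct sock { bool has_key; };

struct proto_ops { int (*sendmsg)(struct sock *, const char *); };

static int do_sendmsg(struct sock *sk, const char *msg)
{
        printf("sent: %s\n", msg);
        return 0;
}

/* nokey entry point: refuse until a key has been set. */
static int sendmsg_nokey(struct sock *sk, const char *msg)
{
        if (!sk->has_key)
                return -126;            /* -ENOKEY on Linux */
        return do_sendmsg(sk, msg);
}

static const struct proto_ops ops       = { .sendmsg = do_sendmsg };
static const struct proto_ops ops_nokey = { .sendmsg = sendmsg_nokey };

int main(void)
{
        struct sock sk = { .has_key = false };
        /* accept() would install ops_nokey while no key is set. */
        const struct proto_ops *o = sk.has_key ? &ops : &ops_nokey;

        printf("ret=%d\n", o->sendmsg(&sk, "hello"));   /* ret=-126 */
        return 0;
}
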
index eaa9f9be5b87824446c20f07d6bef14114e866b3..28556fce42671e2f182d5239d3dc6468e5b1d970 100644 (file)
@@ -31,6 +31,11 @@ struct skcipher_sg_list {
        struct scatterlist sg[0];
 };
 
+struct skcipher_tfm {
+       struct crypto_skcipher *skcipher;
+       bool has_key;
+};
+
 struct skcipher_ctx {
        struct list_head tsgl;
        struct af_alg_sgl rsgl;
@@ -60,18 +65,10 @@ struct skcipher_async_req {
        struct skcipher_async_rsgl first_sgl;
        struct list_head list;
        struct scatterlist *tsg;
-       char iv[];
+       atomic_t *inflight;
+       struct skcipher_request req;
 };
 
-#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq + \
-       crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req)))
-
-#define GET_REQ_SIZE(ctx) \
-       crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req))
-
-#define GET_IV_SIZE(ctx) \
-       crypto_skcipher_ivsize(crypto_skcipher_reqtfm(&ctx->req))
-
 #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
                      sizeof(struct scatterlist) - 1)
 
@@ -97,15 +94,12 @@ static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
 
 static void skcipher_async_cb(struct crypto_async_request *req, int err)
 {
-       struct sock *sk = req->data;
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct skcipher_async_req *sreq = GET_SREQ(req, ctx);
+       struct skcipher_async_req *sreq = req->data;
        struct kiocb *iocb = sreq->iocb;
 
-       atomic_dec(&ctx->inflight);
+       atomic_dec(sreq->inflight);
        skcipher_free_async_sgls(sreq);
-       kfree(req);
+       kzfree(sreq);
        iocb->ki_complete(iocb, err, err);
 }
 
@@ -301,8 +295,11 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
-       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
        unsigned ivsize = crypto_skcipher_ivsize(tfm);
        struct skcipher_sg_list *sgl;
        struct af_alg_control con = {};
@@ -387,7 +384,8 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 
                sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
                sg = sgl->sg;
-               sg_unmark_end(sg + sgl->cur);
+               if (sgl->cur)
+                       sg_unmark_end(sg + sgl->cur - 1);
                do {
                        i = sgl->cur;
                        plen = min_t(size_t, len, PAGE_SIZE);
@@ -503,37 +501,43 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
        struct skcipher_sg_list *sgl;
        struct scatterlist *sg;
        struct skcipher_async_req *sreq;
        struct skcipher_request *req;
        struct skcipher_async_rsgl *last_rsgl = NULL;
-       unsigned int txbufs = 0, len = 0, tx_nents = skcipher_all_sg_nents(ctx);
-       unsigned int reqlen = sizeof(struct skcipher_async_req) +
-                               GET_REQ_SIZE(ctx) + GET_IV_SIZE(ctx);
+       unsigned int txbufs = 0, len = 0, tx_nents;
+       unsigned int reqsize = crypto_skcipher_reqsize(tfm);
+       unsigned int ivsize = crypto_skcipher_ivsize(tfm);
        int err = -ENOMEM;
        bool mark = false;
+       char *iv;
 
-       lock_sock(sk);
-       req = kmalloc(reqlen, GFP_KERNEL);
-       if (unlikely(!req))
-               goto unlock;
+       sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL);
+       if (unlikely(!sreq))
+               goto out;
 
-       sreq = GET_SREQ(req, ctx);
+       req = &sreq->req;
+       iv = (char *)(req + 1) + reqsize;
        sreq->iocb = msg->msg_iocb;
-       memset(&sreq->first_sgl, '\0', sizeof(struct skcipher_async_rsgl));
        INIT_LIST_HEAD(&sreq->list);
+       sreq->inflight = &ctx->inflight;
+
+       lock_sock(sk);
+       tx_nents = skcipher_all_sg_nents(ctx);
        sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
-       if (unlikely(!sreq->tsg)) {
-               kfree(req);
+       if (unlikely(!sreq->tsg))
                goto unlock;
-       }
        sg_init_table(sreq->tsg, tx_nents);
-       memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx));
-       skcipher_request_set_tfm(req, crypto_skcipher_reqtfm(&ctx->req));
-       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                     skcipher_async_cb, sk);
+       memcpy(iv, ctx->iv, ivsize);
+       skcipher_request_set_tfm(req, tfm);
+       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
+                                     skcipher_async_cb, sreq);
 
        while (iov_iter_count(&msg->msg_iter)) {
                struct skcipher_async_rsgl *rsgl;
@@ -609,20 +613,22 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
                sg_mark_end(sreq->tsg + txbufs - 1);
 
        skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-                                  len, sreq->iv);
+                                  len, iv);
        err = ctx->enc ? crypto_skcipher_encrypt(req) :
                         crypto_skcipher_decrypt(req);
        if (err == -EINPROGRESS) {
                atomic_inc(&ctx->inflight);
                err = -EIOCBQUEUED;
+               sreq = NULL;
                goto unlock;
        }
 free:
        skcipher_free_async_sgls(sreq);
-       kfree(req);
 unlock:
        skcipher_wmem_wakeup(sk);
        release_sock(sk);
+       kzfree(sreq);
+out:
        return err;
 }
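
The reworked async path above carves the skcipher request and its IV out of a single allocation, which is why iv = (char *)(req + 1) + reqsize. The trailing-storage layout, standalone with hypothetical sizes:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct async_req {
        void *iocb;
        char req[];           /* header followed by trailing storage */
};

int main(void)
{
        size_t reqsize = 64, ivsize = 16;

        /* One allocation: [async_req][request ctx][IV]. */
        struct async_req *sreq = calloc(1, sizeof(*sreq) + reqsize + ivsize);
        if (!sreq)
                return 1;

        char *req = sreq->req;           /* request context lives here */
        char *iv  = req + reqsize;       /* IV follows the request ctx */

        memset(iv, 0xab, ivsize);
        printf("req=%p iv=%p delta=%zu\n",
               (void *)req, (void *)iv, (size_t)(iv - req));
        free(sreq);
        return 0;
}
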
 
@@ -631,9 +637,12 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
-       unsigned bs = crypto_skcipher_blocksize(crypto_skcipher_reqtfm(
-               &ctx->req));
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
+       unsigned bs = crypto_skcipher_blocksize(tfm);
        struct skcipher_sg_list *sgl;
        struct scatterlist *sg;
        int err = -EAGAIN;
@@ -642,13 +651,6 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
        while (msg_data_left(msg)) {
-               sgl = list_first_entry(&ctx->tsgl,
-                                      struct skcipher_sg_list, list);
-               sg = sgl->sg;
-
-               while (!sg->length)
-                       sg++;
-
                if (!ctx->used) {
                        err = skcipher_wait_for_data(sk, flags);
                        if (err)
@@ -669,6 +671,13 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
                if (!used)
                        goto free;
 
+               sgl = list_first_entry(&ctx->tsgl,
+                                      struct skcipher_sg_list, list);
+               sg = sgl->sg;
+
+               while (!sg->length)
+                       sg++;
+
                skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
                                           ctx->iv);
 
@@ -748,19 +757,139 @@ static struct proto_ops algif_skcipher_ops = {
        .poll           =       skcipher_poll,
 };
 
+static int skcipher_check_key(struct socket *sock)
+{
+       int err = 0;
+       struct sock *psk;
+       struct alg_sock *pask;
+       struct skcipher_tfm *tfm;
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+
+       lock_sock(sk);
+       if (ask->refcnt)
+               goto unlock_child;
+
+       psk = ask->parent;
+       pask = alg_sk(ask->parent);
+       tfm = pask->private;
+
+       err = -ENOKEY;
+       lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
+       if (!tfm->has_key)
+               goto unlock;
+
+       if (!pask->refcnt++)
+               sock_hold(psk);
+
+       ask->refcnt = 1;
+       sock_put(psk);
+
+       err = 0;
+
+unlock:
+       release_sock(psk);
+unlock_child:
+       release_sock(sk);
+
+       return err;
+}
+
+static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
+                                 size_t size)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_sendmsg(sock, msg, size);
+}
+
+static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
+                                      int offset, size_t size, int flags)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_sendpage(sock, page, offset, size, flags);
+}
+
+static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
+                                 size_t ignored, int flags)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_recvmsg(sock, msg, ignored, flags);
+}
+
+static struct proto_ops algif_skcipher_ops_nokey = {
+       .family         =       PF_ALG,
+
+       .connect        =       sock_no_connect,
+       .socketpair     =       sock_no_socketpair,
+       .getname        =       sock_no_getname,
+       .ioctl          =       sock_no_ioctl,
+       .listen         =       sock_no_listen,
+       .shutdown       =       sock_no_shutdown,
+       .getsockopt     =       sock_no_getsockopt,
+       .mmap           =       sock_no_mmap,
+       .bind           =       sock_no_bind,
+       .accept         =       sock_no_accept,
+       .setsockopt     =       sock_no_setsockopt,
+
+       .release        =       af_alg_release,
+       .sendmsg        =       skcipher_sendmsg_nokey,
+       .sendpage       =       skcipher_sendpage_nokey,
+       .recvmsg        =       skcipher_recvmsg_nokey,
+       .poll           =       skcipher_poll,
+};
+
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 {
-       return crypto_alloc_skcipher(name, type, mask);
+       struct skcipher_tfm *tfm;
+       struct crypto_skcipher *skcipher;
+
+       tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+       if (!tfm)
+               return ERR_PTR(-ENOMEM);
+
+       skcipher = crypto_alloc_skcipher(name, type, mask);
+       if (IS_ERR(skcipher)) {
+               kfree(tfm);
+               return ERR_CAST(skcipher);
+       }
+
+       tfm->skcipher = skcipher;
+
+       return tfm;
 }
 
 static void skcipher_release(void *private)
 {
-       crypto_free_skcipher(private);
+       struct skcipher_tfm *tfm = private;
+
+       crypto_free_skcipher(tfm->skcipher);
+       kfree(tfm);
 }
 
 static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-       return crypto_skcipher_setkey(private, key, keylen);
+       struct skcipher_tfm *tfm = private;
+       int err;
+
+       err = crypto_skcipher_setkey(tfm->skcipher, key, keylen);
+       tfm->has_key = !err;
+
+       return err;
 }
 
 static void skcipher_wait(struct sock *sk)
@@ -788,24 +917,26 @@ static void skcipher_sock_destruct(struct sock *sk)
        af_alg_release_parent(sk);
 }
 
-static int skcipher_accept_parent(void *private, struct sock *sk)
+static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
        struct skcipher_ctx *ctx;
        struct alg_sock *ask = alg_sk(sk);
-       unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private);
+       struct skcipher_tfm *tfm = private;
+       struct crypto_skcipher *skcipher = tfm->skcipher;
+       unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;
 
-       ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private),
+       ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(skcipher),
                               GFP_KERNEL);
        if (!ctx->iv) {
                sock_kfree_s(sk, ctx, len);
                return -ENOMEM;
        }
 
-       memset(ctx->iv, 0, crypto_skcipher_ivsize(private));
+       memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
 
        INIT_LIST_HEAD(&ctx->tsgl);
        ctx->len = len;
@@ -818,8 +949,9 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
 
        ask->private = ctx;
 
-       skcipher_request_set_tfm(&ctx->req, private);
-       skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+       skcipher_request_set_tfm(&ctx->req, skcipher);
+       skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP |
+                                                CRYPTO_TFM_REQ_MAY_BACKLOG,
                                      af_alg_complete, &ctx->completion);
 
        sk->sk_destruct = skcipher_sock_destruct;
@@ -827,12 +959,24 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
        return 0;
 }
 
+static int skcipher_accept_parent(void *private, struct sock *sk)
+{
+       struct skcipher_tfm *tfm = private;
+
+       if (!tfm->has_key && crypto_skcipher_has_setkey(tfm->skcipher))
+               return -ENOKEY;
+
+       return skcipher_accept_parent_nokey(private, sk);
+}
+
 static const struct af_alg_type algif_type_skcipher = {
        .bind           =       skcipher_bind,
        .release        =       skcipher_release,
        .setkey         =       skcipher_setkey,
        .accept         =       skcipher_accept_parent,
+       .accept_nokey   =       skcipher_accept_parent_nokey,
        .ops            =       &algif_skcipher_ops,
+       .ops_nokey      =       &algif_skcipher_ops_nokey,
        .name           =       "skcipher",
        .owner          =       THIS_MODULE
 };
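
The hunks above gate algif_skcipher's data path on key material: a socket
accepted from an unkeyed parent tfm is given algif_skcipher_ops_nokey, whose
sendmsg/sendpage/recvmsg call skcipher_check_key() first and fail with
-ENOKEY until a key arrives, at which point the child takes a reference on
the parent and switches itself over. A minimal userspace sketch of the
resulting behaviour, assuming an AF_ALG-capable kernel; the algorithm name
and key bytes are illustrative:

    /* Hypothetical demo: recvmsg on a keyless skcipher socket -> ENOKEY. */
    #include <errno.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>
    #include <linux/if_alg.h>

    int main(void)
    {
            struct sockaddr_alg sa = {
                    .salg_family = AF_ALG,
                    .salg_type   = "skcipher",
                    .salg_name   = "cbc(aes)",      /* example algorithm */
            };
            char buf[16];
            int tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
            int opfd;

            bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));

            /* No ALG_SET_KEY yet: accept() succeeds via accept_nokey and
             * hands back a socket wired to the _nokey ops. */
            opfd = accept(tfmfd, NULL, NULL);
            if (read(opfd, buf, sizeof(buf)) < 0 && errno == ENOKEY)
                    printf("data path rejected: ENOKEY\n");

            /* Once the parent tfm gets a key, skcipher_check_key()
             * upgrades this same socket on its next operation. */
            setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, "0123456789abcdef", 16);

            close(opfd);
            close(tfmfd);
            return 0;
    }
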
index 758acabf2d819727ecd7f4cf0c6aa21c1c40649f..8f3056cd03991ddf820c3d8442d35daf3db7ee84 100644 (file)
@@ -547,9 +547,7 @@ int pkcs7_sig_note_set_of_authattrs(void *context, size_t hdrlen,
        struct pkcs7_signed_info *sinfo = ctx->sinfo;
 
        if (!test_bit(sinfo_has_content_type, &sinfo->aa_set) ||
-           !test_bit(sinfo_has_message_digest, &sinfo->aa_set) ||
-           (ctx->msg->data_type == OID_msIndirectData &&
-            !test_bit(sinfo_has_ms_opus_info, &sinfo->aa_set))) {
+           !test_bit(sinfo_has_message_digest, &sinfo->aa_set)) {
                pr_warn("Missing required AuthAttr\n");
                return -EBADMSG;
        }
index 06f1b60f02b223eeea70c000ec4055c470d4eeae..4c0a0e27187694c6d0d7281c1d61b2f25557f269 100644 (file)
@@ -172,4 +172,3 @@ MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CRYPTO("crc32c");
 MODULE_ALIAS_CRYPTO("crc32c-generic");
-MODULE_SOFTDEP("pre: crc32c");
index 237f3795cfaaa1f988fadf5b07eefe3c44609091..43fe85f20d577b4f3d1bbd6576b6d752bc578531 100644 (file)
@@ -499,6 +499,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                if (link->dump == NULL)
                        return -EINVAL;
 
+               down_read(&crypto_alg_sem);
                list_for_each_entry(alg, &crypto_alg_list, cra_list)
                        dump_alloc += CRYPTO_REPORT_MAXSIZE;
 
@@ -508,8 +509,11 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                .done = link->done,
                                .min_dump_alloc = dump_alloc,
                        };
-                       return netlink_dump_start(crypto_nlsk, skb, nlh, &c);
+                       err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
                }
+               up_read(&crypto_alg_sem);
+
+               return err;
        }
 
        err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX,
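
In the crypto_user hunk above, crypto_alg_sem is now held for reading across
both the dump_alloc sizing pass and netlink_dump_start(), so crypto_alg_list
cannot change between the two. A minimal sketch of that pattern, with all
names hypothetical:

    /* Size a buffer and kick off the dump under one read-side critical
     * section so the list cannot change in between.  Illustrative only. */
    #include <linux/list.h>
    #include <linux/rwsem.h>

    static DECLARE_RWSEM(item_sem);
    static LIST_HEAD(item_list);

    struct item {
            struct list_head list;
    };

    static int start_dump(int (*begin)(size_t alloc))
    {
            struct item *it;
            size_t alloc = 0;
            int err;

            down_read(&item_sem);           /* freeze the list... */
            list_for_each_entry(it, &item_list, list)
                    alloc += 64;            /* ...while sizing the buffer */
            err = begin(alloc);             /* ...and while starting the dump */
            up_read(&item_sem);

            return err;
    }
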
index ecb1e3d39bf0776a0d8805a9f8fadd3ec288bcb0..359754591653c7b265a5e6b89d70e9a9d16d33e3 100644 (file)
@@ -354,9 +354,10 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
        crt->final = shash_async_final;
        crt->finup = shash_async_finup;
        crt->digest = shash_async_digest;
+       crt->setkey = shash_async_setkey;
+
+       crt->has_setkey = alg->setkey != shash_no_setkey;
 
-       if (alg->setkey)
-               crt->setkey = shash_async_setkey;
        if (alg->export)
                crt->export = shash_async_export;
        if (alg->import)
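
The shash change installs crt->setkey unconditionally and derives a
has_setkey capability flag by comparing against the shash_no_setkey stub,
rather than leaving the op NULL for keyless algorithms. A small sketch of the
stub-plus-flag idiom (names illustrative):

    /* Instead of leaving an optional op NULL and testing for NULL at every
     * call site, install a stub and derive a boolean capability once. */
    #include <errno.h>
    #include <stdbool.h>

    struct ops {
            int (*setkey)(const unsigned char *key, unsigned int len);
            bool has_setkey;
    };

    static int no_setkey(const unsigned char *key, unsigned int len)
    {
            return -ENOSYS;         /* stub: algorithm takes no key */
    }

    static void init_ops(struct ops *ops,
                         int (*setkey)(const unsigned char *, unsigned int))
    {
            ops->setkey = setkey ? setkey : no_setkey;
            ops->has_setkey = ops->setkey != no_setkey;
    }
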
index 7591928be7ca789cfc909b3ea300fb6f2fd2cb6f..d199c0b1751c91cbcc5a978aadb97cdf609e33ab 100644 (file)
@@ -118,6 +118,7 @@ static int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
        skcipher->decrypt = skcipher_decrypt_blkcipher;
 
        skcipher->ivsize = crypto_blkcipher_ivsize(blkcipher);
+       skcipher->has_setkey = calg->cra_blkcipher.max_keysize;
 
        return 0;
 }
@@ -210,6 +211,7 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
        skcipher->ivsize = crypto_ablkcipher_ivsize(ablkcipher);
        skcipher->reqsize = crypto_ablkcipher_reqsize(ablkcipher) +
                            sizeof(struct ablkcipher_request);
+       skcipher->has_setkey = calg->cra_ablkcipher.max_keysize;
 
        return 0;
 }
index c570b1d9f09480ecec5513d3cfe61d33c26c81b5..0872d5fecb82f2dee893c7feb0ac6a408f903384 100644 (file)
@@ -880,7 +880,7 @@ static int acpi_lpss_platform_notify(struct notifier_block *nb,
                break;
        case BUS_NOTIFY_DRIVER_NOT_BOUND:
        case BUS_NOTIFY_UNBOUND_DRIVER:
-               pdev->dev.pm_domain = NULL;
+               dev_pm_domain_set(&pdev->dev, NULL);
                break;
        case BUS_NOTIFY_ADD_DEVICE:
                dev_pm_domain_set(&pdev->dev, &acpi_lpss_pm_domain);
index 6682c5daf742637024c1f72a19f7de750cb1bbe1..6e6bc1059301745fc1d5fd6ebf943d048082e821 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/hardirq.h>
 #include <linux/pstore.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h> /* kvfree() */
 #include <acpi/apei.h>
 
 #include "apei-internal.h"
@@ -532,10 +533,7 @@ retry:
                        return -ENOMEM;
                memcpy(new_entries, entries,
                       erst_record_id_cache.len * sizeof(entries[0]));
-               if (erst_record_id_cache.size < PAGE_SIZE)
-                       kfree(entries);
-               else
-                       vfree(entries);
+               kvfree(entries);
                erst_record_id_cache.entries = entries = new_entries;
                erst_record_id_cache.size = new_size;
        }
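
The ERST hunk replaces the open-coded "kfree() below PAGE_SIZE, vfree()
otherwise" branch with kvfree(), which inspects the address itself (via
is_vmalloc_addr()) and dispatches to the right deallocator. A sketch of the
matching allocate/free pairing, illustrative only:

    #include <linux/mm.h>           /* kvfree(), is_vmalloc_addr() */
    #include <linux/slab.h>
    #include <linux/vmalloc.h>

    static void *alloc_big(size_t size)
    {
            void *p = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

            if (!p)
                    p = vmalloc(size);      /* fall back for large buffers */
            return p;
    }

    static void free_big(void *p)
    {
            kvfree(p);      /* picks kfree() or vfree() for us */
    }
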
index ad6d8c6b777e7c83885cc3f854396808153653f3..fb53db1878544b88d060debd8042dcbeb48db83c 100644 (file)
@@ -469,37 +469,16 @@ static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
        nfit_mem->bdw = NULL;
 }
 
-static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
+static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
                struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
 {
        u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
        struct nfit_memdev *nfit_memdev;
        struct nfit_flush *nfit_flush;
-       struct nfit_dcr *nfit_dcr;
        struct nfit_bdw *nfit_bdw;
        struct nfit_idt *nfit_idt;
        u16 idt_idx, range_index;
 
-       list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
-               if (nfit_dcr->dcr->region_index != dcr)
-                       continue;
-               nfit_mem->dcr = nfit_dcr->dcr;
-               break;
-       }
-
-       if (!nfit_mem->dcr) {
-               dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n",
-                               spa->range_index, __to_nfit_memdev(nfit_mem)
-                               ? "" : " MEMDEV", nfit_mem->dcr ? "" : " DCR");
-               return -ENODEV;
-       }
-
-       /*
-        * We've found enough to create an nvdimm, optionally
-        * find an associated BDW
-        */
-       list_add(&nfit_mem->list, &acpi_desc->dimms);
-
        list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
                if (nfit_bdw->bdw->region_index != dcr)
                        continue;
@@ -508,12 +487,12 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
        }
 
        if (!nfit_mem->bdw)
-               return 0;
+               return;
 
        nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
 
        if (!nfit_mem->spa_bdw)
-               return 0;
+               return;
 
        range_index = nfit_mem->spa_bdw->range_index;
        list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
@@ -538,8 +517,6 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
                }
                break;
        }
-
-       return 0;
 }
 
 static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
@@ -548,7 +525,6 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
        struct nfit_mem *nfit_mem, *found;
        struct nfit_memdev *nfit_memdev;
        int type = nfit_spa_type(spa);
-       u16 dcr;
 
        switch (type) {
        case NFIT_SPA_DCR:
@@ -559,14 +535,18 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
        }
 
        list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-               int rc;
+               struct nfit_dcr *nfit_dcr;
+               u32 device_handle;
+               u16 dcr;
 
                if (nfit_memdev->memdev->range_index != spa->range_index)
                        continue;
                found = NULL;
                dcr = nfit_memdev->memdev->region_index;
+               device_handle = nfit_memdev->memdev->device_handle;
                list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
-                       if (__to_nfit_memdev(nfit_mem)->region_index == dcr) {
+                       if (__to_nfit_memdev(nfit_mem)->device_handle
+                                       == device_handle) {
                                found = nfit_mem;
                                break;
                        }
@@ -579,6 +559,31 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
                        if (!nfit_mem)
                                return -ENOMEM;
                        INIT_LIST_HEAD(&nfit_mem->list);
+                       list_add(&nfit_mem->list, &acpi_desc->dimms);
+               }
+
+               list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+                       if (nfit_dcr->dcr->region_index != dcr)
+                               continue;
+                       /*
+                        * Record the control region for the dimm.  For
+                        * the ACPI 6.1 case, where there are separate
+                        * control regions for the pmem vs blk
+                        * interfaces, be sure to record the extended
+                        * blk details.
+                        */
+                       if (!nfit_mem->dcr)
+                               nfit_mem->dcr = nfit_dcr->dcr;
+                       else if (nfit_mem->dcr->windows == 0
+                                       && nfit_dcr->dcr->windows)
+                               nfit_mem->dcr = nfit_dcr->dcr;
+                       break;
+               }
+
+               if (dcr && !nfit_mem->dcr) {
+                       dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
+                                       spa->range_index, dcr);
+                       return -ENODEV;
                }
 
                if (type == NFIT_SPA_DCR) {
@@ -595,6 +600,7 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
                                nfit_mem->idt_dcr = nfit_idt->idt;
                                break;
                        }
+                       nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
                } else {
                        /*
                         * A single dimm may belong to multiple SPA-PM
@@ -603,13 +609,6 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
                         */
                        nfit_mem->memdev_pmem = nfit_memdev->memdev;
                }
-
-               if (found)
-                       continue;
-
-               rc = nfit_mem_add(acpi_desc, nfit_mem, spa);
-               if (rc)
-                       return rc;
        }
 
        return 0;
@@ -1504,9 +1503,7 @@ static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc,
                case 1:
                        /* ARS unsupported, but we should never get here */
                        return 0;
-               case 2:
-                       return -EINVAL;
-               case 3:
+               case 6:
                        /* ARS is in progress */
                        msleep(1000);
                        break;
@@ -1517,13 +1514,13 @@ static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc,
 }
 
 static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
-               struct nd_cmd_ars_status *cmd)
+               struct nd_cmd_ars_status *cmd, u32 size)
 {
        int rc;
 
        while (1) {
                rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd,
-                       sizeof(*cmd));
+                       size);
                if (rc || cmd->status & 0xffff)
                        return -ENXIO;
 
@@ -1538,6 +1535,8 @@ static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
                case 2:
                        /* No ARS performed for the current boot */
                        return 0;
+               case 3:
+                       /* TODO: error list overflow support */
                default:
                        return -ENXIO;
                }
@@ -1581,6 +1580,7 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
        struct nd_cmd_ars_start *ars_start = NULL;
        struct nd_cmd_ars_cap *ars_cap = NULL;
        u64 start, len, cur, remaining;
+       u32 ars_status_size;
        int rc;
 
        ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
@@ -1610,14 +1610,14 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
         * Check if a full-range ARS has been run. If so, use those results
         * without having to start a new ARS.
         */
-       ars_status = kzalloc(ars_cap->max_ars_out + sizeof(*ars_status),
-                       GFP_KERNEL);
+       ars_status_size = ars_cap->max_ars_out;
+       ars_status = kzalloc(ars_status_size, GFP_KERNEL);
        if (!ars_status) {
                rc = -ENOMEM;
                goto out;
        }
 
-       rc = ars_get_status(nd_desc, ars_status);
+       rc = ars_get_status(nd_desc, ars_status, ars_status_size);
        if (rc)
                goto out;
 
@@ -1647,7 +1647,7 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
                if (rc)
                        goto out;
 
-               rc = ars_get_status(nd_desc, ars_status);
+               rc = ars_get_status(nd_desc, ars_status, ars_status_size);
                if (rc)
                        goto out;
 
index d30184c7f3bcb851c62fe410906160b6e802a631..c8e169e46673ae253882872cda7a94249540c3a5 100644 (file)
@@ -406,7 +406,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
                return 0;
        }
 
-       if (pci_has_managed_irq(dev))
+       if (dev->irq_managed && dev->irq > 0)
                return 0;
 
        entry = acpi_pci_irq_lookup(dev, pin);
@@ -451,7 +451,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
                kfree(entry);
                return rc;
        }
-       pci_set_managed_irq(dev, rc);
+       dev->irq = rc;
+       dev->irq_managed = 1;
 
        if (link)
                snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -474,9 +475,17 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
        u8 pin;
 
        pin = dev->pin;
-       if (!pin || !pci_has_managed_irq(dev))
+       if (!pin || !dev->irq_managed || dev->irq <= 0)
                return;
 
+       /* Keep IOAPIC pin configuration when suspending */
+       if (dev->dev.power.is_prepared)
+               return;
+#ifdef CONFIG_PM
+       if (dev->dev.power.runtime_status == RPM_SUSPENDING)
+               return;
+#endif
+
        entry = acpi_pci_irq_lookup(dev, pin);
        if (!entry)
                return;
@@ -496,6 +505,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
        dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
        if (gsi >= 0) {
                acpi_unregister_gsi(gsi);
-               pci_reset_managed_irq(dev);
+               dev->irq_managed = 0;
        }
 }
index fa2863567eed3c944c884245118c0d55d5ec5fdd..ededa909df2fc02eb2b77fb495e43f8e850c5469 100644 (file)
@@ -4,7 +4,6 @@
  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2002       Dominik Brodowski <devel@brodo.de>
- *  Copyright (c) 2015, The Linux Foundation. All rights reserved.
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
@@ -438,6 +437,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq)
  * enabled system.
  */
 
+#define ACPI_MAX_IRQS          256
 #define ACPI_MAX_ISA_IRQ       16
 
 #define PIRQ_PENALTY_PCI_AVAILABLE     (0)
@@ -447,7 +447,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq)
 #define PIRQ_PENALTY_ISA_USED          (16*16*16*16*16)
 #define PIRQ_PENALTY_ISA_ALWAYS                (16*16*16*16*16*16)
 
-static int acpi_irq_isa_penalty[ACPI_MAX_ISA_IRQ] = {
+static int acpi_irq_penalty[ACPI_MAX_IRQS] = {
        PIRQ_PENALTY_ISA_ALWAYS,        /* IRQ0 timer */
        PIRQ_PENALTY_ISA_ALWAYS,        /* IRQ1 keyboard */
        PIRQ_PENALTY_ISA_ALWAYS,        /* IRQ2 cascade */
@@ -464,68 +464,9 @@ static int acpi_irq_isa_penalty[ACPI_MAX_ISA_IRQ] = {
        PIRQ_PENALTY_ISA_USED,          /* IRQ13 fpe, sometimes */
        PIRQ_PENALTY_ISA_USED,          /* IRQ14 ide0 */
        PIRQ_PENALTY_ISA_USED,          /* IRQ15 ide1 */
+       /* >IRQ15 */
 };
 
-struct irq_penalty_info {
-       int irq;
-       int penalty;
-       struct list_head node;
-};
-
-static LIST_HEAD(acpi_irq_penalty_list);
-
-static int acpi_irq_get_penalty(int irq)
-{
-       struct irq_penalty_info *irq_info;
-
-       if (irq < ACPI_MAX_ISA_IRQ)
-               return acpi_irq_isa_penalty[irq];
-
-       list_for_each_entry(irq_info, &acpi_irq_penalty_list, node) {
-               if (irq_info->irq == irq)
-                       return irq_info->penalty;
-       }
-
-       return 0;
-}
-
-static int acpi_irq_set_penalty(int irq, int new_penalty)
-{
-       struct irq_penalty_info *irq_info;
-
-       /* see if this is a ISA IRQ */
-       if (irq < ACPI_MAX_ISA_IRQ) {
-               acpi_irq_isa_penalty[irq] = new_penalty;
-               return 0;
-       }
-
-       /* next, try to locate from the dynamic list */
-       list_for_each_entry(irq_info, &acpi_irq_penalty_list, node) {
-               if (irq_info->irq == irq) {
-                       irq_info->penalty  = new_penalty;
-                       return 0;
-               }
-       }
-
-       /* nope, let's allocate a slot for this IRQ */
-       irq_info = kzalloc(sizeof(*irq_info), GFP_KERNEL);
-       if (!irq_info)
-               return -ENOMEM;
-
-       irq_info->irq = irq;
-       irq_info->penalty = new_penalty;
-       list_add_tail(&irq_info->node, &acpi_irq_penalty_list);
-
-       return 0;
-}
-
-static void acpi_irq_add_penalty(int irq, int penalty)
-{
-       int curpen = acpi_irq_get_penalty(irq);
-
-       acpi_irq_set_penalty(irq, curpen + penalty);
-}
-
 int __init acpi_irq_penalty_init(void)
 {
        struct acpi_pci_link *link;
@@ -546,16 +487,15 @@ int __init acpi_irq_penalty_init(void)
                            link->irq.possible_count;
 
                        for (i = 0; i < link->irq.possible_count; i++) {
-                               if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ) {
-                                       int irqpos = link->irq.possible[i];
-
-                                       acpi_irq_add_penalty(irqpos, penalty);
-                               }
+                               if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ)
+                                       acpi_irq_penalty[link->irq.
+                                                        possible[i]] +=
+                                           penalty;
                        }
 
                } else if (link->irq.active) {
-                       acpi_irq_add_penalty(link->irq.active,
-                                            PIRQ_PENALTY_PCI_POSSIBLE);
+                       acpi_irq_penalty[link->irq.active] +=
+                           PIRQ_PENALTY_PCI_POSSIBLE;
                }
        }
 
@@ -607,12 +547,12 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link)
                 * the use of IRQs 9, 10, 11, and >15.
                 */
                for (i = (link->irq.possible_count - 1); i >= 0; i--) {
-                       if (acpi_irq_get_penalty(irq) >
-                           acpi_irq_get_penalty(link->irq.possible[i]))
+                       if (acpi_irq_penalty[irq] >
+                           acpi_irq_penalty[link->irq.possible[i]])
                                irq = link->irq.possible[i];
                }
        }
-       if (acpi_irq_get_penalty(irq) >= PIRQ_PENALTY_ISA_ALWAYS) {
+       if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) {
                printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. "
                            "Try pci=noacpi or acpi=off\n",
                            acpi_device_name(link->device),
@@ -628,8 +568,7 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link)
                            acpi_device_bid(link->device));
                return -ENODEV;
        } else {
-               acpi_irq_add_penalty(link->irq.active, PIRQ_PENALTY_PCI_USING);
-
+               acpi_irq_penalty[link->irq.active] += PIRQ_PENALTY_PCI_USING;
                printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
                       acpi_device_name(link->device),
                       acpi_device_bid(link->device), link->irq.active);
@@ -839,7 +778,7 @@ static void acpi_pci_link_remove(struct acpi_device *device)
 }
 
 /*
- * modify penalty from cmdline
+ * modify acpi_irq_penalty[] from cmdline
  */
 static int __init acpi_irq_penalty_update(char *str, int used)
 {
@@ -857,10 +796,13 @@ static int __init acpi_irq_penalty_update(char *str, int used)
                if (irq < 0)
                        continue;
 
+               if (irq >= ARRAY_SIZE(acpi_irq_penalty))
+                       continue;
+
                if (used)
-                       acpi_irq_add_penalty(irq, PIRQ_PENALTY_ISA_USED);
+                       acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
                else
-                       acpi_irq_set_penalty(irq, PIRQ_PENALTY_PCI_AVAILABLE);
+                       acpi_irq_penalty[irq] = PIRQ_PENALTY_PCI_AVAILABLE;
 
                if (retval != 2)        /* no next number */
                        break;
@@ -877,15 +819,18 @@ static int __init acpi_irq_penalty_update(char *str, int used)
  */
 void acpi_penalize_isa_irq(int irq, int active)
 {
-       if (irq >= 0)
-               acpi_irq_add_penalty(irq, active ?
-                       PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
+       if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
+               if (active)
+                       acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
+               else
+                       acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
+       }
 }
 
 bool acpi_isa_irq_available(int irq)
 {
-       return irq >= 0 &&
-               (acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
+       return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) ||
+                           acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS);
 }
 
 /*
@@ -895,18 +840,13 @@ bool acpi_isa_irq_available(int irq)
  */
 void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
 {
-       int penalty;
-
-       if (irq < 0)
-               return;
-
-       if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
-           polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
-               penalty = PIRQ_PENALTY_ISA_ALWAYS;
-       else
-               penalty = PIRQ_PENALTY_PCI_USING;
-
-       acpi_irq_add_penalty(irq, penalty);
+       if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
+               if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
+                   polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
+                       acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_ALWAYS;
+               else
+                       acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
+       }
 }
 
 /*
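
The pci_link revert above returns to a fixed acpi_irq_penalty[ACPI_MAX_IRQS]
table and guards every access with ARRAY_SIZE(), so IRQ numbers beyond the
table are ignored rather than indexed out of bounds. The guard idiom in
isolation (names illustrative):

    #include <linux/kernel.h>       /* ARRAY_SIZE() */

    #define MAX_IRQS 256

    static int penalty[MAX_IRQS];

    static void add_penalty(int irq, int amount)
    {
            /* silently skip IRQs outside the table instead of indexing
             * past the end of the array */
            if (irq >= 0 && irq < ARRAY_SIZE(penalty))
                    penalty[irq] += amount;
    }
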
index 90e2d54be526bcc0df8ec41bf292a8016d99ba3d..1316ddd92fac0f6c59f4be85759947a2d1643334 100644 (file)
@@ -135,14 +135,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
                DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"),
                },
        },
-       {
-       .callback = video_detect_force_vendor,
-       .ident = "Dell Inspiron 5737",
-       .matches = {
-               DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-               DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5737"),
-               },
-       },
 
        /*
         * These models have a working acpi_video backlight control, and using
index 4a5c9d27905900f7bdc1d978f3ef89116e1d83c9..294ba6f363960617c3d1ac2e6191c81e4b384372 100644 (file)
@@ -4,7 +4,7 @@ config ARM_AMBA
 if ARM_AMBA
 
 config TEGRA_AHB
-       bool "Enable AHB driver for NVIDIA Tegra SoCs"
+       bool
        default y if ARCH_TEGRA
        help
          Adds AHB configuration functionality for NVIDIA Tegra SoCs,
index a39e85f9efa98854768f39b01502274401673957..7d00b7a015ead0182feaace6231a8d7fad719b64 100644 (file)
@@ -2074,7 +2074,7 @@ static int binder_thread_write(struct binder_proc *proc,
                        if (get_user(cookie, (binder_uintptr_t __user *)ptr))
                                return -EFAULT;
 
-                       ptr += sizeof(void *);
+                       ptr += sizeof(cookie);
                        list_for_each_entry(w, &proc->delivered_death, entry) {
                                struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work);
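
The one-line binder fix above steps the command-stream cursor by
sizeof(cookie) instead of sizeof(void *): binder_uintptr_t is 64-bit in the
UAPI regardless of kernel word size, so on a 32-bit kernel sizeof(void *)
== 4 would leave the cursor misaligned with the wire format. Illustrative
sketch:

    /* Why sizeof(cookie) matters: the binder wire format always carries
     * 64-bit cookies, but sizeof(void *) varies with the kernel. */
    #include <linux/types.h>

    typedef __u64 binder_uintptr_t;   /* as in uapi/linux/android/binder.h */

    static size_t cursor_step(void)
    {
            binder_uintptr_t cookie;

            /* sizeof(cookie) == 8 on every architecture */
            return sizeof(cookie);
    }
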
 
index 594fcabd22cd16bfcc09626338a3da33481497cc..546a3692774f8d3819ef76d9bcc656ef1af1c99d 100644 (file)
@@ -264,6 +264,26 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */
        { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */
        { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */
+       { PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19b2), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19b3), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19b4), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19b5), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19b6), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19b7), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19bE), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19bF), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19c0), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19c1), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19c2), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19c3), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19c4), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19c5), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19c6), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19c7), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */
+       { PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */
        { PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */
        { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */
        { PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */
index a4faa438889c075070084fb1f1cd943d61082a88..a44c75d4c28452969d4012e973f173fce5db19d9 100644 (file)
@@ -250,6 +250,7 @@ enum {
        AHCI_HFLAG_MULTI_MSI            = 0,
        AHCI_HFLAG_MULTI_MSIX           = 0,
 #endif
+       AHCI_HFLAG_WAKE_BEFORE_STOP     = (1 << 22), /* wake before DMA stop */
 
        /* ap->flags bits */
 
index b36cae2fd04b2b2969cfefcfe54f63beadef48e2..e87bcec0fd7c31d9623e54cf84fa6c62b41f2f39 100644 (file)
@@ -317,6 +317,7 @@ static int brcm_ahci_probe(struct platform_device *pdev)
        if (IS_ERR(hpriv))
                return PTR_ERR(hpriv);
        hpriv->plat_data = priv;
+       hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP;
 
        brcm_sata_alpm_init(hpriv);
 
index d61740e78d6dc93a1b17d9e3c9d9425067190a21..402967902cbe341d12e3c0b803bb640f7c1cd48d 100644 (file)
@@ -496,8 +496,8 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv)
                }
        }
 
-       /* fabricate port_map from cap.nr_ports */
-       if (!port_map) {
+       /* fabricate port_map from cap.nr_ports for < AHCI 1.3 */
+       if (!port_map && vers < 0x10300) {
                port_map = (1 << ahci_nr_ports(cap)) - 1;
                dev_warn(dev, "forcing PORTS_IMPL to 0x%x\n", port_map);
 
@@ -593,8 +593,22 @@ EXPORT_SYMBOL_GPL(ahci_start_engine);
 int ahci_stop_engine(struct ata_port *ap)
 {
        void __iomem *port_mmio = ahci_port_base(ap);
+       struct ahci_host_priv *hpriv = ap->host->private_data;
        u32 tmp;
 
+       /*
+        * On some controllers, stopping a port's DMA engine while the port
+        * is in ALPM state (partial or slumber) results in failures on
+        * subsequent DMA engine starts.  For those controllers, put the
+        * port back in active state before stopping its DMA engine.
+        */
+       if ((hpriv->flags & AHCI_HFLAG_WAKE_BEFORE_STOP) &&
+           (ap->link.lpm_policy > ATA_LPM_MAX_POWER) &&
+           ahci_set_lpm(&ap->link, ATA_LPM_MAX_POWER, ATA_LPM_WAKE_ONLY)) {
+               dev_err(ap->host->dev, "Failed to wake up port before engine stop\n");
+               return -EIO;
+       }
+
        tmp = readl(port_mmio + PORT_CMD);
 
        /* check if the HBA is idle */
@@ -689,6 +703,9 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
        void __iomem *port_mmio = ahci_port_base(ap);
 
        if (policy != ATA_LPM_MAX_POWER) {
+               /* wakeup flag only applies to the max power policy */
+               hints &= ~ATA_LPM_WAKE_ONLY;
+
                /*
                 * Disable interrupts on Phy Ready. This keeps us from
                 * getting woken up due to spurious phy ready
@@ -704,7 +721,8 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
                u32 cmd = readl(port_mmio + PORT_CMD);
 
                if (policy == ATA_LPM_MAX_POWER || !(hints & ATA_LPM_HIPM)) {
-                       cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE);
+                       if (!(hints & ATA_LPM_WAKE_ONLY))
+                               cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE);
                        cmd |= PORT_CMD_ICC_ACTIVE;
 
                        writel(cmd, port_mmio + PORT_CMD);
@@ -712,6 +730,9 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 
                        /* wait 10ms to be sure we've come out of LPM state */
                        ata_msleep(ap, 10);
+
+                       if (hints & ATA_LPM_WAKE_ONLY)
+                               return 0;
                } else {
                        cmd |= PORT_CMD_ALPE;
                        if (policy == ATA_LPM_MIN_POWER)
index cbb74719d2c1b80d61590ddf3fb2f9c71a31ce06..55e257c268ddde37677cf4ff85472f65e9a63c97 100644 (file)
@@ -4125,6 +4125,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { "SAMSUNG CD-ROM SN-124", "N001",      ATA_HORKAGE_NODMA },
        { "Seagate STT20000A", NULL,            ATA_HORKAGE_NODMA },
        { " 2GB ATA Flash Disk", "ADMA428M",    ATA_HORKAGE_NODMA },
+       { "VRFDFC22048UCHC-TE*", NULL,          ATA_HORKAGE_NODMA },
        /* Odd clown on sil3726/4726 PMPs */
        { "Config  Disk",       NULL,           ATA_HORKAGE_DISABLE },
 
index cdf6215a9a22beb93ede75a702797e12c5ad4870..051b6158d1b7f45b7116b9debc98376a9d4d240f 100644 (file)
@@ -997,12 +997,9 @@ static inline int ata_hsm_ok_in_wq(struct ata_port *ap,
 static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
 {
        struct ata_port *ap = qc->ap;
-       unsigned long flags;
 
        if (ap->ops->error_handler) {
                if (in_wq) {
-                       spin_lock_irqsave(ap->lock, flags);
-
                        /* EH might have kicked in while host lock is
                         * released.
                         */
@@ -1014,8 +1011,6 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
                                } else
                                        ata_port_freeze(ap);
                        }
-
-                       spin_unlock_irqrestore(ap->lock, flags);
                } else {
                        if (likely(!(qc->err_mask & AC_ERR_HSM)))
                                ata_qc_complete(qc);
@@ -1024,10 +1019,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
                }
        } else {
                if (in_wq) {
-                       spin_lock_irqsave(ap->lock, flags);
                        ata_sff_irq_on(ap);
                        ata_qc_complete(qc);
-                       spin_unlock_irqrestore(ap->lock, flags);
                } else
                        ata_qc_complete(qc);
        }
@@ -1048,9 +1041,10 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
 {
        struct ata_link *link = qc->dev->link;
        struct ata_eh_info *ehi = &link->eh_info;
-       unsigned long flags = 0;
        int poll_next;
 
+       lockdep_assert_held(ap->lock);
+
        WARN_ON_ONCE((qc->flags & ATA_QCFLAG_ACTIVE) == 0);
 
        /* Make sure ata_sff_qc_issue() does not throw things
@@ -1112,14 +1106,6 @@ fsm_start:
                        }
                }
 
-               /* Send the CDB (atapi) or the first data block (ata pio out).
-                * During the state transition, interrupt handler shouldn't
-                * be invoked before the data transfer is complete and
-                * hsm_task_state is changed. Hence, the following locking.
-                */
-               if (in_wq)
-                       spin_lock_irqsave(ap->lock, flags);
-
                if (qc->tf.protocol == ATA_PROT_PIO) {
                        /* PIO data out protocol.
                         * send first data block.
@@ -1135,9 +1121,6 @@ fsm_start:
                        /* send CDB */
                        atapi_send_cdb(ap, qc);
 
-               if (in_wq)
-                       spin_unlock_irqrestore(ap->lock, flags);
-
                /* if polling, ata_sff_pio_task() handles the rest.
                 * otherwise, interrupt handler takes over from here.
                 */
@@ -1296,7 +1279,8 @@ fsm_start:
                break;
        default:
                poll_next = 0;
-               BUG();
+               WARN(true, "ata%d: SFF host state machine in invalid state %d",
+                    ap->print_id, ap->hsm_task_state);
        }
 
        return poll_next;
@@ -1361,12 +1345,14 @@ static void ata_sff_pio_task(struct work_struct *work)
        u8 status;
        int poll_next;
 
+       spin_lock_irq(ap->lock);
+
        BUG_ON(ap->sff_pio_task_link == NULL);
        /* qc can be NULL if timeout occurred */
        qc = ata_qc_from_tag(ap, link->active_tag);
        if (!qc) {
                ap->sff_pio_task_link = NULL;
-               return;
+               goto out_unlock;
        }
 
 fsm_start:
@@ -1381,11 +1367,14 @@ fsm_start:
         */
        status = ata_sff_busy_wait(ap, ATA_BUSY, 5);
        if (status & ATA_BUSY) {
+               spin_unlock_irq(ap->lock);
                ata_msleep(ap, 2);
+               spin_lock_irq(ap->lock);
+
                status = ata_sff_busy_wait(ap, ATA_BUSY, 10);
                if (status & ATA_BUSY) {
                        ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE);
-                       return;
+                       goto out_unlock;
                }
        }
 
@@ -1402,6 +1391,8 @@ fsm_start:
         */
        if (poll_next)
                goto fsm_start;
+out_unlock:
+       spin_unlock_irq(ap->lock);
 }
 
 /**
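
In the libata-sff hunks, ap->lock is no longer taken piecemeal inside the
helpers; ata_sff_pio_task() now holds it for the whole state-machine pass
(dropping it only around ata_msleep()) and ata_sff_hsm_move() asserts the new
contract with lockdep_assert_held(). The caller-holds-lock convention in
miniature (names illustrative):

    #include <linux/lockdep.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(hw_lock);

    static void hsm_step(void)
    {
            lockdep_assert_held(&hw_lock);  /* caller must hold hw_lock */
            /* ... advance the state machine ... */
    }

    static void pio_worker(void)
    {
            spin_lock_irq(&hw_lock);
            hsm_step();
            spin_unlock_irq(&hw_lock);      /* drop only around sleeps */
    }
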
index 89f5cf68d80a143198c2c253d6d21c0d9fb38388..04a1582e80bb83d00fa80c3fb95d325bb165b261 100644 (file)
@@ -206,6 +206,8 @@ static void component_match_release(struct device *master,
                if (mc->release)
                        mc->release(master, mc->data);
        }
+
+       kfree(match->compare);
 }
 
 static void devm_component_match_release(struct device *dev, void *res)
@@ -221,14 +223,14 @@ static int component_match_realloc(struct device *dev,
        if (match->alloc == num)
                return 0;
 
-       new = devm_kmalloc_array(dev, num, sizeof(*new), GFP_KERNEL);
+       new = kmalloc_array(num, sizeof(*new), GFP_KERNEL);
        if (!new)
                return -ENOMEM;
 
        if (match->compare) {
                memcpy(new, match->compare, sizeof(*new) *
                                            min(match->num, num));
-               devm_kfree(dev, match->compare);
+               kfree(match->compare);
        }
        match->compare = new;
        match->alloc = num;
@@ -283,6 +285,24 @@ void component_match_add_release(struct device *master,
 }
 EXPORT_SYMBOL(component_match_add_release);
 
+static void free_master(struct master *master)
+{
+       struct component_match *match = master->match;
+       int i;
+
+       list_del(&master->node);
+
+       if (match) {
+               for (i = 0; i < match->num; i++) {
+                       struct component *c = match->compare[i].component;
+                       if (c)
+                               c->master = NULL;
+               }
+       }
+
+       kfree(master);
+}
+
 int component_master_add_with_match(struct device *dev,
        const struct component_master_ops *ops,
        struct component_match *match)
@@ -309,11 +329,9 @@ int component_master_add_with_match(struct device *dev,
 
        ret = try_to_bring_up_master(master, NULL);
 
-       if (ret < 0) {
-               /* Delete off the list if we weren't successful */
-               list_del(&master->node);
-               kfree(master);
-       }
+       if (ret < 0)
+               free_master(master);
+
        mutex_unlock(&component_mutex);
 
        return ret < 0 ? ret : 0;
@@ -324,25 +342,12 @@ void component_master_del(struct device *dev,
        const struct component_master_ops *ops)
 {
        struct master *master;
-       int i;
 
        mutex_lock(&component_mutex);
        master = __master_find(dev, ops);
        if (master) {
-               struct component_match *match = master->match;
-
                take_down_master(master);
-
-               list_del(&master->node);
-
-               if (match) {
-                       for (i = 0; i < match->num; i++) {
-                               struct component *c = match->compare[i].component;
-                               if (c)
-                                       c->master = NULL;
-                       }
-               }
-               kfree(master);
+               free_master(master);
        }
        mutex_unlock(&component_mutex);
 }
@@ -486,6 +491,8 @@ int component_add(struct device *dev, const struct component_ops *ops)
 
        ret = try_to_bring_up_masters(component);
        if (ret < 0) {
+               if (component->master)
+                       remove_component(component->master, component);
                list_del(&component->node);
 
                kfree(component);
index 68f03141e432a2c5a03358d59ed541ac9ae79529..44a74cf1372c6e710d2b102ac51dd34ee44ffd0e 100644 (file)
@@ -215,9 +215,9 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
                newattrs.ia_uid = uid;
                newattrs.ia_gid = gid;
                newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
-               mutex_lock(&d_inode(dentry)->i_mutex);
+               inode_lock(d_inode(dentry));
                notify_change(dentry, &newattrs, NULL);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
 
                /* mark as kernel-created inode */
                d_inode(dentry)->i_private = &thread;
@@ -244,7 +244,7 @@ static int dev_rmdir(const char *name)
                err = -ENOENT;
        }
        dput(dentry);
-       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+       inode_unlock(d_inode(parent.dentry));
        path_put(&parent);
        return err;
 }
@@ -321,9 +321,9 @@ static int handle_remove(const char *nodename, struct device *dev)
                        newattrs.ia_mode = stat.mode & ~0777;
                        newattrs.ia_valid =
                                ATTR_UID|ATTR_GID|ATTR_MODE;
-                       mutex_lock(&d_inode(dentry)->i_mutex);
+                       inode_lock(d_inode(dentry));
                        notify_change(dentry, &newattrs, NULL);
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
                        if (!err || err == -ENOENT)
                                deleted = 1;
@@ -332,7 +332,7 @@ static int handle_remove(const char *nodename, struct device *dev)
                err = -ENOENT;
        }
        dput(dentry);
-       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+       inode_unlock(d_inode(parent.dentry));
 
        path_put(&parent);
        if (deleted && strchr(nodename, '/'))
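
The devtmpfs hunks are part of the tree-wide conversion from open-coded
mutex_lock(&inode->i_mutex) to the inode_lock()/inode_unlock() wrappers from
Al Viro's i_mutex series; at this point the wrappers were thin aliases, so
the conversion is behaviour-neutral. Roughly as defined in
include/linux/fs.h at the time:

    static inline void inode_lock(struct inode *inode)
    {
            mutex_lock(&inode->i_mutex);
    }

    static inline void inode_unlock(struct inode *inode)
    {
            mutex_unlock(&inode->i_mutex);
    }
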
index 47c43386786b13229f4fbeb5cbdbfe03a2009804..279e53989374f065d01039caf110a14b95f887e0 100644 (file)
@@ -284,6 +284,7 @@ out_free_priv_data:
 
        return err;
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs);
 
 /**
  * platform_msi_domain_free_irqs - Free MSI interrupts for @dev
@@ -301,6 +302,7 @@ void platform_msi_domain_free_irqs(struct device *dev)
        msi_domain_free_irqs(dev->msi_domain, dev);
        platform_msi_free_descs(dev, 0, MAX_DEV_MSIS);
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs);
 
 /**
  * platform_msi_get_host_data - Query the private data associated with
index 73d6e5d39e33e1edef9033d5e97b99ecc50a80cf..f437afa17f2b1d2933ed0882d7e7f291c6d86309 100644 (file)
@@ -558,10 +558,15 @@ static int platform_drv_probe(struct device *_dev)
                return ret;
 
        ret = dev_pm_domain_attach(_dev, true);
-       if (ret != -EPROBE_DEFER && drv->probe) {
-               ret = drv->probe(dev);
-               if (ret)
-                       dev_pm_domain_detach(_dev, true);
+       if (ret != -EPROBE_DEFER) {
+               if (drv->probe) {
+                       ret = drv->probe(dev);
+                       if (ret)
+                               dev_pm_domain_detach(_dev, true);
+               } else {
+                       /* don't fail if just dev_pm_domain_attach failed */
+                       ret = 0;
+               }
        }
 
        if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
index 93ed14cc22524ccdd04301dbe460e0958bb69acf..f6a9ad52cbbff0663e58540f33bb29e08abf0e96 100644 (file)
@@ -146,7 +146,7 @@ void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd)
        if (dev->pm_domain == pd)
                return;
 
-       WARN(device_is_bound(dev),
+       WARN(pd && device_is_bound(dev),
             "PM domains can only be changed for unbound devices\n");
        dev->pm_domain = pd;
        device_pm_check_callbacks(dev);
index 6ac9a7f33b640ae1105a72f76974e8939a30ae45..301b785f9f56f5d00b7f83798b6f7cce53bf0e03 100644 (file)
@@ -162,7 +162,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed)
 
 /**
  * genpd_queue_power_off_work - Queue up the execution of genpd_poweroff().
- * @genpd: PM domait to power off.
+ * @genpd: PM domain to power off.
  *
  * Queue up the execution of genpd_poweroff() unless it's already been done
  * before.
@@ -172,16 +172,15 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
        queue_work(pm_wq, &genpd->power_off_work);
 }
 
-static int genpd_poweron(struct generic_pm_domain *genpd);
-
 /**
- * __genpd_poweron - Restore power to a given PM domain and its masters.
+ * genpd_poweron - Restore power to a given PM domain and its masters.
  * @genpd: PM domain to power up.
+ * @depth: nesting count for lockdep.
  *
  * Restore power to @genpd and all of its masters so that it is possible to
  * resume a device belonging to it.
  */
-static int __genpd_poweron(struct generic_pm_domain *genpd)
+static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 {
        struct gpd_link *link;
        int ret = 0;
@@ -196,11 +195,16 @@ static int __genpd_poweron(struct generic_pm_domain *genpd)
         * with it.
         */
        list_for_each_entry(link, &genpd->slave_links, slave_node) {
-               genpd_sd_counter_inc(link->master);
+               struct generic_pm_domain *master = link->master;
+
+               genpd_sd_counter_inc(master);
+
+               mutex_lock_nested(&master->lock, depth + 1);
+               ret = genpd_poweron(master, depth + 1);
+               mutex_unlock(&master->lock);
 
-               ret = genpd_poweron(link->master);
                if (ret) {
-                       genpd_sd_counter_dec(link->master);
+                       genpd_sd_counter_dec(master);
                        goto err;
                }
        }
@@ -223,20 +227,6 @@ static int __genpd_poweron(struct generic_pm_domain *genpd)
        return ret;
 }
 
-/**
- * genpd_poweron - Restore power to a given PM domain and its masters.
- * @genpd: PM domain to power up.
- */
-static int genpd_poweron(struct generic_pm_domain *genpd)
-{
-       int ret;
-
-       mutex_lock(&genpd->lock);
-       ret = __genpd_poweron(genpd);
-       mutex_unlock(&genpd->lock);
-       return ret;
-}
-
 static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
 {
        return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
@@ -484,7 +474,7 @@ static int pm_genpd_runtime_resume(struct device *dev)
        }
 
        mutex_lock(&genpd->lock);
-       ret = __genpd_poweron(genpd);
+       ret = genpd_poweron(genpd, 0);
        mutex_unlock(&genpd->lock);
 
        if (ret)
@@ -1339,8 +1329,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
        if (!link)
                return -ENOMEM;
 
-       mutex_lock(&genpd->lock);
-       mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+       mutex_lock(&subdomain->lock);
+       mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
        if (genpd->status == GPD_STATE_POWER_OFF
            &&  subdomain->status != GPD_STATE_POWER_OFF) {
@@ -1363,8 +1353,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
                genpd_sd_counter_inc(genpd);
 
  out:
-       mutex_unlock(&subdomain->lock);
        mutex_unlock(&genpd->lock);
+       mutex_unlock(&subdomain->lock);
        if (ret)
                kfree(link);
        return ret;
@@ -1385,7 +1375,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
        if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
                return -EINVAL;
 
-       mutex_lock(&genpd->lock);
+       mutex_lock(&subdomain->lock);
+       mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
        if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
                pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
@@ -1398,22 +1389,19 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
                if (link->slave != subdomain)
                        continue;
 
-               mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
-
                list_del(&link->master_node);
                list_del(&link->slave_node);
                kfree(link);
                if (subdomain->status != GPD_STATE_POWER_OFF)
                        genpd_sd_counter_dec(genpd);
 
-               mutex_unlock(&subdomain->lock);
-
                ret = 0;
                break;
        }
 
 out:
        mutex_unlock(&genpd->lock);
+       mutex_unlock(&subdomain->lock);
 
        return ret;
 }
@@ -1818,8 +1806,10 @@ int genpd_dev_pm_attach(struct device *dev)
 
        dev->pm_domain->detach = genpd_dev_pm_detach;
        dev->pm_domain->sync = genpd_dev_pm_sync;
-       ret = genpd_poweron(pd);
 
+       mutex_lock(&pd->lock);
+       ret = genpd_poweron(pd, 0);
+       mutex_unlock(&pd->lock);
 out:
        return ret ? -EPROBE_DEFER : 0;
 }
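
The genpd rework makes genpd_poweron() openly recursive up the master chain,
taking each master's mutex with mutex_lock_nested(..., depth + 1) so lockdep
can tell the nesting levels apart, and the subdomain add/remove paths now
lock child before parent for one consistent order. A minimal sketch of
depth-annotated recursive locking (names hypothetical):

    /* Each level of the hierarchy is locked with a distinct nesting
     * subclass so lockdep does not flag the recursive acquisition. */
    #include <linux/mutex.h>

    struct domain {
            struct mutex lock;
            struct domain *master;  /* single parent, for brevity */
    };

    static int power_on(struct domain *d, unsigned int depth)
    {
            int ret = 0;

            if (d->master) {
                    mutex_lock_nested(&d->master->lock, depth + 1);
                    ret = power_on(d->master, depth + 1);
                    mutex_unlock(&d->master->lock);
            }
            /* ... power on d itself under d->lock, held by the caller ... */
            return ret;
    }
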
index 8812bfb9e3b89256f4a5d1468cf45484b18e4cd1..eea51569f0eb9c9e36f5a4bca90de61c8f833444 100644 (file)
@@ -133,17 +133,17 @@ static int regmap_mmio_gather_write(void *context,
        while (val_size) {
                switch (ctx->val_bytes) {
                case 1:
-                       __raw_writeb(*(u8 *)val, ctx->regs + offset);
+                       writeb(*(u8 *)val, ctx->regs + offset);
                        break;
                case 2:
-                       __raw_writew(*(u16 *)val, ctx->regs + offset);
+                       writew(*(u16 *)val, ctx->regs + offset);
                        break;
                case 4:
-                       __raw_writel(*(u32 *)val, ctx->regs + offset);
+                       writel(*(u32 *)val, ctx->regs + offset);
                        break;
 #ifdef CONFIG_64BIT
                case 8:
-                       __raw_writeq(*(u64 *)val, ctx->regs + offset);
+                       writeq(*(u64 *)val, ctx->regs + offset);
                        break;
 #endif
                default:
@@ -193,17 +193,17 @@ static int regmap_mmio_read(void *context,
        while (val_size) {
                switch (ctx->val_bytes) {
                case 1:
-                       *(u8 *)val = __raw_readb(ctx->regs + offset);
+                       *(u8 *)val = readb(ctx->regs + offset);
                        break;
                case 2:
-                       *(u16 *)val = __raw_readw(ctx->regs + offset);
+                       *(u16 *)val = readw(ctx->regs + offset);
                        break;
                case 4:
-                       *(u32 *)val = __raw_readl(ctx->regs + offset);
+                       *(u32 *)val = readl(ctx->regs + offset);
                        break;
 #ifdef CONFIG_64BIT
                case 8:
-                       *(u64 *)val = __raw_readq(ctx->regs + offset);
+                       *(u64 *)val = readq(ctx->regs + offset);
                        break;
 #endif
                default:
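
The regmap-mmio hunks swap the __raw_{read,write}{b,w,l,q} accessors for the
plain read*/write* family: the __raw_ variants do no byte-swapping and no
barriers, while the plain accessors give little-endian register semantics
plus the ordering guarantees normally expected of MMIO. In isolation, with
hypothetical register offsets:

    #include <linux/io.h>

    static void set_ctrl(void __iomem *regs, u32 val)
    {
            writel(val, regs + 0x0);   /* LE conversion + write ordering */
    }

    static u32 get_status(void __iomem *regs)
    {
            return readl(regs + 0x4);  /* read ordering + LE conversion */
    }
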
index ad80c85e0857064c1ed3cd6420a1ef582ad76c4a..d048d2009e897a7e76d523ec704b1fd1f77dd76b 100644 (file)
@@ -964,9 +964,9 @@ aoecmd_sleepwork(struct work_struct *work)
                ssize = get_capacity(d->gd);
                bd = bdget_disk(d->gd, 0);
                if (bd) {
-                       mutex_lock(&bd->bd_inode->i_mutex);
+                       inode_lock(bd->bd_inode);
                        i_size_write(bd->bd_inode, (loff_t)ssize<<9);
-                       mutex_unlock(&bd->bd_inode->i_mutex);
+                       inode_unlock(bd->bd_inode);
                        bdput(bd);
                }
                spin_lock_irq(&d->lock);
index b3868e7a1ffddbb57e7165a65b6102d872ca4983..10459a14506224ede01f9bbca97be127397142cd 100644 (file)
@@ -288,7 +288,162 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
        return need_transaction;
 }
 
-static int al_write_transaction(struct drbd_device *device);
+#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
+/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
+ * are still coupled, or assume too much about their relation.
+ * Code below will not work if this is violated.
+ * Will be cleaned up with some followup patch.
+ */
+# error FIXME
+#endif
+
+static unsigned int al_extent_to_bm_page(unsigned int al_enr)
+{
+       return al_enr >>
+               /* bit to page */
+               ((PAGE_SHIFT + 3) -
+               /* al extent number to bit */
+                (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
+}
+
+static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
+{
+       const unsigned int stripes = device->ldev->md.al_stripes;
+       const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
+
+       /* transaction number, modulo on-disk ring buffer wrap around */
+       unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
+
+       /* ... to aligned 4k on disk block */
+       t = ((t % stripes) * stripe_size_4kB) + t/stripes;
+
+       /* ... to 512 byte sector in activity log */
+       t *= 8;
+
+       /* ... plus offset to the on disk position */
+       return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
+}
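
A worked example of the striping arithmetic in
al_tr_number_to_on_disk_sector() above, with illustrative geometry (2
stripes of 4 blocks each): consecutive transaction slots alternate between
stripes, and each 4k block covers eight 512-byte sectors:

    /* Slots 0..7 map to 4k blocks 0,4,1,5,2,6,3,7: even slots fill
     * stripe 0 (blocks 0-3), odd slots stripe 1 (blocks 4-7). */
    #include <assert.h>

    static unsigned slot_to_4k_block(unsigned t, unsigned stripes,
                                     unsigned stripe_size_4k)
    {
            return ((t % stripes) * stripe_size_4k) + t / stripes;
    }

    int main(void)
    {
            assert(slot_to_4k_block(0, 2, 4) == 0);
            assert(slot_to_4k_block(1, 2, 4) == 4);
            assert(slot_to_4k_block(5, 2, 4) == 6);
            /* 4k block 6 starts at 512-byte sector 48 within the AL area */
            assert(slot_to_4k_block(5, 2, 4) * 8 == 48);
            return 0;
    }
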
+
+static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer)
+{
+       struct lc_element *e;
+       sector_t sector;
+       int i, mx;
+       unsigned extent_nr;
+       unsigned crc = 0;
+       int err = 0;
+
+       memset(buffer, 0, sizeof(*buffer));
+       buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
+       buffer->tr_number = cpu_to_be32(device->al_tr_number);
+
+       i = 0;
+
+       /* Even though no one can start to change this list
+        * once we set LC_LOCKED -- from drbd_al_begin_io(),
+        * lc_try_lock_for_transaction() -- someone may still
+        * be in the process of changing it. */
+       spin_lock_irq(&device->al_lock);
+       list_for_each_entry(e, &device->act_log->to_be_changed, list) {
+               if (i == AL_UPDATES_PER_TRANSACTION) {
+                       i++;
+                       break;
+               }
+               buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
+               buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
+               if (e->lc_number != LC_FREE)
+                       drbd_bm_mark_for_writeout(device,
+                                       al_extent_to_bm_page(e->lc_number));
+               i++;
+       }
+       spin_unlock_irq(&device->al_lock);
+       BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
+
+       buffer->n_updates = cpu_to_be16(i);
+       for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
+               buffer->update_slot_nr[i] = cpu_to_be16(-1);
+               buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
+       }
+
+       buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
+       buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
+
+       mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
+                  device->act_log->nr_elements - device->al_tr_cycle);
+       for (i = 0; i < mx; i++) {
+               unsigned idx = device->al_tr_cycle + i;
+               extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
+               buffer->context[i] = cpu_to_be32(extent_nr);
+       }
+       for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
+               buffer->context[i] = cpu_to_be32(LC_FREE);
+
+       device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
+       if (device->al_tr_cycle >= device->act_log->nr_elements)
+               device->al_tr_cycle = 0;
+
+       sector = al_tr_number_to_on_disk_sector(device);
+
+       crc = crc32c(0, buffer, 4096);
+       buffer->crc32c = cpu_to_be32(crc);
+
+       if (drbd_bm_write_hinted(device))
+               err = -EIO;
+       else {
+               bool write_al_updates;
+               rcu_read_lock();
+               write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
+               rcu_read_unlock();
+               if (write_al_updates) {
+                       if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
+                               err = -EIO;
+                               drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
+                       } else {
+                               device->al_tr_number++;
+                               device->al_writ_cnt++;
+                       }
+               }
+       }
+
+       return err;
+}
+
+static int al_write_transaction(struct drbd_device *device)
+{
+       struct al_transaction_on_disk *buffer;
+       int err;
+
+       if (!get_ldev(device)) {
+               drbd_err(device, "disk is %s, cannot start al transaction\n",
+                       drbd_disk_str(device->state.disk));
+               return -EIO;
+       }
+
+       /* The bitmap write may have failed, causing a state change. */
+       if (device->state.disk < D_INCONSISTENT) {
+               drbd_err(device,
+                       "disk is %s, cannot write al transaction\n",
+                       drbd_disk_str(device->state.disk));
+               put_ldev(device);
+               return -EIO;
+       }
+
+       /* protects md_io_buffer, al_tr_cycle, ... */
+       buffer = drbd_md_get_buffer(device, __func__);
+       if (!buffer) {
+               drbd_err(device, "disk failed while waiting for md_io buffer\n");
+               put_ldev(device);
+               return -ENODEV;
+       }
+
+       err = __al_write_transaction(device, buffer);
+
+       drbd_md_put_buffer(device);
+       put_ldev(device);
+
+       return err;
+}
+
 
 void drbd_al_begin_io_commit(struct drbd_device *device)
 {
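The sector computation in al_tr_number_to_on_disk_sector() above is pure integer arithmetic and can be checked standalone. A userland sketch with hypothetical layout values (4 stripes of 8 4k-blocks each, activity log starting 8 sectors into the meta data area):

    #include <stdio.h>

    /* Mirror of the transaction-number -> on-disk-sector mapping:
     * wrap in the ring buffer, spread over stripes, then convert the
     * 4k block number to 512-byte sectors and add the AL offset. */
    static unsigned long long tr_to_sector(unsigned int tr_number,
                                           unsigned int stripes,
                                           unsigned int stripe_size_4k,
                                           unsigned long long md_offset,
                                           long long al_offset)
    {
            unsigned int al_size_4k = stripes * stripe_size_4k;
            unsigned int t = tr_number % al_size_4k;          /* ring wrap */

            t = (t % stripes) * stripe_size_4k + t / stripes; /* striping */
            return md_offset + al_offset + (unsigned long long)t * 8;
    }

    int main(void)
    {
            unsigned int tr;

            /* hypothetical layout: 4 stripes of 8 4k-blocks each */
            for (tr = 0; tr < 6; tr++)
                    printf("tr %u -> sector %llu\n", tr,
                           tr_to_sector(tr, 4, 8, 1000, 8));
            return 0;
    }

Consecutive transaction numbers land in different stripes, so sequential AL updates rotate over the whole activity-log area instead of hammering one block.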
@@ -420,153 +575,6 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
        wake_up(&device->al_wait);
 }
 
-#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
-/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
- * are still coupled, or assume too much about their relation.
- * Code below will not work if this is violated.
- * Will be cleaned up with some followup patch.
- */
-# error FIXME
-#endif
-
-static unsigned int al_extent_to_bm_page(unsigned int al_enr)
-{
-       return al_enr >>
-               /* bit to page */
-               ((PAGE_SHIFT + 3) -
-               /* al extent number to bit */
-                (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
-}
-
-static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
-{
-       const unsigned int stripes = device->ldev->md.al_stripes;
-       const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
-
-       /* transaction number, modulo on-disk ring buffer wrap around */
-       unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
-
-       /* ... to aligned 4k on disk block */
-       t = ((t % stripes) * stripe_size_4kB) + t/stripes;
-
-       /* ... to 512 byte sector in activity log */
-       t *= 8;
-
-       /* ... plus offset to the on disk position */
-       return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
-}
-
-int al_write_transaction(struct drbd_device *device)
-{
-       struct al_transaction_on_disk *buffer;
-       struct lc_element *e;
-       sector_t sector;
-       int i, mx;
-       unsigned extent_nr;
-       unsigned crc = 0;
-       int err = 0;
-
-       if (!get_ldev(device)) {
-               drbd_err(device, "disk is %s, cannot start al transaction\n",
-                       drbd_disk_str(device->state.disk));
-               return -EIO;
-       }
-
-       /* The bitmap write may have failed, causing a state change. */
-       if (device->state.disk < D_INCONSISTENT) {
-               drbd_err(device,
-                       "disk is %s, cannot write al transaction\n",
-                       drbd_disk_str(device->state.disk));
-               put_ldev(device);
-               return -EIO;
-       }
-
-       /* protects md_io_buffer, al_tr_cycle, ... */
-       buffer = drbd_md_get_buffer(device, __func__);
-       if (!buffer) {
-               drbd_err(device, "disk failed while waiting for md_io buffer\n");
-               put_ldev(device);
-               return -ENODEV;
-       }
-
-       memset(buffer, 0, sizeof(*buffer));
-       buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
-       buffer->tr_number = cpu_to_be32(device->al_tr_number);
-
-       i = 0;
-
-       /* Even though no one can start to change this list
-        * once we set the LC_LOCKED -- from drbd_al_begin_io(),
-        * lc_try_lock_for_transaction() --, someone may still
-        * be in the process of changing it. */
-       spin_lock_irq(&device->al_lock);
-       list_for_each_entry(e, &device->act_log->to_be_changed, list) {
-               if (i == AL_UPDATES_PER_TRANSACTION) {
-                       i++;
-                       break;
-               }
-               buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
-               buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
-               if (e->lc_number != LC_FREE)
-                       drbd_bm_mark_for_writeout(device,
-                                       al_extent_to_bm_page(e->lc_number));
-               i++;
-       }
-       spin_unlock_irq(&device->al_lock);
-       BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
-
-       buffer->n_updates = cpu_to_be16(i);
-       for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
-               buffer->update_slot_nr[i] = cpu_to_be16(-1);
-               buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
-       }
-
-       buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
-       buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
-
-       mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
-                  device->act_log->nr_elements - device->al_tr_cycle);
-       for (i = 0; i < mx; i++) {
-               unsigned idx = device->al_tr_cycle + i;
-               extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
-               buffer->context[i] = cpu_to_be32(extent_nr);
-       }
-       for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
-               buffer->context[i] = cpu_to_be32(LC_FREE);
-
-       device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
-       if (device->al_tr_cycle >= device->act_log->nr_elements)
-               device->al_tr_cycle = 0;
-
-       sector = al_tr_number_to_on_disk_sector(device);
-
-       crc = crc32c(0, buffer, 4096);
-       buffer->crc32c = cpu_to_be32(crc);
-
-       if (drbd_bm_write_hinted(device))
-               err = -EIO;
-       else {
-               bool write_al_updates;
-               rcu_read_lock();
-               write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
-               rcu_read_unlock();
-               if (write_al_updates) {
-                       if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
-                               err = -EIO;
-                               drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
-                       } else {
-                               device->al_tr_number++;
-                               device->al_writ_cnt++;
-                       }
-               }
-       }
-
-       drbd_md_put_buffer(device);
-       put_ldev(device);
-
-       return err;
-}
-
 static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
 {
        int rv;
@@ -606,21 +614,24 @@ void drbd_al_shrink(struct drbd_device *device)
        wake_up(&device->al_wait);
 }
 
-int drbd_initialize_al(struct drbd_device *device, void *buffer)
+int drbd_al_initialize(struct drbd_device *device, void *buffer)
 {
        struct al_transaction_on_disk *al = buffer;
        struct drbd_md *md = &device->ldev->md;
-       sector_t al_base = md->md_offset + md->al_offset;
        int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
        int i;
 
-       memset(al, 0, 4096);
-       al->magic = cpu_to_be32(DRBD_AL_MAGIC);
-       al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
-       al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+       __al_write_transaction(device, al);
+       /* There may or may not have been a pending transaction. */
+       spin_lock_irq(&device->al_lock);
+       lc_committed(device->act_log);
+       spin_unlock_irq(&device->al_lock);
 
-       for (i = 0; i < al_size_4k; i++) {
-               int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
+       /* The rest of the transactions will have an empty "updates" list, and
+        * are written out only to provide the context and to initialize the
+        * on-disk ring buffer. */
+       for (i = 1; i < al_size_4k; i++) {
+               int err = __al_write_transaction(device, al);
                if (err)
                        return err;
        }
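The shift in al_extent_to_bm_page() earlier in this file combines two unit conversions: bits per page (2^(PAGE_SHIFT+3)) divided by bitmap bits per AL extent (2^(AL_EXTENT_SHIFT-BM_BLOCK_SHIFT)). Under the usual constants (4k pages, 4 MiB extents, one bitmap bit per 4k block) it reduces to a small standalone computation; a sketch with those values as hypothetical stand-ins:

    #include <stdio.h>

    #define PAGE_SHIFT      12      /* 4k pages */
    #define AL_EXTENT_SHIFT 22      /* 4 MiB activity-log extents */
    #define BM_BLOCK_SHIFT  12      /* one bitmap bit per 4k block */

    /* extents per bitmap page = bits per page / bits per extent,
     * hence the combined shift below (here: 15 - 10 = 5, i.e. 32) */
    static unsigned int al_extent_to_bm_page(unsigned int al_enr)
    {
            return al_enr >> ((PAGE_SHIFT + 3) -
                              (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
    }

    int main(void)
    {
            printf("extent 0  -> page %u\n", al_extent_to_bm_page(0));
            printf("extent 31 -> page %u\n", al_extent_to_bm_page(31));
            printf("extent 32 -> page %u\n", al_extent_to_bm_page(32));
            return 0;
    }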
index 9462d27528507d693d8e4efe0e6464597ab1768b..92d6fc020a657c0694cc02c99d134fdf47c85c12 100644 (file)
@@ -24,7 +24,7 @@
 
 #define pr_fmt(fmt)    KBUILD_MODNAME ": " fmt
 
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/vmalloc.h>
 #include <linux/string.h>
 #include <linux/drbd.h>
@@ -364,12 +364,9 @@ static void bm_free_pages(struct page **pages, unsigned long number)
        }
 }
 
-static void bm_vk_free(void *ptr, int v)
+static inline void bm_vk_free(void *ptr)
 {
-       if (v)
-               vfree(ptr);
-       else
-               kfree(ptr);
+       kvfree(ptr);
 }
 
 /*
@@ -379,7 +376,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
 {
        struct page **old_pages = b->bm_pages;
        struct page **new_pages, *page;
-       unsigned int i, bytes, vmalloced = 0;
+       unsigned int i, bytes;
        unsigned long have = b->bm_number_of_pages;
 
        BUG_ON(have == 0 && old_pages != NULL);
@@ -401,7 +398,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                                PAGE_KERNEL);
                if (!new_pages)
                        return NULL;
-               vmalloced = 1;
        }
 
        if (want >= have) {
@@ -411,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                        page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
                        if (!page) {
                                bm_free_pages(new_pages + have, i - have);
-                               bm_vk_free(new_pages, vmalloced);
+                               bm_vk_free(new_pages);
                                return NULL;
                        }
                        /* we want to know which page it is
@@ -427,11 +423,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                */
        }
 
-       if (vmalloced)
-               b->bm_flags |= BM_P_VMALLOCED;
-       else
-               b->bm_flags &= ~BM_P_VMALLOCED;
-
        return new_pages;
 }
 
@@ -469,7 +460,7 @@ void drbd_bm_cleanup(struct drbd_device *device)
        if (!expect(device->bitmap))
                return;
        bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
-       bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags));
+       bm_vk_free(device->bitmap->bm_pages);
        kfree(device->bitmap);
        device->bitmap = NULL;
 }
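kvfree() dispatches on is_vmalloc_addr(), so callers no longer need to carry a flag recording which allocator a pointer came from. A sketch of the allocation pattern this simplifies (kernel context assumed, names hypothetical):

    static void *demo_alloc(size_t bytes)
    {
            /* try kmalloc first, fall back to vmalloc for large buffers */
            void *buf = kmalloc(bytes, GFP_NOIO | __GFP_NOWARN);

            return buf ? buf : vmalloc(bytes);
    }

    static void demo_free(void *buf)
    {
            kvfree(buf);    /* correct for both kmalloc and vmalloc memory */
    }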
@@ -479,8 +470,14 @@ void drbd_bm_cleanup(struct drbd_device *device)
  * this masks out the remaining bits.
  * Returns the number of bits cleared.
  */
+#ifndef BITS_PER_PAGE
 #define BITS_PER_PAGE          (1UL << (PAGE_SHIFT + 3))
 #define BITS_PER_PAGE_MASK     (BITS_PER_PAGE - 1)
+#else
+# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
+#  error "ambiguous BITS_PER_PAGE"
+# endif
+#endif
 #define BITS_PER_LONG_MASK     (BITS_PER_LONG - 1)
 static int bm_clear_surplus(struct drbd_bitmap *b)
 {
@@ -559,21 +556,19 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
        unsigned long *p_addr;
        unsigned long bits = 0;
        unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
-       int idx, i, last_word;
+       int idx, last_word;
 
        /* all but last page */
        for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
                p_addr = __bm_map_pidx(b, idx);
-               for (i = 0; i < LWPP; i++)
-                       bits += hweight_long(p_addr[i]);
+               bits += bitmap_weight(p_addr, BITS_PER_PAGE);
                __bm_unmap(p_addr);
                cond_resched();
        }
        /* last (or only) page */
        last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
        p_addr = __bm_map_pidx(b, idx);
-       for (i = 0; i < last_word; i++)
-               bits += hweight_long(p_addr[i]);
+       bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
        p_addr[last_word] &= cpu_to_lel(mask);
        bits += hweight_long(p_addr[last_word]);
        /* 32bit arch, may have an unused padding long */
@@ -639,7 +634,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
        unsigned long want, have, onpages; /* number of pages */
        struct page **npages, **opages = NULL;
        int err = 0, growing;
-       int opages_vmalloced;
 
        if (!expect(b))
                return -ENOMEM;
@@ -652,8 +646,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
        if (capacity == b->bm_dev_capacity)
                goto out;
 
-       opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
-
        if (capacity == 0) {
                spin_lock_irq(&b->bm_lock);
                opages = b->bm_pages;
@@ -667,7 +659,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
                b->bm_dev_capacity = 0;
                spin_unlock_irq(&b->bm_lock);
                bm_free_pages(opages, onpages);
-               bm_vk_free(opages, opages_vmalloced);
+               bm_vk_free(opages);
                goto out;
        }
        bits  = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
@@ -740,7 +732,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
 
        spin_unlock_irq(&b->bm_lock);
        if (opages != npages)
-               bm_vk_free(opages, opages_vmalloced);
+               bm_vk_free(opages);
        if (!growing)
                b->bm_set = bm_count_bits(b);
        drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
@@ -1419,6 +1411,9 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
        int bits;
        int changed = 0;
        unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
+
+       /* I think it is more cache-line friendly to hweight_long() each word and
+        * then set it to ~0UL, than to first bitmap_weight() all words, then bitmap_fill() them. */
        for (i = first_word; i < last_word; i++) {
                bits = hweight_long(paddr[i]);
                paddr[i] = ~0UL;
@@ -1628,8 +1623,7 @@ int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
                int n = e-s;
                p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
                bm = p_addr + MLPP(s);
-               while (n--)
-                       count += hweight_long(*bm++);
+               count += bitmap_weight(bm, n * BITS_PER_LONG);
                bm_unmap(p_addr);
        } else {
                drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
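bitmap_weight(src, nbits) counts the set bits in the first nbits of a bitmap, replacing the open-coded hweight_long() loops above and letting the implementation pick the fastest popcount available. A userland mirror of the equivalence, assuming 64-bit longs:

    #include <stdio.h>

    /* open-coded per-word popcount loop, as the old drbd code did;
     * bitmap_weight(p, words * BITS_PER_LONG) computes the same value */
    static unsigned long weight_loop(const unsigned long *p, unsigned int words)
    {
            unsigned long bits = 0;
            unsigned int i;

            for (i = 0; i < words; i++)
                    bits += (unsigned long)__builtin_popcountl(p[i]);
            return bits;
    }

    int main(void)
    {
            unsigned long map[4] = { 0xffUL, 0x1UL, 0x0UL, ~0UL };

            printf("%lu bits set\n", weight_loop(map, 4)); /* 73 on 64-bit */
            return 0;
    }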
index 6b88a35fb048ed8f302f6025b743d30d7646ec41..4de95bbff4860b2d39ad6fc40cbf95ab849929b1 100644 (file)
@@ -434,12 +434,12 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo
        if (!parent || d_really_is_negative(parent))
                goto out;
        /* serialize with d_delete() */
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        /* Make sure the object is still alive */
        if (simple_positive(file->f_path.dentry)
        && kref_get_unless_zero(kref))
                ret = 0;
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        if (!ret) {
                ret = single_open(file, show, data);
                if (ret)
@@ -771,6 +771,13 @@ static int device_data_gen_id_show(struct seq_file *m, void *ignored)
        return 0;
 }
 
+static int device_ed_gen_id_show(struct seq_file *m, void *ignored)
+{
+       struct drbd_device *device = m->private;
+       seq_printf(m, "0x%016llX\n", (unsigned long long)device->ed_uuid);
+       return 0;
+}
+
 #define drbd_debugfs_device_attr(name)                                         \
 static int device_ ## name ## _open(struct inode *inode, struct file *file)    \
 {                                                                              \
@@ -796,6 +803,7 @@ drbd_debugfs_device_attr(oldest_requests)
 drbd_debugfs_device_attr(act_log_extents)
 drbd_debugfs_device_attr(resync_extents)
 drbd_debugfs_device_attr(data_gen_id)
+drbd_debugfs_device_attr(ed_gen_id)
 
 void drbd_debugfs_device_add(struct drbd_device *device)
 {
@@ -839,6 +847,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
        DCF(act_log_extents);
        DCF(resync_extents);
        DCF(data_gen_id);
+       DCF(ed_gen_id);
 #undef DCF
        return;
 
@@ -854,6 +863,7 @@ void drbd_debugfs_device_cleanup(struct drbd_device *device)
        drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
        drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
        drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
+       drbd_debugfs_remove(&device->debugfs_vol_ed_gen_id);
        drbd_debugfs_remove(&device->debugfs_vol);
 }
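Each attribute added above (device_ed_gen_id_show and friends) follows the standard seq_file/single_open pattern that the drbd_debugfs_device_attr() macro stamps out. A minimal sketch of what one expansion amounts to (names hypothetical; the real macro additionally routes through drbd_single_open() shown earlier to serialize against d_delete()):

    static int demo_show(struct seq_file *m, void *ignored)
    {
            struct drbd_device *device = m->private;

            seq_printf(m, "0x%016llX\n", (unsigned long long)device->ed_uuid);
            return 0;
    }

    static int demo_open(struct inode *inode, struct file *file)
    {
            return single_open(file, demo_show, inode->i_private);
    }

    static const struct file_operations demo_fops = {
            .owner   = THIS_MODULE,
            .open    = demo_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };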
 
index e66d453a5f2b1bcdf10ffa367917205081bd7587..34bc84efc29e99085d9ccdeb6a4fa49f5b079446 100644 (file)
@@ -77,13 +77,6 @@ extern int fault_devs;
 extern char usermode_helper[];
 
 
-/* I don't remember why XCPU ...
- * This is used to wake the asender,
- * and to interrupt sending the sending task
- * on disconnect.
- */
-#define DRBD_SIG SIGXCPU
-
 /* This is used to stop/restart our threads.
  * Cannot use SIGTERM nor SIGKILL, since these
  * are sent out by init on runlevel changes
@@ -292,6 +285,9 @@ struct drbd_device_work {
 
 extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *);
 
+extern void lock_all_resources(void);
+extern void unlock_all_resources(void);
+
 struct drbd_request {
        struct drbd_work w;
        struct drbd_device *device;
@@ -504,7 +500,6 @@ enum {
 
        MD_NO_FUA,              /* Users wants us to not use FUA/FLUSH on meta data dev */
 
-       SUSPEND_IO,             /* suspend application io */
        BITMAP_IO,              /* suspend application io;
                                   once no more io in flight, start bitmap io */
        BITMAP_IO_QUEUED,       /* Started bitmap IO */
@@ -541,9 +536,6 @@ struct drbd_bitmap; /* opaque for drbd_device */
 /* definition of bits in bm_flags to be used in drbd_bm_lock
  * and drbd_bitmap_io and friends. */
 enum bm_flag {
-       /* do we need to kfree, or vfree bm_pages? */
-       BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
-
        /* currently locked for bulk operation */
        BM_LOCKED_MASK = 0xf,
 
@@ -632,12 +624,6 @@ struct bm_io_work {
        void (*done)(struct drbd_device *device, int rv);
 };
 
-enum write_ordering_e {
-       WO_none,
-       WO_drain_io,
-       WO_bdev_flush,
-};
-
 struct fifo_buffer {
        unsigned int head_index;
        unsigned int size;
@@ -650,8 +636,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size);
 enum {
        NET_CONGESTED,          /* The data socket is congested */
        RESOLVE_CONFLICTS,      /* Set on one node, cleared on the peer! */
-       SEND_PING,              /* whether asender should send a ping asap */
-       SIGNAL_ASENDER,         /* whether asender wants to be interrupted */
+       SEND_PING,
        GOT_PING_ACK,           /* set when we receive a ping_ack packet, ping_wait gets woken */
        CONN_WD_ST_CHG_REQ,     /* A cluster wide state change on the connection is active */
        CONN_WD_ST_CHG_OKAY,
@@ -670,6 +655,8 @@ enum {
        DEVICE_WORK_PENDING,    /* tell worker that some device has pending work */
 };
 
+enum which_state { NOW, OLD = NOW, NEW };
+
 struct drbd_resource {
        char *name;
 #ifdef CONFIG_DEBUG_FS
@@ -755,7 +742,8 @@ struct drbd_connection {
        unsigned long last_reconnect_jif;
        struct drbd_thread receiver;
        struct drbd_thread worker;
-       struct drbd_thread asender;
+       struct drbd_thread ack_receiver;
+       struct workqueue_struct *ack_sender;
 
        /* cached pointers,
         * so we can look up the oldest pending requests more quickly.
@@ -774,6 +762,8 @@ struct drbd_connection {
        struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
 
        struct {
+               unsigned long last_sent_barrier_jif;
+
                /* whether this sender thread
                 * has processed a single write yet. */
                bool seen_any_write_yet;
@@ -788,6 +778,17 @@ struct drbd_connection {
        } send;
 };
 
+static inline bool has_net_conf(struct drbd_connection *connection)
+{
+       bool has_net_conf;
+
+       rcu_read_lock();
+       has_net_conf = rcu_dereference(connection->net_conf);
+       rcu_read_unlock();
+
+       return has_net_conf;
+}
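has_net_conf() is an instance of the common RCU read-side existence check: take the read lock, sample the protected pointer, drop the lock, and use only the boolean result afterwards. The same shape for any RCU-managed pointer (a generic sketch, kernel context assumed):

    struct demo_obj {
            struct demo_conf __rcu *conf;
    };

    static bool demo_has_conf(struct demo_obj *obj)
    {
            bool present;

            rcu_read_lock();
            present = rcu_dereference(obj->conf) != NULL;
            rcu_read_unlock();

            /* only the boolean may be used after unlock;
             * the pointer itself could already be stale */
            return present;
    }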
+
 void __update_timing_details(
                struct drbd_thread_timing_details *tdp,
                unsigned int *cb_nr,
@@ -811,6 +812,7 @@ struct drbd_peer_device {
        struct list_head peer_devices;
        struct drbd_device *device;
        struct drbd_connection *connection;
+       struct work_struct send_acks_work;
 #ifdef CONFIG_DEBUG_FS
        struct dentry *debugfs_peer_dev;
 #endif
@@ -829,6 +831,7 @@ struct drbd_device {
        struct dentry *debugfs_vol_act_log_extents;
        struct dentry *debugfs_vol_resync_extents;
        struct dentry *debugfs_vol_data_gen_id;
+       struct dentry *debugfs_vol_ed_gen_id;
 #endif
 
        unsigned int vnr;       /* volume number within the connection */
@@ -873,6 +876,7 @@ struct drbd_device {
        atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
        atomic_t unacked_cnt;    /* Need to send replies for */
        atomic_t local_cnt;      /* Waiting for local completion */
+       atomic_t suspend_cnt;
 
        /* Interval tree of pending local requests */
        struct rb_root read_requests;
@@ -1020,6 +1024,12 @@ static inline struct drbd_peer_device *first_peer_device(struct drbd_device *dev
        return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
 }
 
+static inline struct drbd_peer_device *
+conn_peer_device(struct drbd_connection *connection, int volume_number)
+{
+       return idr_find(&connection->peer_devices, volume_number);
+}
+
 #define for_each_resource(resource, _resources) \
        list_for_each_entry(resource, _resources, resources)
 
@@ -1113,7 +1123,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
 extern int drbd_send_bitmap(struct drbd_device *device);
 extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
 extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
-extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
+extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
 extern void drbd_device_cleanup(struct drbd_device *device);
 void drbd_print_uuids(struct drbd_device *device, const char *text);
 
@@ -1424,7 +1434,7 @@ extern struct bio_set *drbd_md_io_bio_set;
 /* to allocate from that set */
 extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 
-extern rwlock_t global_state_lock;
+extern struct mutex resources_mutex;
 
 extern int conn_lowest_minor(struct drbd_connection *connection);
 extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
@@ -1454,6 +1464,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 
 
 /* drbd_nl.c */
+
+extern struct mutex notification_mutex;
+
 extern void drbd_suspend_io(struct drbd_device *device);
 extern void drbd_resume_io(struct drbd_device *device);
 extern char *ppsize(char *buf, unsigned long long size);
@@ -1536,7 +1549,9 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
 
 /* drbd_receiver.c */
 extern int drbd_receiver(struct drbd_thread *thi);
-extern int drbd_asender(struct drbd_thread *thi);
+extern int drbd_ack_receiver(struct drbd_thread *thi);
+extern void drbd_send_ping_wf(struct work_struct *ws);
+extern void drbd_send_acks_wf(struct work_struct *ws);
 extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
 extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
                bool throttle_if_app_is_waiting);
@@ -1649,7 +1664,7 @@ extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int s
 #define drbd_rs_failed_io(device, sector, size) \
        __drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
 extern void drbd_al_shrink(struct drbd_device *device);
-extern int drbd_initialize_al(struct drbd_device *, void *);
+extern int drbd_al_initialize(struct drbd_device *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */
@@ -1668,6 +1683,29 @@ struct sib_info {
 };
 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
 
+extern void notify_resource_state(struct sk_buff *,
+                                 unsigned int,
+                                 struct drbd_resource *,
+                                 struct resource_info *,
+                                 enum drbd_notification_type);
+extern void notify_device_state(struct sk_buff *,
+                               unsigned int,
+                               struct drbd_device *,
+                               struct device_info *,
+                               enum drbd_notification_type);
+extern void notify_connection_state(struct sk_buff *,
+                                   unsigned int,
+                                   struct drbd_connection *,
+                                   struct connection_info *,
+                                   enum drbd_notification_type);
+extern void notify_peer_device_state(struct sk_buff *,
+                                    unsigned int,
+                                    struct drbd_peer_device *,
+                                    struct peer_device_info *,
+                                    enum drbd_notification_type);
+extern void notify_helper(enum drbd_notification_type, struct drbd_device *,
+                         struct drbd_connection *, const char *, int);
+
 /*
  * inline helper functions
  *************************/
@@ -1694,19 +1732,6 @@ static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_r
        return 0;
 }
 
-static inline enum drbd_state_rv
-_drbd_set_state(struct drbd_device *device, union drbd_state ns,
-               enum chg_state_flags flags, struct completion *done)
-{
-       enum drbd_state_rv rv;
-
-       read_lock(&global_state_lock);
-       rv = __drbd_set_state(device, ns, flags, done);
-       read_unlock(&global_state_lock);
-
-       return rv;
-}
-
 static inline union drbd_state drbd_read_state(struct drbd_device *device)
 {
        struct drbd_resource *resource = device->resource;
@@ -1937,16 +1962,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit)
 
 extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
 
-static inline void wake_asender(struct drbd_connection *connection)
+/* To get the ack_receiver out of the blocking network stack,
+ * so it can change its sk_rcvtimeo from idle- to ping-timeout,
+ * and send a ping, we need to send a signal.
+ * Which signal we send is irrelevant. */
+static inline void wake_ack_receiver(struct drbd_connection *connection)
 {
-       if (test_bit(SIGNAL_ASENDER, &connection->flags))
-               force_sig(DRBD_SIG, connection->asender.task);
+       struct task_struct *task = connection->ack_receiver.task;
+       if (task && get_t_state(&connection->ack_receiver) == RUNNING)
+               force_sig(SIGXCPU, task);
 }
 
 static inline void request_ping(struct drbd_connection *connection)
 {
        set_bit(SEND_PING, &connection->flags);
-       wake_asender(connection);
+       wake_ack_receiver(connection);
 }
 
 extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
@@ -2230,7 +2260,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device)
 
        if (drbd_suspended(device))
                return false;
-       if (test_bit(SUSPEND_IO, &device->flags))
+       if (atomic_read(&device->suspend_cnt))
                return false;
 
        /* to avoid potential deadlock or bitmap corruption,
index 74d97f4bac3488ac767ef97b20dfb3e2277e50cf..5b43dfb798191d0c90cc3b5115956575e56410f9 100644 (file)
@@ -117,6 +117,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0
  */
 struct idr drbd_devices;
 struct list_head drbd_resources;
+struct mutex resources_mutex;
 
 struct kmem_cache *drbd_request_cache;
 struct kmem_cache *drbd_ee_cache;      /* peer requests */
@@ -1435,8 +1436,8 @@ static int we_should_drop_the_connection(struct drbd_connection *connection, str
        /* long elapsed = (long)(jiffies - device->last_received); */
 
        drop_it =   connection->meta.socket == sock
-               || !connection->asender.task
-               || get_t_state(&connection->asender) != RUNNING
+               || !connection->ack_receiver.task
+               || get_t_state(&connection->ack_receiver) != RUNNING
                || connection->cstate < C_WF_REPORT_PARAMS;
 
        if (drop_it)
@@ -1793,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
                drbd_update_congested(connection);
        }
        do {
-               /* STRANGE
-                * tcp_sendmsg does _not_ use its size parameter at all ?
-                *
-                * -EAGAIN on timeout, -EINTR on signal.
-                */
-/* THINK
- * do we need to block DRBD_SIG if sock == &meta.socket ??
- * otherwise wake_asender() might interrupt some send_*Ack !
- */
                rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
                if (rv == -EAGAIN) {
                        if (we_should_drop_the_connection(connection, sock))
@@ -2000,7 +1992,7 @@ void drbd_device_cleanup(struct drbd_device *device)
                drbd_bm_cleanup(device);
        }
 
-       drbd_free_ldev(device->ldev);
+       drbd_backing_dev_free(device, device->ldev);
        device->ldev = NULL;
 
        clear_bit(AL_SUSPENDED, &device->flags);
@@ -2179,7 +2171,7 @@ void drbd_destroy_device(struct kref *kref)
        if (device->this_bdev)
                bdput(device->this_bdev);
 
-       drbd_free_ldev(device->ldev);
+       drbd_backing_dev_free(device, device->ldev);
        device->ldev = NULL;
 
        drbd_release_all_peer_reqs(device);
@@ -2563,7 +2555,7 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
                cpumask_copy(resource->cpu_mask, new_cpu_mask);
                for_each_connection_rcu(connection, resource) {
                        connection->receiver.reset_cpu_mask = 1;
-                       connection->asender.reset_cpu_mask = 1;
+                       connection->ack_receiver.reset_cpu_mask = 1;
                        connection->worker.reset_cpu_mask = 1;
                }
        }
@@ -2590,7 +2582,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
        kref_init(&resource->kref);
        idr_init(&resource->devices);
        INIT_LIST_HEAD(&resource->connections);
-       resource->write_ordering = WO_bdev_flush;
+       resource->write_ordering = WO_BDEV_FLUSH;
        list_add_tail_rcu(&resource->resources, &drbd_resources);
        mutex_init(&resource->conf_update);
        mutex_init(&resource->adm_mutex);
@@ -2652,8 +2644,8 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
        connection->receiver.connection = connection;
        drbd_thread_init(resource, &connection->worker, drbd_worker, "worker");
        connection->worker.connection = connection;
-       drbd_thread_init(resource, &connection->asender, drbd_asender, "asender");
-       connection->asender.connection = connection;
+       drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv");
+       connection->ack_receiver.connection = connection;
 
        kref_init(&connection->kref);
 
@@ -2702,8 +2694,8 @@ static int init_submitter(struct drbd_device *device)
 {
        /* opencoded create_singlethread_workqueue(),
         * to be able to say "drbd%d", ..., minor */
-       device->submit.wq = alloc_workqueue("drbd%u_submit",
-                       WQ_UNBOUND | WQ_MEM_RECLAIM, 1, device->minor);
+       device->submit.wq =
+               alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor);
        if (!device->submit.wq)
                return -ENOMEM;
 
@@ -2820,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
                        goto out_idr_remove_from_resource;
                }
                kref_get(&connection->kref);
+               INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
        }
 
        if (init_submitter(device)) {
@@ -2923,7 +2916,7 @@ static int __init drbd_init(void)
        drbd_proc = NULL; /* play safe for drbd_cleanup */
        idr_init(&drbd_devices);
 
-       rwlock_init(&global_state_lock);
+       mutex_init(&resources_mutex);
        INIT_LIST_HEAD(&drbd_resources);
 
        err = drbd_genl_register();
@@ -2971,18 +2964,6 @@ fail:
        return err;
 }
 
-void drbd_free_ldev(struct drbd_backing_dev *ldev)
-{
-       if (ldev == NULL)
-               return;
-
-       blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
-       blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
-
-       kfree(ldev->disk_conf);
-       kfree(ldev);
-}
-
 static void drbd_free_one_sock(struct drbd_socket *ds)
 {
        struct socket *s;
@@ -3277,6 +3258,10 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
         * and read it. */
        bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
        bdev->md.md_offset = drbd_md_ss(bdev);
+       /* Even for (flexible or indexed) external meta data,
+        * restrict ourselves to the 4k superblock for now.
+        * This affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */
+       bdev->md.md_size_sect = 8;
 
        if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) {
                /* NOTE: can't do normal error processing here as this is
@@ -3578,7 +3563,9 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
 
        spin_lock_irq(&device->resource->req_lock);
        set_bit(BITMAP_IO, &device->flags);
-       if (atomic_read(&device->ap_bio_cnt) == 0) {
+       /* don't wait for pending application IO if the caller indicates that
+        * application IO does not conflict anyway. */
+       if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
                if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
                        drbd_queue_work(&first_peer_device(device)->connection->sender_work,
                                        &device->bm_io_work.w);
@@ -3746,6 +3733,27 @@ int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i)
        return 0;
 }
 
+void lock_all_resources(void)
+{
+       struct drbd_resource *resource;
+       int __maybe_unused i = 0;
+
+       mutex_lock(&resources_mutex);
+       local_irq_disable();
+       for_each_resource(resource, &drbd_resources)
+               spin_lock_nested(&resource->req_lock, i++);
+}
+
+void unlock_all_resources(void)
+{
+       struct drbd_resource *resource;
+
+       for_each_resource(resource, &drbd_resources)
+               spin_unlock(&resource->req_lock);
+       local_irq_enable();
+       mutex_unlock(&resources_mutex);
+}
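lock_all_resources() takes every per-resource req_lock inside one irq-off section; the incrementing subclass handed to spin_lock_nested() is purely a lockdep annotation stating that nesting locks of the same class is intentional here. A reduced sketch of the idiom (hypothetical types, kernel context assumed):

    struct demo_res {
            struct list_head list;
            spinlock_t lock;
    };

    static void demo_lock_all(struct list_head *resources)
    {
            struct demo_res *r;
            int i = 0;

            local_irq_disable();            /* one irq-off section for all */
            list_for_each_entry(r, resources, list)
                    spin_lock_nested(&r->lock, i++);
    }

resources_mutex keeps the list stable while the locks are taken, and the unlock side releases in list order before re-enabling interrupts, mirroring unlock_all_resources() above.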
+
 #ifdef CONFIG_DRBD_FAULT_INJECTION
 /* Fault insertion support including random number generator shamelessly
  * stolen from kernel/rcutorture.c */
index e80cbefbc2b548761fe5f531773b1022a21353eb..c055c5e12f248dc77b60c89d4b723f0de3f934b3 100644 (file)
@@ -36,6 +36,7 @@
 #include "drbd_int.h"
 #include "drbd_protocol.h"
 #include "drbd_req.h"
+#include "drbd_state_change.h"
 #include <asm/unaligned.h>
 #include <linux/drbd_limits.h>
 #include <linux/kthread.h>
@@ -75,11 +76,24 @@ int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
 /* .dumpit */
 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_devices_done(struct netlink_callback *cb);
+int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_connections_done(struct netlink_callback *cb);
+int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
+int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
 
 #include <linux/drbd_genl_api.h>
 #include "drbd_nla.h"
 #include <linux/genl_magic_func.h>
 
+static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
+static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */
+
+DEFINE_MUTEX(notification_mutex);
+
 /* used blkdev_get_by_path, to claim our meta data device(s) */
 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
 
@@ -349,6 +363,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
        sib.sib_reason = SIB_HELPER_PRE;
        sib.helper_name = cmd;
        drbd_bcast_event(device, &sib);
+       notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
        ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
        if (ret)
                drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
@@ -361,6 +376,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
        sib.sib_reason = SIB_HELPER_POST;
        sib.helper_exit_code = ret;
        drbd_bcast_event(device, &sib);
+       notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);
 
        if (current == connection->worker.task)
                clear_bit(CALLBACK_PENDING, &connection->flags);
@@ -388,6 +404,7 @@ static int conn_khelper(struct drbd_connection *connection, char *cmd)
 
        drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
        /* TODO: conn_bcast_event() ?? */
+       notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
 
        ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
        if (ret)
@@ -399,6 +416,7 @@ static int conn_khelper(struct drbd_connection *connection, char *cmd)
                          usermode_helper, cmd, resource_name,
                          (ret >> 8) & 0xff, ret);
        /* TODO: conn_bcast_event() ?? */
+       notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
 
        if (ret < 0) /* Ignore any ERRNOs we got. */
                ret = 0;
@@ -847,9 +865,11 @@ char *ppsize(char *buf, unsigned long long size)
  * and can be long lived.
  * This changes an device->flag, is triggered by drbd internals,
  * and should be short-lived. */
+/* It needs to be a counter, since multiple threads might
+   independently suspend and resume IO. */
 void drbd_suspend_io(struct drbd_device *device)
 {
-       set_bit(SUSPEND_IO, &device->flags);
+       atomic_inc(&device->suspend_cnt);
        if (drbd_suspended(device))
                return;
        wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
@@ -857,8 +877,8 @@ void drbd_suspend_io(struct drbd_device *device)
 
 void drbd_resume_io(struct drbd_device *device)
 {
-       clear_bit(SUSPEND_IO, &device->flags);
-       wake_up(&device->misc_wait);
+       if (atomic_dec_and_test(&device->suspend_cnt))
+               wake_up(&device->misc_wait);
 }
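suspend_cnt behaves like a suspend reference count: every drbd_suspend_io() caller holds one reference, only the last drbd_resume_io() wakes the waiters, and may_inc_ap_bio() (changed in drbd_int.h above) gates on the counter being zero. The bare idiom, as a sketch (kernel context assumed):

    static atomic_t suspend_cnt = ATOMIC_INIT(0);
    static DECLARE_WAIT_QUEUE_HEAD(demo_wait);

    static void demo_suspend(void)
    {
            atomic_inc(&suspend_cnt);       /* nestable: one ref per caller */
    }

    static void demo_resume(void)
    {
            if (atomic_dec_and_test(&suspend_cnt))
                    wake_up(&demo_wait);    /* only the last resume wakes */
    }

    static void demo_wait_unsuspended(void)
    {
            wait_event(demo_wait, atomic_read(&suspend_cnt) == 0);
    }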
 
 /**
@@ -871,27 +891,32 @@ void drbd_resume_io(struct drbd_device *device)
 enum determine_dev_size
 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 {
-       sector_t prev_first_sect, prev_size; /* previous meta location */
-       sector_t la_size_sect, u_size;
+       struct md_offsets_and_sizes {
+               u64 last_agreed_sect;
+               u64 md_offset;
+               s32 al_offset;
+               s32 bm_offset;
+               u32 md_size_sect;
+
+               u32 al_stripes;
+               u32 al_stripe_size_4k;
+       } prev;
+       sector_t u_size, size;
        struct drbd_md *md = &device->ldev->md;
-       u32 prev_al_stripe_size_4k;
-       u32 prev_al_stripes;
-       sector_t size;
        char ppb[10];
        void *buffer;
 
        int md_moved, la_size_changed;
        enum determine_dev_size rv = DS_UNCHANGED;
 
-       /* race:
-        * application request passes inc_ap_bio,
-        * but then cannot get an AL-reference.
-        * this function later may wait on ap_bio_cnt == 0. -> deadlock.
+       /* We may change the on-disk offsets of our meta data below.  Lock out
+        * anything that may cause meta data IO, to avoid acting on incomplete
+        * layout changes or scribbling over meta data that is in the process
+        * of being moved.
         *
-        * to avoid that:
-        * Suspend IO right here.
-        * still lock the act_log to not trigger ASSERTs there.
-        */
+        * "Move" is not exactly correct, btw: currently we keep all our meta
+        * data in core memory, so to "move" it we just write it all out; there
+        * are no reads. */
        drbd_suspend_io(device);
        buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
        if (!buffer) {
@@ -899,19 +924,17 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
                return DS_ERROR;
        }
 
-       /* no wait necessary anymore, actually we could assert that */
-       wait_event(device->al_wait, lc_try_lock(device->act_log));
-
-       prev_first_sect = drbd_md_first_sector(device->ldev);
-       prev_size = device->ldev->md.md_size_sect;
-       la_size_sect = device->ldev->md.la_size_sect;
+       /* remember current offset and sizes */
+       prev.last_agreed_sect = md->la_size_sect;
+       prev.md_offset = md->md_offset;
+       prev.al_offset = md->al_offset;
+       prev.bm_offset = md->bm_offset;
+       prev.md_size_sect = md->md_size_sect;
+       prev.al_stripes = md->al_stripes;
+       prev.al_stripe_size_4k = md->al_stripe_size_4k;
 
        if (rs) {
                /* rs is non NULL if we should change the AL layout only */
-
-               prev_al_stripes = md->al_stripes;
-               prev_al_stripe_size_4k = md->al_stripe_size_4k;
-
                md->al_stripes = rs->al_stripes;
                md->al_stripe_size_4k = rs->al_stripe_size / 4;
                md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
@@ -924,7 +947,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
        rcu_read_unlock();
        size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
 
-       if (size < la_size_sect) {
+       if (size < prev.last_agreed_sect) {
                if (rs && u_size == 0) {
                        /* Remove "rs &&" later. This check should always be active, but
                           right now the receiver expects the permissive behavior */
@@ -945,30 +968,29 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
                err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
                if (unlikely(err)) {
                        /* currently there is only one error: ENOMEM! */
-                       size = drbd_bm_capacity(device)>>1;
+                       size = drbd_bm_capacity(device);
                        if (size == 0) {
                                drbd_err(device, "OUT OF MEMORY! "
                                    "Could not allocate bitmap!\n");
                        } else {
                                drbd_err(device, "BM resizing failed. "
-                                   "Leaving size unchanged at size = %lu KB\n",
-                                   (unsigned long)size);
+                                   "Leaving size unchanged\n");
                        }
                        rv = DS_ERROR;
                }
                /* racy, see comments above. */
                drbd_set_my_capacity(device, size);
-               device->ldev->md.la_size_sect = size;
+               md->la_size_sect = size;
                drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
                     (unsigned long long)size>>1);
        }
        if (rv <= DS_ERROR)
                goto err_out;
 
-       la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
+       la_size_changed = (prev.last_agreed_sect != md->la_size_sect);
 
-       md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
-               || prev_size       != device->ldev->md.md_size_sect;
+       md_moved = prev.md_offset    != md->md_offset
+               || prev.md_size_sect != md->md_size_sect;
 
        if (la_size_changed || md_moved || rs) {
                u32 prev_flags;
@@ -977,20 +999,29 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
                 * Clear the timer, to avoid scary "timer expired!" messages,
                 * "Superblock" is written out at least twice below, anyways. */
                del_timer(&device->md_sync_timer);
-               drbd_al_shrink(device); /* All extents inactive. */
 
+               /* We won't change the "al-extents" setting, we just may need
+                * to move the on-disk location of the activity log ringbuffer.
+                * Lock for transaction is good enough, it may well be "dirty"
+                * or even "starving". */
+               wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
+
+               /* mark current on-disk bitmap and activity log as unreliable */
                prev_flags = md->flags;
-               md->flags &= ~MDF_PRIMARY_IND;
+               md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
                drbd_md_write(device, buffer);
 
+               drbd_al_initialize(device, buffer);
+
                drbd_info(device, "Writing the whole bitmap, %s\n",
                         la_size_changed && md_moved ? "size changed and md moved" :
                         la_size_changed ? "size changed" : "md moved");
                /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
                drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
                               "size changed", BM_LOCKED_MASK);
-               drbd_initialize_al(device, buffer);
 
+               /* on-disk bitmap and activity log is authoritative again
+                * (unless there was an IO error meanwhile...) */
                md->flags = prev_flags;
                drbd_md_write(device, buffer);
 
@@ -999,20 +1030,22 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
                                  md->al_stripes, md->al_stripe_size_4k * 4);
        }
 
-       if (size > la_size_sect)
-               rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
-       if (size < la_size_sect)
+       if (size > prev.last_agreed_sect)
+               rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
+       if (size < prev.last_agreed_sect)
                rv = DS_SHRUNK;
 
        if (0) {
        err_out:
-               if (rs) {
-                       md->al_stripes = prev_al_stripes;
-                       md->al_stripe_size_4k = prev_al_stripe_size_4k;
-                       md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
-
-                       drbd_md_set_sector_offsets(device, device->ldev);
-               }
+               /* restore previous offset and sizes */
+               md->la_size_sect = prev.last_agreed_sect;
+               md->md_offset = prev.md_offset;
+               md->al_offset = prev.al_offset;
+               md->bm_offset = prev.bm_offset;
+               md->md_size_sect = prev.md_size_sect;
+               md->al_stripes = prev.al_stripes;
+               md->al_stripe_size_4k = prev.al_stripe_size_4k;
+               md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
        }
        lc_unlock(device->act_log);
        wake_up(&device->al_wait);
@@ -1115,8 +1148,7 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
                lc_destroy(n);
                return -EBUSY;
        } else {
-               if (t)
-                       lc_destroy(t);
+               lc_destroy(t);
        }
        drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */
        return 0;
@@ -1151,21 +1183,20 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
        if (b) {
                struct drbd_connection *connection = first_peer_device(device)->connection;
 
+               blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS);
+
                if (blk_queue_discard(b) &&
                    (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
-                       /* For now, don't allow more than one activity log extent worth of data
-                        * to be discarded in one go. We may need to rework drbd_al_begin_io()
-                        * to allow for even larger discard ranges */
-                       blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS);
-
+                       /* We don't care, stacking below should fix it for the local device.
+                        * Whether or not it is a suitable granularity on the remote device
+                        * is not our problem, really. If you care, you need to
+                        * use devices with similar topology on all peers. */
+                       q->limits.discard_granularity = 512;
                        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
-                       /* REALLY? Is stacking secdiscard "legal"? */
-                       if (blk_queue_secdiscard(b))
-                               queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
                } else {
                        blk_queue_max_discard_sectors(q, 0);
                        queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
-                       queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
+                       q->limits.discard_granularity = 0;
                }
 
                blk_queue_stack_limits(q, b);
@@ -1177,6 +1208,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
                        q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
                }
        }
+       /* To avoid confusion, if this queue does not support discard, clear
+        * max_discard_sectors, which is what lsblk -D reports to the user.  */
+       if (!blk_queue_discard(q)) {
+               blk_queue_max_discard_sectors(q, 0);
+               q->limits.discard_granularity = 0;
+       }
 }
 
 void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
@@ -1241,8 +1278,8 @@ static void conn_reconfig_done(struct drbd_connection *connection)
                connection->cstate == C_STANDALONE;
        spin_unlock_irq(&connection->resource->req_lock);
        if (stop_threads) {
-               /* asender is implicitly stopped by receiver
-                * in conn_disconnect() */
+               /* ack_receiver thread and ack_sender workqueue are implicitly
+                * stopped by receiver in conn_disconnect() */
                drbd_thread_stop(&connection->receiver);
                drbd_thread_stop(&connection->worker);
        }
@@ -1389,13 +1426,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
                goto fail_unlock;
        }
 
-       write_lock_irq(&global_state_lock);
+       lock_all_resources();
        retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
        if (retcode == NO_ERROR) {
                rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
                drbd_resync_after_changed(device);
        }
-       write_unlock_irq(&global_state_lock);
+       unlock_all_resources();
 
        if (retcode != NO_ERROR)
                goto fail_unlock;
@@ -1418,7 +1455,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
                set_bit(MD_NO_FUA, &device->flags);
 
        if (write_ordering_changed(old_disk_conf, new_disk_conf))
-               drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
+               drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
 
        drbd_md_sync(device);
 
@@ -1449,6 +1486,88 @@ success:
        return 0;
 }
 
+static struct block_device *open_backing_dev(struct drbd_device *device,
+               const char *bdev_path, void *claim_ptr, bool do_bd_link)
+{
+       struct block_device *bdev;
+       int err = 0;
+
+       bdev = blkdev_get_by_path(bdev_path,
+                                 FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr);
+       if (IS_ERR(bdev)) {
+               drbd_err(device, "open(\"%s\") failed with %ld\n",
+                               bdev_path, PTR_ERR(bdev));
+               return bdev;
+       }
+
+       if (!do_bd_link)
+               return bdev;
+
+       err = bd_link_disk_holder(bdev, device->vdisk);
+       if (err) {
+               blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+               drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
+                               bdev_path, err);
+               bdev = ERR_PTR(err);
+       }
+       return bdev;
+}
+
+static int open_backing_devices(struct drbd_device *device,
+               struct disk_conf *new_disk_conf,
+               struct drbd_backing_dev *nbc)
+{
+       struct block_device *bdev;
+
+       bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
+       if (IS_ERR(bdev))
+               return ERR_OPEN_DISK;
+       nbc->backing_bdev = bdev;
+
+       /*
+        * meta_dev_idx >= 0: external fixed size, possibly multiple
+        * drbd sharing one meta device.  TODO in that case, paranoia
+        * check that [md_bdev, meta_dev_idx] is not yet used by some
+        * other drbd minor!  (if you use drbd.conf + drbdadm, that
+        * should check it for you already; but if you don't, or
+        * someone fooled it, we need to double check here)
+        */
+       bdev = open_backing_dev(device, new_disk_conf->meta_dev,
+               /* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
+                * if potentially shared with other drbd minors */
+                       (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
+               /* avoid double bd_claim_by_disk() for the same (source,target) tuple,
+                * as would happen with internal metadata. */
+                       (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
+                        new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
+       if (IS_ERR(bdev))
+               return ERR_OPEN_MD_DISK;
+       nbc->md_bdev = bdev;
+       return NO_ERROR;
+}
+
+static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
+       bool do_bd_unlink)
+{
+       if (!bdev)
+               return;
+       if (do_bd_unlink)
+               bd_unlink_disk_holder(bdev, device->vdisk);
+       blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+}
+
+void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
+{
+       if (ldev == NULL)
+               return;
+
+       close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev);
+       close_backing_dev(device, ldev->backing_bdev, true);
+
+       kfree(ldev->disk_conf);
+       kfree(ldev);
+}
+
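
The helpers above pair blkdev_get_by_path(..., FMODE_READ | FMODE_WRITE |
FMODE_EXCL, claim_ptr) with bd_link_disk_holder(), and close_backing_dev()
undoes both in reverse order.  A hypothetical caller (the path is a
stand-in) might look like:

        struct block_device *bdev;

        bdev = open_backing_dev(device, "/dev/sdb1", device, true);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);
        /* ... use bdev as the backing device ... */
        close_backing_dev(device, bdev, true);  /* unlink holder, then put */
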
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 {
        struct drbd_config_context adm_ctx;
@@ -1462,7 +1581,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        sector_t min_md_device_sectors;
        struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
        struct disk_conf *new_disk_conf = NULL;
-       struct block_device *bdev;
        struct lru_cache *resync_lru = NULL;
        struct fifo_buffer *new_plan = NULL;
        union drbd_state ns, os;
@@ -1478,7 +1596,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        device = adm_ctx.device;
        mutex_lock(&adm_ctx.resource->adm_mutex);
        peer_device = first_peer_device(device);
-       connection = peer_device ? peer_device->connection : NULL;
+       connection = peer_device->connection;
        conn_reconfig_start(connection);
 
        /* if you want to reconfigure, please tear down first */
@@ -1539,12 +1657,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
                goto fail;
        }
 
-       write_lock_irq(&global_state_lock);
-       retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
-       write_unlock_irq(&global_state_lock);
-       if (retcode != NO_ERROR)
-               goto fail;
-
        rcu_read_lock();
        nc = rcu_dereference(connection->net_conf);
        if (nc) {
@@ -1556,35 +1668,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        }
        rcu_read_unlock();
 
-       bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
-                                 FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
-       if (IS_ERR(bdev)) {
-               drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
-                       PTR_ERR(bdev));
-               retcode = ERR_OPEN_DISK;
-               goto fail;
-       }
-       nbc->backing_bdev = bdev;
-
-       /*
-        * meta_dev_idx >= 0: external fixed size, possibly multiple
-        * drbd sharing one meta device.  TODO in that case, paranoia
-        * check that [md_bdev, meta_dev_idx] is not yet used by some
-        * other drbd minor!  (if you use drbd.conf + drbdadm, that
-        * should check it for you already; but if you don't, or
-        * someone fooled it, we need to double check here)
-        */
-       bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
-                                 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-                                 (new_disk_conf->meta_dev_idx < 0) ?
-                                 (void *)device : (void *)drbd_m_holder);
-       if (IS_ERR(bdev)) {
-               drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
-                       PTR_ERR(bdev));
-               retcode = ERR_OPEN_MD_DISK;
+       retcode = open_backing_devices(device, new_disk_conf, nbc);
+       if (retcode != NO_ERROR)
                goto fail;
-       }
-       nbc->md_bdev = bdev;
 
        if ((nbc->backing_bdev == nbc->md_bdev) !=
            (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
@@ -1707,6 +1793,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
                goto force_diskless_dec;
        }
 
+       lock_all_resources();
+       retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
+       if (retcode != NO_ERROR) {
+               unlock_all_resources();
+               goto force_diskless_dec;
+       }
+
        /* Reset the "barriers don't work" bits here, then force meta data to
         * be written, to ensure we determine if barriers are supported. */
        if (new_disk_conf->md_flushes)
@@ -1727,7 +1820,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        new_disk_conf = NULL;
        new_plan = NULL;
 
-       drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
+       drbd_resync_after_changed(device);
+       drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
+       unlock_all_resources();
 
        if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
                set_bit(CRASHED_PRIMARY, &device->flags);
@@ -1875,12 +1970,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
  fail:
        conn_reconfig_done(connection);
        if (nbc) {
-               if (nbc->backing_bdev)
-                       blkdev_put(nbc->backing_bdev,
-                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL);
-               if (nbc->md_bdev)
-                       blkdev_put(nbc->md_bdev,
-                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+               close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev);
+               close_backing_dev(device, nbc->backing_bdev, true);
                kfree(nbc);
        }
        kfree(new_disk_conf);
@@ -1895,6 +1986,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 static int adm_detach(struct drbd_device *device, int force)
 {
        enum drbd_state_rv retcode;
+       void *buffer;
        int ret;
 
        if (force) {
@@ -1905,13 +1997,16 @@ static int adm_detach(struct drbd_device *device, int force)
        }
 
        drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
-       drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
-       retcode = drbd_request_state(device, NS(disk, D_FAILED));
-       drbd_md_put_buffer(device);
+       buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
+       if (buffer) {
+               retcode = drbd_request_state(device, NS(disk, D_FAILED));
+               drbd_md_put_buffer(device);
+       } else /* already <= D_FAILED */
+               retcode = SS_NOTHING_TO_DO;
        /* D_FAILED will transition to DISKLESS. */
+       drbd_resume_io(device);
        ret = wait_event_interruptible(device->misc_wait,
                        device->state.disk != D_FAILED);
-       drbd_resume_io(device);
        if ((int)retcode == (int)SS_IS_DISKLESS)
                retcode = SS_NOTHING_TO_DO;
        if (ret)
@@ -2245,8 +2340,31 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
        return 0;
 }
 
+static void connection_to_info(struct connection_info *info,
+                              struct drbd_connection *connection)
+{
+       info->conn_connection_state = connection->cstate;
+       info->conn_role = conn_highest_peer(connection);
+}
+
+static void peer_device_to_info(struct peer_device_info *info,
+                               struct drbd_peer_device *peer_device)
+{
+       struct drbd_device *device = peer_device->device;
+
+       info->peer_repl_state =
+               max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
+       info->peer_disk_state = device->state.pdsk;
+       info->peer_resync_susp_user = device->state.user_isp;
+       info->peer_resync_susp_peer = device->state.peer_isp;
+       info->peer_resync_susp_dependency = device->state.aftr_isp;
+}
+
 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 {
+       struct connection_info connection_info;
+       enum drbd_notification_type flags;
+       unsigned int peer_devices = 0;
        struct drbd_config_context adm_ctx;
        struct drbd_peer_device *peer_device;
        struct net_conf *old_net_conf, *new_net_conf = NULL;
@@ -2347,6 +2465,22 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
        connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
        memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
 
+       idr_for_each_entry(&connection->peer_devices, peer_device, i) {
+               peer_devices++;
+       }
+
+       connection_to_info(&connection_info, connection);
+       flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+       mutex_lock(&notification_mutex);
+       notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags);
+       idr_for_each_entry(&connection->peer_devices, peer_device, i) {
+               struct peer_device_info peer_device_info;
+
+               peer_device_to_info(&peer_device_info, peer_device);
+               flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+               notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
+       }
+       mutex_unlock(&notification_mutex);
        mutex_unlock(&adm_ctx.resource->conf_update);
 
        rcu_read_lock();
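
The post-decrement in "flags = (peer_devices--) ? NOTIFY_CONTINUES : 0"
marks every event of the burst except the last, so listeners can treat the
series as one atomic update.  A stand-alone sketch of the idiom, with
total and emit_event() as hypothetical stand-ins:

        unsigned int remaining = total - 1;  /* events after the current one */
        enum drbd_notification_type flags;
        unsigned int i;

        for (i = 0; i < total; i++) {
                flags = (remaining--) ? NOTIFY_CONTINUES : 0;
                emit_event(i, NOTIFY_CREATE | flags);
        }
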
@@ -2428,6 +2562,8 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
                        drbd_err(connection,
                                "unexpected rv2=%d in conn_try_disconnect()\n",
                                rv2);
+               /* Unlike in DRBD 9, the state engine has generated
+                * NOTIFY_DESTROY events before clearing connection->net_conf. */
        }
        return rv;
 }
@@ -2585,6 +2721,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
                mutex_unlock(&device->resource->conf_update);
                synchronize_rcu();
                kfree(old_disk_conf);
+               new_disk_conf = NULL;
        }
 
        ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
@@ -2618,6 +2755,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 
  fail_ldev:
        put_ldev(device);
+       kfree(new_disk_conf);
        goto fail;
 }
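
Both hunks above implement one ownership rule: once rcu_assign_pointer()
has published new_disk_conf, the local pointer is set to NULL so that the
shared error path may unconditionally kfree() it.  In sketch form:

        rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
        new_disk_conf = NULL;           /* ownership transferred */
        /* ... */
 fail_ldev:
        kfree(new_disk_conf);           /* no-op once ownership moved */
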
 
@@ -2855,7 +2993,30 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
        mutex_lock(&adm_ctx.resource->adm_mutex);
        device = adm_ctx.device;
        if (test_bit(NEW_CUR_UUID, &device->flags)) {
-               drbd_uuid_new_current(device);
+               if (get_ldev_if_state(device, D_ATTACHING)) {
+                       drbd_uuid_new_current(device);
+                       put_ldev(device);
+               } else {
+                       /* This is effectively a multi-stage "forced down".
+                        * The NEW_CUR_UUID bit is supposedly only set if we
+                        * lost the replication connection, and are configured
+                        * to freeze IO and wait for some fence-peer handler.
+                        * So we still don't have a replication connection.
+                        * And now we don't have a local disk either.  After
+                        * resume, we will fail all pending and new IO, because
+                        * we don't have any data anymore.  Which means we will
+                        * eventually be able to terminate all users of this
+                        * device, and then take it down.  By bumping the
+                        * "effective" data uuid, we make sure that you really
+                        * need to tear down before you reconfigure, we will
+                        * need to tear down before you reconfigure: we will
+                        * then refuse to re-connect or re-attach (because no
+                        */
+                       u64 val;
+                       get_random_bytes(&val, sizeof(u64));
+                       drbd_set_ed_uuid(device, val);
+                       drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
+               }
                clear_bit(NEW_CUR_UUID, &device->flags);
        }
        drbd_suspend_io(device);
@@ -2909,6 +3070,486 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
+/*
+ * The generic netlink dump callbacks are called outside the genl_lock(), so
+ * they cannot use the simple attribute parsing code which uses global
+ * attribute tables.
+ */
+static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
+{
+       const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
+       const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
+       struct nlattr *nla;
+
+       nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen),
+                      DRBD_NLA_CFG_CONTEXT);
+       if (!nla)
+               return NULL;
+       return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
+}
+
+static void resource_to_info(struct resource_info *, struct drbd_resource *);
+
+int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct drbd_genlmsghdr *dh;
+       struct drbd_resource *resource;
+       struct resource_info resource_info;
+       struct resource_statistics resource_statistics;
+       int err;
+
+       rcu_read_lock();
+       if (cb->args[0]) {
+               for_each_resource_rcu(resource, &drbd_resources)
+                       if (resource == (struct drbd_resource *)cb->args[0])
+                               goto found_resource;
+               err = 0;  /* resource was probably deleted */
+               goto out;
+       }
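+       /* Make resource point to the list head (not the first entry). */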
+       resource = list_entry(&drbd_resources,
+                             struct drbd_resource, resources);
+
+found_resource:
+       list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
+               goto put_result;
+       }
+       err = 0;
+       goto out;
+
+put_result:
+       dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+                       cb->nlh->nlmsg_seq, &drbd_genl_family,
+                       NLM_F_MULTI, DRBD_ADM_GET_RESOURCES);
+       err = -ENOMEM;
+       if (!dh)
+               goto out;
+       dh->minor = -1U;
+       dh->ret_code = NO_ERROR;
+       err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
+       if (err)
+               goto out;
+       err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN));
+       if (err)
+               goto out;
+       resource_to_info(&resource_info, resource);
+       err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN));
+       if (err)
+               goto out;
+       resource_statistics.res_stat_write_ordering = resource->write_ordering;
+       err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
+       if (err)
+               goto out;
+       cb->args[0] = (long)resource;
+       genlmsg_end(skb, dh);
+       err = 0;
+
+out:
+       rcu_read_unlock();
+       if (err)
+               return err;
+       return skb->len;
+}
+
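
All dump callbacks added here share one shape: cb->args[] persists across
invocations and serves as the iteration cursor; returning skb->len asks
netlink to call back for more, returning 0 (or an error) ends the dump.
A reduced sketch, with have_next() and put_one() as hypothetical stand-ins:

        static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
        {
                long cursor = cb->args[0];

                if (!have_next(cursor))         /* nothing left */
                        return 0;               /* dump complete */
                if (put_one(skb, cursor))       /* fill one message */
                        return -EMSGSIZE;
                cb->args[0] = cursor + 1;       /* remember position */
                return skb->len;                /* more to come */
        }
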
+static void device_to_statistics(struct device_statistics *s,
+                                struct drbd_device *device)
+{
+       memset(s, 0, sizeof(*s));
+       s->dev_upper_blocked = !may_inc_ap_bio(device);
+       if (get_ldev(device)) {
+               struct drbd_md *md = &device->ldev->md;
+               u64 *history_uuids = (u64 *)s->history_uuids;
+               struct request_queue *q;
+               int n;
+
+               spin_lock_irq(&md->uuid_lock);
+               s->dev_current_uuid = md->uuid[UI_CURRENT];
+               BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1);
+               for (n = 0; n < UI_HISTORY_END - UI_HISTORY_START + 1; n++)
+                       history_uuids[n] = md->uuid[UI_HISTORY_START + n];
+               for (; n < HISTORY_UUIDS; n++)
+                       history_uuids[n] = 0;
+               s->history_uuids_len = HISTORY_UUIDS;
+               spin_unlock_irq(&md->uuid_lock);
+
+               s->dev_disk_flags = md->flags;
+               q = bdev_get_queue(device->ldev->backing_bdev);
+               s->dev_lower_blocked =
+                       bdi_congested(&q->backing_dev_info,
+                                     (1 << WB_async_congested) |
+                                     (1 << WB_sync_congested));
+               put_ldev(device);
+       }
+       s->dev_size = drbd_get_capacity(device->this_bdev);
+       s->dev_read = device->read_cnt;
+       s->dev_write = device->writ_cnt;
+       s->dev_al_writes = device->al_writ_cnt;
+       s->dev_bm_writes = device->bm_writ_cnt;
+       s->dev_upper_pending = atomic_read(&device->ap_bio_cnt);
+       s->dev_lower_pending = atomic_read(&device->local_cnt);
+       s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags);
+       s->dev_exposed_data_uuid = device->ed_uuid;
+}
+
+static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr)
+{
+       if (cb->args[0]) {
+               struct drbd_resource *resource =
+                       (struct drbd_resource *)cb->args[0];
+               kref_put(&resource->kref, drbd_destroy_resource);
+       }
+
+       return 0;
+}
+
+int drbd_adm_dump_devices_done(struct netlink_callback *cb)
+{
+       return put_resource_in_arg0(cb, 7);
+}
+
+static void device_to_info(struct device_info *, struct drbd_device *);
+
+int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct nlattr *resource_filter;
+       struct drbd_resource *resource;
+       struct drbd_device *uninitialized_var(device);
+       int minor, err, retcode;
+       struct drbd_genlmsghdr *dh;
+       struct device_info device_info;
+       struct device_statistics device_statistics;
+       struct idr *idr_to_search;
+
+       resource = (struct drbd_resource *)cb->args[0];
+       if (!cb->args[0] && !cb->args[1]) {
+               resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+               if (resource_filter) {
+                       retcode = ERR_RES_NOT_KNOWN;
+                       resource = drbd_find_resource(nla_data(resource_filter));
+                       if (!resource)
+                               goto put_result;
+                       cb->args[0] = (long)resource;
+               }
+       }
+
+       rcu_read_lock();
+       minor = cb->args[1];
+       idr_to_search = resource ? &resource->devices : &drbd_devices;
+       device = idr_get_next(idr_to_search, &minor);
+       if (!device) {
+               err = 0;
+               goto out;
+       }
+       idr_for_each_entry_continue(idr_to_search, device, minor) {
+               retcode = NO_ERROR;
+               goto put_result;  /* only one iteration */
+       }
+       err = 0;
+       goto out;  /* no more devices */
+
+put_result:
+       dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+                       cb->nlh->nlmsg_seq, &drbd_genl_family,
+                       NLM_F_MULTI, DRBD_ADM_GET_DEVICES);
+       err = -ENOMEM;
+       if (!dh)
+               goto out;
+       dh->ret_code = retcode;
+       dh->minor = -1U;
+       if (retcode == NO_ERROR) {
+               dh->minor = device->minor;
+               err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device);
+               if (err)
+                       goto out;
+               if (get_ldev(device)) {
+                       struct disk_conf *disk_conf =
+                               rcu_dereference(device->ldev->disk_conf);
+
+                       err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN));
+                       put_ldev(device);
+                       if (err)
+                               goto out;
+               }
+               device_to_info(&device_info, device);
+               err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN));
+               if (err)
+                       goto out;
+
+               device_to_statistics(&device_statistics, device);
+               err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
+               if (err)
+                       goto out;
+               cb->args[1] = minor + 1;
+       }
+       genlmsg_end(skb, dh);
+       err = 0;
+
+out:
+       rcu_read_unlock();
+       if (err)
+               return err;
+       return skb->len;
+}
+
+int drbd_adm_dump_connections_done(struct netlink_callback *cb)
+{
+       return put_resource_in_arg0(cb, 6);
+}
+
+enum { SINGLE_RESOURCE, ITERATE_RESOURCES };
+
+int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct nlattr *resource_filter;
+       struct drbd_resource *resource = NULL, *next_resource;
+       struct drbd_connection *uninitialized_var(connection);
+       int err = 0, retcode;
+       struct drbd_genlmsghdr *dh;
+       struct connection_info connection_info;
+       struct connection_statistics connection_statistics;
+
+       rcu_read_lock();
+       resource = (struct drbd_resource *)cb->args[0];
+       if (!cb->args[0]) {
+               resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+               if (resource_filter) {
+                       retcode = ERR_RES_NOT_KNOWN;
+                       resource = drbd_find_resource(nla_data(resource_filter));
+                       if (!resource)
+                               goto put_result;
+                       cb->args[0] = (long)resource;
+                       cb->args[1] = SINGLE_RESOURCE;
+               }
+       }
+       if (!resource) {
+               if (list_empty(&drbd_resources))
+                       goto out;
+               resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
+               kref_get(&resource->kref);
+               cb->args[0] = (long)resource;
+               cb->args[1] = ITERATE_RESOURCES;
+       }
+
+    next_resource:
+       rcu_read_unlock();
+       mutex_lock(&resource->conf_update);
+       rcu_read_lock();
+       if (cb->args[2]) {
+               for_each_connection_rcu(connection, resource)
+                       if (connection == (struct drbd_connection *)cb->args[2])
+                               goto found_connection;
+               /* connection was probably deleted */
+               goto no_more_connections;
+       }
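+       /* Make connection point to the list head (not the first entry). */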
+       connection = list_entry(&resource->connections, struct drbd_connection, connections);
+
+found_connection:
+       list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
+               if (!has_net_conf(connection))
+                       continue;
+               retcode = NO_ERROR;
+               goto put_result;  /* only one iteration */
+       }
+
+no_more_connections:
+       if (cb->args[1] == ITERATE_RESOURCES) {
+               for_each_resource_rcu(next_resource, &drbd_resources) {
+                       if (next_resource == resource)
+                               goto found_resource;
+               }
+               /* resource was probably deleted */
+       }
+       goto out;
+
+found_resource:
+       list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
+               mutex_unlock(&resource->conf_update);
+               kref_put(&resource->kref, drbd_destroy_resource);
+               resource = next_resource;
+               kref_get(&resource->kref);
+               cb->args[0] = (long)resource;
+               cb->args[2] = 0;
+               goto next_resource;
+       }
+       goto out;  /* no more resources */
+
+put_result:
+       dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+                       cb->nlh->nlmsg_seq, &drbd_genl_family,
+                       NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
+       err = -ENOMEM;
+       if (!dh)
+               goto out;
+       dh->ret_code = retcode;
+       dh->minor = -1U;
+       if (retcode == NO_ERROR) {
+               struct net_conf *net_conf;
+
+               err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
+               if (err)
+                       goto out;
+               net_conf = rcu_dereference(connection->net_conf);
+               if (net_conf) {
+                       err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
+                       if (err)
+                               goto out;
+               }
+               connection_to_info(&connection_info, connection);
+               err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
+               if (err)
+                       goto out;
+               connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
+               err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
+               if (err)
+                       goto out;
+               cb->args[2] = (long)connection;
+       }
+       genlmsg_end(skb, dh);
+       err = 0;
+
+out:
+       rcu_read_unlock();
+       if (resource)
+               mutex_unlock(&resource->conf_update);
+       if (err)
+               return err;
+       return skb->len;
+}
+
+enum mdf_peer_flag {
+       MDF_PEER_CONNECTED =    1 << 0,
+       MDF_PEER_OUTDATED =     1 << 1,
+       MDF_PEER_FENCING =      1 << 2,
+       MDF_PEER_FULL_SYNC =    1 << 3,
+};
+
+static void peer_device_to_statistics(struct peer_device_statistics *s,
+                                     struct drbd_peer_device *peer_device)
+{
+       struct drbd_device *device = peer_device->device;
+
+       memset(s, 0, sizeof(*s));
+       s->peer_dev_received = device->recv_cnt;
+       s->peer_dev_sent = device->send_cnt;
+       s->peer_dev_pending = atomic_read(&device->ap_pending_cnt) +
+                             atomic_read(&device->rs_pending_cnt);
+       s->peer_dev_unacked = atomic_read(&device->unacked_cnt);
+       s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 9);
+       s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - 9);
+       if (get_ldev(device)) {
+               struct drbd_md *md = &device->ldev->md;
+
+               spin_lock_irq(&md->uuid_lock);
+               s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP];
+               spin_unlock_irq(&md->uuid_lock);
+               s->peer_dev_flags =
+                       (drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ?
+                               MDF_PEER_CONNECTED : 0) +
+                       (drbd_md_test_flag(device->ldev, MDF_CONSISTENT) &&
+                        !drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ?
+                               MDF_PEER_OUTDATED : 0) +
+                       /* FIXME: MDF_PEER_FENCING? */
+                       (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ?
+                               MDF_PEER_FULL_SYNC : 0);
+               put_ldev(device);
+       }
+}
+
+int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb)
+{
+       return put_resource_in_arg0(cb, 9);
+}
+
+int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct nlattr *resource_filter;
+       struct drbd_resource *resource;
+       struct drbd_device *uninitialized_var(device);
+       struct drbd_peer_device *peer_device = NULL;
+       int minor, err, retcode;
+       struct drbd_genlmsghdr *dh;
+       struct idr *idr_to_search;
+
+       resource = (struct drbd_resource *)cb->args[0];
+       if (!cb->args[0] && !cb->args[1]) {
+               resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+               if (resource_filter) {
+                       retcode = ERR_RES_NOT_KNOWN;
+                       resource = drbd_find_resource(nla_data(resource_filter));
+                       if (!resource)
+                               goto put_result;
+               }
+               cb->args[0] = (long)resource;
+       }
+
+       rcu_read_lock();
+       minor = cb->args[1];
+       idr_to_search = resource ? &resource->devices : &drbd_devices;
+       device = idr_find(idr_to_search, minor);
+       if (!device) {
+next_device:
+               minor++;
+               cb->args[2] = 0;
+               device = idr_get_next(idr_to_search, &minor);
+               if (!device) {
+                       err = 0;
+                       goto out;
+               }
+       }
+       if (cb->args[2]) {
+               for_each_peer_device(peer_device, device)
+                       if (peer_device == (struct drbd_peer_device *)cb->args[2])
+                               goto found_peer_device;
+               /* peer device was probably deleted */
+               goto next_device;
+       }
+       /* Make peer_device point to the list head (not the first entry). */
+       peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
+
+found_peer_device:
+       list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
+               if (!has_net_conf(peer_device->connection))
+                       continue;
+               retcode = NO_ERROR;
+               goto put_result;  /* only one iteration */
+       }
+       goto next_device;
+
+put_result:
+       dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+                       cb->nlh->nlmsg_seq, &drbd_genl_family,
+                       NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES);
+       err = -ENOMEM;
+       if (!dh)
+               goto out;
+       dh->ret_code = retcode;
+       dh->minor = -1U;
+       if (retcode == NO_ERROR) {
+               struct peer_device_info peer_device_info;
+               struct peer_device_statistics peer_device_statistics;
+
+               dh->minor = minor;
+               err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device);
+               if (err)
+                       goto out;
+               peer_device_to_info(&peer_device_info, peer_device);
+               err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN));
+               if (err)
+                       goto out;
+               peer_device_to_statistics(&peer_device_statistics, peer_device);
+               err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
+               if (err)
+                       goto out;
+               cb->args[1] = minor;
+               cb->args[2] = (long)peer_device;
+       }
+       genlmsg_end(skb, dh);
+       err = 0;
+
+out:
+       rcu_read_unlock();
+       if (err)
+               return err;
+       return skb->len;
+}
+
 /*
  * Return the connection of @resource if @resource has exactly one connection.
  */
@@ -3414,8 +4055,18 @@ drbd_check_resource_name(struct drbd_config_context *adm_ctx)
        return NO_ERROR;
 }
 
+static void resource_to_info(struct resource_info *info,
+                            struct drbd_resource *resource)
+{
+       info->res_role = conn_highest_role(first_connection(resource));
+       info->res_susp = resource->susp;
+       info->res_susp_nod = resource->susp_nod;
+       info->res_susp_fen = resource->susp_fen;
+}
+
 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 {
+       struct drbd_connection *connection;
        struct drbd_config_context adm_ctx;
        enum drbd_ret_code retcode;
        struct res_opts res_opts;
@@ -3449,13 +4100,33 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
        }
 
        /* not yet safe for genl_family.parallel_ops */
-       if (!conn_create(adm_ctx.resource_name, &res_opts))
+       mutex_lock(&resources_mutex);
+       connection = conn_create(adm_ctx.resource_name, &res_opts);
+       mutex_unlock(&resources_mutex);
+
+       if (connection) {
+               struct resource_info resource_info;
+
+               mutex_lock(&notification_mutex);
+               resource_to_info(&resource_info, connection->resource);
+               notify_resource_state(NULL, 0, connection->resource,
+                                     &resource_info, NOTIFY_CREATE);
+               mutex_unlock(&notification_mutex);
+       } else
                retcode = ERR_NOMEM;
+
 out:
        drbd_adm_finish(&adm_ctx, info, retcode);
        return 0;
 }
 
+static void device_to_info(struct device_info *info,
+                          struct drbd_device *device)
+{
+       info->dev_disk_state = device->state.disk;
+}
+
 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
 {
        struct drbd_config_context adm_ctx;
@@ -3490,6 +4161,36 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
 
        mutex_lock(&adm_ctx.resource->adm_mutex);
        retcode = drbd_create_device(&adm_ctx, dh->minor);
+       if (retcode == NO_ERROR) {
+               struct drbd_device *device;
+               struct drbd_peer_device *peer_device;
+               struct device_info info;
+               unsigned int peer_devices = 0;
+               enum drbd_notification_type flags;
+
+               device = minor_to_device(dh->minor);
+               for_each_peer_device(peer_device, device) {
+                       if (!has_net_conf(peer_device->connection))
+                               continue;
+                       peer_devices++;
+               }
+
+               device_to_info(&info, device);
+               mutex_lock(&notification_mutex);
+               flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+               notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
+               for_each_peer_device(peer_device, device) {
+                       struct peer_device_info peer_device_info;
+
+                       if (!has_net_conf(peer_device->connection))
+                               continue;
+                       peer_device_to_info(&peer_device_info, peer_device);
+                       flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+                       notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
+                                                NOTIFY_CREATE | flags);
+               }
+               mutex_unlock(&notification_mutex);
+       }
        mutex_unlock(&adm_ctx.resource->adm_mutex);
 out:
        drbd_adm_finish(&adm_ctx, info, retcode);
@@ -3498,13 +4199,35 @@ out:
 
 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
 {
+       struct drbd_peer_device *peer_device;
+
        if (device->state.disk == D_DISKLESS &&
            /* no need to be device->state.conn == C_STANDALONE &&
             * we may want to delete a minor from a live replication group.
             */
            device->state.role == R_SECONDARY) {
+               struct drbd_connection *connection =
+                       first_connection(device->resource);
+
                _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
                                    CS_VERBOSE + CS_WAIT_COMPLETE);
+
+               /* If the state engine hasn't stopped the sender thread yet, we
+                * need to flush the sender work queue before generating the
+                * DESTROY events here. */
+               if (get_t_state(&connection->worker) == RUNNING)
+                       drbd_flush_workqueue(&connection->sender_work);
+
+               mutex_lock(&notification_mutex);
+               for_each_peer_device(peer_device, device) {
+                       if (!has_net_conf(peer_device->connection))
+                               continue;
+                       notify_peer_device_state(NULL, 0, peer_device, NULL,
+                                                NOTIFY_DESTROY | NOTIFY_CONTINUES);
+               }
+               notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
+               mutex_unlock(&notification_mutex);
+
                drbd_delete_device(device);
                return NO_ERROR;
        } else
@@ -3541,7 +4264,16 @@ static int adm_del_resource(struct drbd_resource *resource)
        if (!idr_is_empty(&resource->devices))
                return ERR_RES_IN_USE;
 
+       /* The state engine has stopped the sender thread, so we don't
+        * need to flush the sender work queue before generating the
+        * DESTROY event here. */
+       mutex_lock(&notification_mutex);
+       notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
+       mutex_unlock(&notification_mutex);
+
+       mutex_lock(&resources_mutex);
        list_del_rcu(&resource->resources);
+       mutex_unlock(&resources_mutex);
        /* Make sure all threads have actually stopped: state handling only
         * does drbd_thread_stop_nowait(). */
        list_for_each_entry(connection, &resource->connections, connections)
@@ -3637,7 +4369,6 @@ finish:
 
 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
 {
-       static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
        struct sk_buff *msg;
        struct drbd_genlmsghdr *d_out;
        unsigned seq;
@@ -3658,7 +4389,7 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
        if (nla_put_status_info(msg, device, sib))
                goto nla_put_failure;
        genlmsg_end(msg, d_out);
-       err = drbd_genl_multicast_events(msg, 0);
+       err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
        /* msg has been consumed or freed in netlink_broadcast() */
        if (err && err != -ESRCH)
                goto failed;
@@ -3672,3 +4403,405 @@ failed:
                        "Event seq:%u sib_reason:%u\n",
                        err, seq, sib->sib_reason);
 }
+
+static int nla_put_notification_header(struct sk_buff *msg,
+                                      enum drbd_notification_type type)
+{
+       struct drbd_notification_header nh = {
+               .nh_type = type,
+       };
+
+       return drbd_notification_header_to_skb(msg, &nh, true);
+}
+
+void notify_resource_state(struct sk_buff *skb,
+                          unsigned int seq,
+                          struct drbd_resource *resource,
+                          struct resource_info *resource_info,
+                          enum drbd_notification_type type)
+{
+       struct resource_statistics resource_statistics;
+       struct drbd_genlmsghdr *dh;
+       bool multicast = false;
+       int err;
+
+       if (!skb) {
+               seq = atomic_inc_return(&notify_genl_seq);
+               skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+               err = -ENOMEM;
+               if (!skb)
+                       goto failed;
+               multicast = true;
+       }
+
+       err = -EMSGSIZE;
+       dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
+       if (!dh)
+               goto nla_put_failure;
+       dh->minor = -1U;
+       dh->ret_code = NO_ERROR;
+       if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
+           nla_put_notification_header(skb, type) ||
+           ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+            resource_info_to_skb(skb, resource_info, true)))
+               goto nla_put_failure;
+       resource_statistics.res_stat_write_ordering = resource->write_ordering;
+       err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
+       if (err)
+               goto nla_put_failure;
+       genlmsg_end(skb, dh);
+       if (multicast) {
+               err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+               /* skb has been consumed or freed in netlink_broadcast() */
+               if (err && err != -ESRCH)
+                       goto failed;
+       }
+       return;
+
+nla_put_failure:
+       nlmsg_free(skb);
+failed:
+       drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
+                       err, seq);
+}
+
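
notify_resource_state() above, like the notify_*_state() variants that
follow, serves two callers: with skb == NULL it allocates a message and
multicasts it (state-change broadcast); with an skb supplied it fills the
caller's buffer for the initial-state dump.  Hypothetical usage, where
NOTIFY_CHANGE stands for the state-change type used elsewhere in this
series:

        /* broadcast a state change to the events multicast group */
        notify_resource_state(NULL, 0, resource, &resource_info, NOTIFY_CHANGE);

        /* fill a dump skb with an "object exists" event instead */
        notify_resource_state(skb, seq, resource, &resource_info, NOTIFY_EXISTS);
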
+void notify_device_state(struct sk_buff *skb,
+                        unsigned int seq,
+                        struct drbd_device *device,
+                        struct device_info *device_info,
+                        enum drbd_notification_type type)
+{
+       struct device_statistics device_statistics;
+       struct drbd_genlmsghdr *dh;
+       bool multicast = false;
+       int err;
+
+       if (!skb) {
+               seq = atomic_inc_return(&notify_genl_seq);
+               skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+               err = -ENOMEM;
+               if (!skb)
+                       goto failed;
+               multicast = true;
+       }
+
+       err = -EMSGSIZE;
+       dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
+       if (!dh)
+               goto nla_put_failure;
+       dh->minor = device->minor;
+       dh->ret_code = NO_ERROR;
+       if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
+           nla_put_notification_header(skb, type) ||
+           ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+            device_info_to_skb(skb, device_info, true)))
+               goto nla_put_failure;
+       device_to_statistics(&device_statistics, device);
+       device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
+       genlmsg_end(skb, dh);
+       if (multicast) {
+               err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+               /* skb has been consumed or freed in netlink_broadcast() */
+               if (err && err != -ESRCH)
+                       goto failed;
+       }
+       return;
+
+nla_put_failure:
+       nlmsg_free(skb);
+failed:
+       drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
+                err, seq);
+}
+
+void notify_connection_state(struct sk_buff *skb,
+                            unsigned int seq,
+                            struct drbd_connection *connection,
+                            struct connection_info *connection_info,
+                            enum drbd_notification_type type)
+{
+       struct connection_statistics connection_statistics;
+       struct drbd_genlmsghdr *dh;
+       bool multicast = false;
+       int err;
+
+       if (!skb) {
+               seq = atomic_inc_return(&notify_genl_seq);
+               skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+               err = -ENOMEM;
+               if (!skb)
+                       goto failed;
+               multicast = true;
+       }
+
+       err = -EMSGSIZE;
+       dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
+       if (!dh)
+               goto nla_put_failure;
+       dh->minor = -1U;
+       dh->ret_code = NO_ERROR;
+       if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
+           nla_put_notification_header(skb, type) ||
+           ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+            connection_info_to_skb(skb, connection_info, true)))
+               goto nla_put_failure;
+       connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
+       connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
+       genlmsg_end(skb, dh);
+       if (multicast) {
+               err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+               /* skb has been consumed or freed in netlink_broadcast() */
+               if (err && err != -ESRCH)
+                       goto failed;
+       }
+       return;
+
+nla_put_failure:
+       nlmsg_free(skb);
+failed:
+       drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
+                err, seq);
+}
+
+void notify_peer_device_state(struct sk_buff *skb,
+                             unsigned int seq,
+                             struct drbd_peer_device *peer_device,
+                             struct peer_device_info *peer_device_info,
+                             enum drbd_notification_type type)
+{
+       struct peer_device_statistics peer_device_statistics;
+       struct drbd_resource *resource = peer_device->device->resource;
+       struct drbd_genlmsghdr *dh;
+       bool multicast = false;
+       int err;
+
+       if (!skb) {
+               seq = atomic_inc_return(&notify_genl_seq);
+               skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+               err = -ENOMEM;
+               if (!skb)
+                       goto failed;
+               multicast = true;
+       }
+
+       err = -EMSGSIZE;
+       dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
+       if (!dh)
+               goto nla_put_failure;
+       dh->minor = -1U;
+       dh->ret_code = NO_ERROR;
+       if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
+           nla_put_notification_header(skb, type) ||
+           ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+            peer_device_info_to_skb(skb, peer_device_info, true)))
+               goto nla_put_failure;
+       peer_device_to_statistics(&peer_device_statistics, peer_device);
+       peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
+       genlmsg_end(skb, dh);
+       if (multicast) {
+               err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+               /* skb has been consumed or freed in netlink_broadcast() */
+               if (err && err != -ESRCH)
+                       goto failed;
+       }
+       return;
+
+nla_put_failure:
+       nlmsg_free(skb);
+failed:
+       drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
+                err, seq);
+}
+
+void notify_helper(enum drbd_notification_type type,
+                  struct drbd_device *device, struct drbd_connection *connection,
+                  const char *name, int status)
+{
+       struct drbd_resource *resource = device ? device->resource : connection->resource;
+       struct drbd_helper_info helper_info;
+       unsigned int seq = atomic_inc_return(&notify_genl_seq);
+       struct sk_buff *skb = NULL;
+       struct drbd_genlmsghdr *dh;
+       int err;
+
+       strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
+       helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
+       helper_info.helper_status = status;
+
+       skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+       err = -ENOMEM;
+       if (!skb)
+               goto fail;
+
+       err = -EMSGSIZE;
+       dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
+       if (!dh)
+               goto fail;
+       dh->minor = device ? device->minor : -1;
+       dh->ret_code = NO_ERROR;
+       mutex_lock(&notification_mutex);
+       if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
+           nla_put_notification_header(skb, type) ||
+           drbd_helper_info_to_skb(skb, &helper_info, true))
+               goto unlock_fail;
+       genlmsg_end(skb, dh);
+       err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+       skb = NULL;
+       /* skb has been consumed or freed in netlink_broadcast() */
+       if (err && err != -ESRCH)
+               goto unlock_fail;
+       mutex_unlock(&notification_mutex);
+       return;
+
+unlock_fail:
+       mutex_unlock(&notification_mutex);
+fail:
+       nlmsg_free(skb);
+       drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
+                err, seq);
+}
+
+static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
+{
+       struct drbd_genlmsghdr *dh;
+       int err;
+
+       err = -EMSGSIZE;
+       dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
+       if (!dh)
+               goto nla_put_failure;
+       dh->minor = -1U;
+       dh->ret_code = NO_ERROR;
+       if (nla_put_notification_header(skb, NOTIFY_EXISTS))
+               goto nla_put_failure;
+       genlmsg_end(skb, dh);
+       return;
+
+nla_put_failure:
+       nlmsg_free(skb);
+       pr_err("Error %d sending event. Event seq:%u\n", err, seq);
+}
+
+static void free_state_changes(struct list_head *list)
+{
+       while (!list_empty(list)) {
+               struct drbd_state_change *state_change =
+                       list_first_entry(list, struct drbd_state_change, list);
+               list_del(&state_change->list);
+               forget_state_change(state_change);
+       }
+}
+
+static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
+{
+       return 1 +
+              state_change->n_connections +
+              state_change->n_devices +
+              state_change->n_devices * state_change->n_connections;
+}
+
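
As a worked example: a resource with one connection and two devices yields
1 + 1 + 2 + 2 * 1 = 6 notifications.  drbd_adm_get_initial_state() below adds
that count to cb->args[5] per resource, and the initial value of 2 accounts
for the final DRBD_INITIAL_STATE_DONE message plus the terminating iteration.
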
+static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
+       unsigned int seq = cb->args[2];
+       unsigned int n;
+       enum drbd_notification_type flags = 0;
+
+       /* There is no need for taking notification_mutex here: it doesn't
+          matter if the initial state events mix with later state change
+          events; we can always tell the events apart by the NOTIFY_EXISTS
+          flag. */
+
+       cb->args[5]--;
+       if (cb->args[5] == 1) {
+               notify_initial_state_done(skb, seq);
+               goto out;
+       }
+       n = cb->args[4]++;
+       if (cb->args[4] < cb->args[3])
+               flags |= NOTIFY_CONTINUES;
+       if (n < 1) {
+               notify_resource_state_change(skb, seq, state_change->resource,
+                                            NOTIFY_EXISTS | flags);
+               goto next;
+       }
+       n--;
+       if (n < state_change->n_connections) {
+               notify_connection_state_change(skb, seq, &state_change->connections[n],
+                                              NOTIFY_EXISTS | flags);
+               goto next;
+       }
+       n -= state_change->n_connections;
+       if (n < state_change->n_devices) {
+               notify_device_state_change(skb, seq, &state_change->devices[n],
+                                          NOTIFY_EXISTS | flags);
+               goto next;
+       }
+       n -= state_change->n_devices;
+       if (n < state_change->n_devices * state_change->n_connections) {
+               notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
+                                               NOTIFY_EXISTS | flags);
+               goto next;
+       }
+
+next:
+       if (cb->args[4] == cb->args[3]) {
+               struct drbd_state_change *next_state_change =
+                       list_entry(state_change->list.next,
+                                  struct drbd_state_change, list);
+               cb->args[0] = (long)next_state_change;
+               cb->args[3] = notifications_for_state_change(next_state_change);
+               cb->args[4] = 0;
+       }
+out:
+       return skb->len;
+}
+
+int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct drbd_resource *resource;
+       LIST_HEAD(head);
+
+       if (cb->args[5] >= 1) {
+               if (cb->args[5] > 1)
+                       return get_initial_state(skb, cb);
+               if (cb->args[0]) {
+                       struct drbd_state_change *state_change =
+                               (struct drbd_state_change *)cb->args[0];
+
+                       /* connect list to head */
+                       list_add(&head, &state_change->list);
+                       free_state_changes(&head);
+               }
+               return 0;
+       }
+
+       cb->args[5] = 2;  /* number of iterations */
+       mutex_lock(&resources_mutex);
+       for_each_resource(resource, &drbd_resources) {
+               struct drbd_state_change *state_change;
+
+               state_change = remember_old_state(resource, GFP_KERNEL);
+               if (!state_change) {
+                       if (!list_empty(&head))
+                               free_state_changes(&head);
+                       mutex_unlock(&resources_mutex);
+                       return -ENOMEM;
+               }
+               copy_old_to_new_state_change(state_change);
+               list_add_tail(&state_change->list, &head);
+               cb->args[5] += notifications_for_state_change(state_change);
+       }
+       mutex_unlock(&resources_mutex);
+
+       if (!list_empty(&head)) {
+               struct drbd_state_change *state_change =
+                       list_entry(head.next, struct drbd_state_change, list);
+               cb->args[0] = (long)state_change;
+               cb->args[3] = notifications_for_state_change(state_change);
+               list_del(&head);  /* detach list from head */
+       }
+
+       cb->args[2] = cb->nlh->nlmsg_seq;
+       return get_initial_state(skb, cb);
+}
index 3b10fa6cb039339a2301350862b7d87e95c6a0cb..6537b25db9c1afaafdb35c61b83b6be431109922 100644 (file)
@@ -245,9 +245,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
        char wp;
 
        static char write_ordering_chars[] = {
-               [WO_none] = 'n',
-               [WO_drain_io] = 'd',
-               [WO_bdev_flush] = 'f',
+               [WO_NONE] = 'n',
+               [WO_DRAIN_IO] = 'd',
+               [WO_BDEV_FLUSH] = 'f',
        };
 
        seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
index 2da9104a3851813010eae268fa778c82ac5b2d4c..ef9245363dccc6183e680083d764404e2e6acd16 100644 (file)
@@ -23,7 +23,7 @@ enum drbd_packet {
        P_AUTH_RESPONSE       = 0x11,
        P_STATE_CHG_REQ       = 0x12,
 
-       /* asender (meta socket */
+       /* (meta socket) */
        P_PING                = 0x13,
        P_PING_ACK            = 0x14,
        P_RECV_ACK            = 0x15, /* Used in protocol B */
index b4b5680ac6adb1dcdbda2428b08398d38e2c8d4b..1957fe8601dcbb60ee2b8d3aa9367104fd503daf 100644 (file)
@@ -215,7 +215,7 @@ static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
        }
 }
 
-static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
+static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
 {
        LIST_HEAD(reclaimed);
        struct drbd_peer_request *peer_req, *t;
@@ -223,11 +223,30 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
        spin_lock_irq(&device->resource->req_lock);
        reclaim_finished_net_peer_reqs(device, &reclaimed);
        spin_unlock_irq(&device->resource->req_lock);
-
        list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
                drbd_free_net_peer_req(device, peer_req);
 }
 
+static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
+{
+       struct drbd_peer_device *peer_device;
+       int vnr;
+
+       rcu_read_lock();
+       idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
+               struct drbd_device *device = peer_device->device;
+               if (!atomic_read(&device->pp_in_use_by_net))
+                       continue;
+
+               kref_get(&device->kref);
+               rcu_read_unlock();
+               drbd_reclaim_net_peer_reqs(device);
+               kref_put(&device->kref, drbd_destroy_device);
+               rcu_read_lock();
+       }
+       rcu_read_unlock();
+}
+
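
conn_reclaim_net_peer_reqs() above uses a common kernel idiom: to call something that may sleep while walking an RCU-protected idr, pin the current element with a reference, drop the RCU read lock around the blocking call, then re-take it. That is safe because idr_for_each_entry() looks up the next id afresh on each iteration instead of caching a pointer. The generic shape, as an illustrative sketch (obj, obj_release() and blocking_work() are stand-ins):

rcu_read_lock();
idr_for_each_entry(&some_idr, obj, id) {
	kref_get(&obj->kref);			/* pin obj beyond the RCU section */
	rcu_read_unlock();
	blocking_work(obj);			/* may sleep now */
	kref_put(&obj->kref, obj_release);
	rcu_read_lock();			/* resume the walk at id + 1 */
}
rcu_read_unlock();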
 /**
  * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
  * @device:    DRBD device.
@@ -265,10 +284,15 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
        if (atomic_read(&device->pp_in_use) < mxb)
                page = __drbd_alloc_pages(device, number);
 
+       /* Try to keep the fast path fast, but occasionally we need
+        * to reclaim the pages we lent to the network stack. */
+       if (page && atomic_read(&device->pp_in_use_by_net) > 512)
+               drbd_reclaim_net_peer_reqs(device);
+
        while (page == NULL) {
                prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
 
-               drbd_kick_lo_and_reclaim_net(device);
+               drbd_reclaim_net_peer_reqs(device);
 
                if (atomic_read(&device->pp_in_use) < mxb) {
                        page = __drbd_alloc_pages(device, number);
@@ -1099,7 +1123,15 @@ randomize:
                return 0;
        }
 
-       drbd_thread_start(&connection->asender);
+       drbd_thread_start(&connection->ack_receiver);
+       /* open-coded create_singlethread_workqueue(),
+        * to be able to use format string arguments */
+       connection->ack_sender =
+               alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
+       if (!connection->ack_sender) {
+               drbd_err(connection, "Failed to create workqueue ack_sender\n");
+               return 0;
+       }
 
        mutex_lock(&connection->resource->conf_update);
        /* The discard_my_data flag is a single-shot modifier to the next
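
For context: create_singlethread_workqueue() only takes a fixed name, while alloc_ordered_workqueue() accepts a printf-style format, which is why the hunk above open-codes it to embed the resource name in the workqueue name. The same pattern in a hypothetical driver (res_name is an assumption):

struct workqueue_struct *wq;

wq = alloc_ordered_workqueue("mydrv_%s", WQ_MEM_RECLAIM, res_name);
if (!wq)
	return -ENOMEM;
/* ... queue_work(wq, &some_work); ... */
destroy_workqueue(wq);	/* on teardown, cf. conn_disconnect() below */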
@@ -1178,7 +1210,7 @@ static void drbd_flush(struct drbd_connection *connection)
        struct drbd_peer_device *peer_device;
        int vnr;
 
-       if (connection->resource->write_ordering >= WO_bdev_flush) {
+       if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
                rcu_read_lock();
                idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                        struct drbd_device *device = peer_device->device;
@@ -1203,7 +1235,7 @@ static void drbd_flush(struct drbd_connection *connection)
                                /* would rather check on EOPNOTSUPP, but that is not reliable.
                                 * don't try again for ANY return value != 0
                                 * if (rv == -EOPNOTSUPP) */
-                               drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
+                               drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
                        }
                        put_ldev(device);
                        kref_put(&device->kref, drbd_destroy_device);
@@ -1299,10 +1331,10 @@ max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
 
        dc = rcu_dereference(bdev->disk_conf);
 
-       if (wo == WO_bdev_flush && !dc->disk_flushes)
-               wo = WO_drain_io;
-       if (wo == WO_drain_io && !dc->disk_drain)
-               wo = WO_none;
+       if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
+               wo = WO_DRAIN_IO;
+       if (wo == WO_DRAIN_IO && !dc->disk_drain)
+               wo = WO_NONE;
 
        return wo;
 }
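
max_allowed_wo() only ever downgrades the requested method according to the per-disk settings; summarizing the two tests above:

requested        disk_flushes   disk_drain   effective
WO_BDEV_FLUSH    yes            -            WO_BDEV_FLUSH
WO_BDEV_FLUSH    no             yes          WO_DRAIN_IO
WO_BDEV_FLUSH    no             no           WO_NONE
WO_DRAIN_IO      -              yes          WO_DRAIN_IO
WO_DRAIN_IO      -              no           WO_NONE
WO_NONE          -              -            WO_NONE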
@@ -1319,13 +1351,13 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
        enum write_ordering_e pwo;
        int vnr;
        static char *write_ordering_str[] = {
-               [WO_none] = "none",
-               [WO_drain_io] = "drain",
-               [WO_bdev_flush] = "flush",
+               [WO_NONE] = "none",
+               [WO_DRAIN_IO] = "drain",
+               [WO_BDEV_FLUSH] = "flush",
        };
 
        pwo = resource->write_ordering;
-       if (wo != WO_bdev_flush)
+       if (wo != WO_BDEV_FLUSH)
                wo = min(pwo, wo);
        rcu_read_lock();
        idr_for_each_entry(&resource->devices, device, vnr) {
@@ -1343,7 +1375,7 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
        rcu_read_unlock();
 
        resource->write_ordering = wo;
-       if (pwo != resource->write_ordering || wo == WO_bdev_flush)
+       if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
                drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
 }
 
@@ -1380,7 +1412,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
        if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
                /* wait for all pending IO completions, before we start
                 * zeroing things out. */
-               conn_wait_active_ee_empty(first_peer_device(device)->connection);
+               conn_wait_active_ee_empty(peer_req->peer_device->connection);
                /* add it to the active list now,
                 * so we can find it to present it in debugfs */
                peer_req->submit_jif = jiffies;
@@ -1508,12 +1540,6 @@ static void conn_wait_active_ee_empty(struct drbd_connection *connection)
        rcu_read_unlock();
 }
 
-static struct drbd_peer_device *
-conn_peer_device(struct drbd_connection *connection, int volume_number)
-{
-       return idr_find(&connection->peer_devices, volume_number);
-}
-
 static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
 {
        int rv;
@@ -1533,7 +1559,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
         * Therefore we must send the barrier_ack after the barrier request was
         * completed. */
        switch (connection->resource->write_ordering) {
-       case WO_none:
+       case WO_NONE:
                if (rv == FE_RECYCLED)
                        return 0;
 
@@ -1546,8 +1572,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
                        drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
                        /* Fall through */
 
-       case WO_bdev_flush:
-       case WO_drain_io:
+       case WO_BDEV_FLUSH:
+       case WO_DRAIN_IO:
                conn_wait_active_ee_empty(connection);
                drbd_flush(connection);
 
@@ -1752,7 +1778,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
 }
 
 /*
- * e_end_resync_block() is called in asender context via
+ * e_end_resync_block() is called in ack_sender context via
  * drbd_finish_peer_reqs().
  */
 static int e_end_resync_block(struct drbd_work *w, int unused)
@@ -1926,7 +1952,7 @@ static void restart_conflicting_writes(struct drbd_device *device,
 }
 
 /*
- * e_end_block() is called in asender context via drbd_finish_peer_reqs().
+ * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
  */
 static int e_end_block(struct drbd_work *w, int cancel)
 {
@@ -1966,7 +1992,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
        } else
                D_ASSERT(device, drbd_interval_empty(&peer_req->i));
 
-       drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
+       drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
 
        return err;
 }
@@ -2098,7 +2124,7 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
                }
 
                rcu_read_lock();
-               tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
+               tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
                rcu_read_unlock();
 
                if (!tp)
@@ -2217,7 +2243,7 @@ static int handle_write_conflicts(struct drbd_device *device,
                        peer_req->w.cb = superseded ? e_send_superseded :
                                                   e_send_retry_write;
                        list_add_tail(&peer_req->w.list, &device->done_ee);
-                       wake_asender(connection);
+                       queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
 
                        err = -ENOENT;
                        goto out;
@@ -2364,7 +2390,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
        if (dp_flags & DP_SEND_RECEIVE_ACK) {
                /* I really don't like it that the receiver thread
                 * sends on the msock, but anyways */
-               drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
+               drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
        }
 
        if (tp) {
@@ -4056,7 +4082,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
        os = ns = drbd_read_state(device);
        spin_unlock_irq(&device->resource->req_lock);
 
-       /* If some other part of the code (asender thread, timeout)
+       /* If some other part of the code (ack_receiver thread, timeout)
         * already decided to close the connection again,
         * we must not "re-establish" it here. */
        if (os.conn <= C_TEAR_DOWN)
@@ -4661,8 +4687,12 @@ static void conn_disconnect(struct drbd_connection *connection)
         */
        conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
 
-       /* asender does not clean up anything. it must not interfere, either */
-       drbd_thread_stop(&connection->asender);
+       /* ack_receiver does not clean up anything. it must not interfere, either */
+       drbd_thread_stop(&connection->ack_receiver);
+       if (connection->ack_sender) {
+               destroy_workqueue(connection->ack_sender);
+               connection->ack_sender = NULL;
+       }
        drbd_free_sock(connection);
 
        rcu_read_lock();
@@ -5431,49 +5461,39 @@ static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
        return 0;
 }
 
-static int connection_finish_peer_reqs(struct drbd_connection *connection)
+struct meta_sock_cmd {
+       size_t pkt_size;
+       int (*fn)(struct drbd_connection *connection, struct packet_info *);
+};
+
+static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
 {
-       struct drbd_peer_device *peer_device;
-       int vnr, not_empty = 0;
+       long t;
+       struct net_conf *nc;
 
-       do {
-               clear_bit(SIGNAL_ASENDER, &connection->flags);
-               flush_signals(current);
+       rcu_read_lock();
+       nc = rcu_dereference(connection->net_conf);
+       t = ping_timeout ? nc->ping_timeo : nc->ping_int;
+       rcu_read_unlock();
 
-               rcu_read_lock();
-               idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
-                       struct drbd_device *device = peer_device->device;
-                       kref_get(&device->kref);
-                       rcu_read_unlock();
-                       if (drbd_finish_peer_reqs(device)) {
-                               kref_put(&device->kref, drbd_destroy_device);
-                               return 1;
-                       }
-                       kref_put(&device->kref, drbd_destroy_device);
-                       rcu_read_lock();
-               }
-               set_bit(SIGNAL_ASENDER, &connection->flags);
+       t *= HZ;
+       if (ping_timeout)
+               t /= 10;
 
-               spin_lock_irq(&connection->resource->req_lock);
-               idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
-                       struct drbd_device *device = peer_device->device;
-                       not_empty = !list_empty(&device->done_ee);
-                       if (not_empty)
-                               break;
-               }
-               spin_unlock_irq(&connection->resource->req_lock);
-               rcu_read_unlock();
-       } while (not_empty);
+       connection->meta.socket->sk->sk_rcvtimeo = t;
+}
 
-       return 0;
+static void set_ping_timeout(struct drbd_connection *connection)
+{
+       set_rcvtimeo(connection, 1);
 }
 
-struct asender_cmd {
-       size_t pkt_size;
-       int (*fn)(struct drbd_connection *connection, struct packet_info *);
-};
+static void set_idle_timeout(struct drbd_connection *connection)
+{
+       set_rcvtimeo(connection, 0);
+}
 
-static struct asender_cmd asender_tbl[] = {
+static struct meta_sock_cmd ack_receiver_tbl[] = {
        [P_PING]            = { 0, got_Ping },
        [P_PING_ACK]        = { 0, got_PingAck },
        [P_RECV_ACK]        = { sizeof(struct p_block_ack), got_BlockAck },
@@ -5493,64 +5513,40 @@ static struct asender_cmd asender_tbl[] = {
        [P_RETRY_WRITE]     = { sizeof(struct p_block_ack), got_BlockAck },
 };
 
-int drbd_asender(struct drbd_thread *thi)
+int drbd_ack_receiver(struct drbd_thread *thi)
 {
        struct drbd_connection *connection = thi->connection;
-       struct asender_cmd *cmd = NULL;
+       struct meta_sock_cmd *cmd = NULL;
        struct packet_info pi;
+       unsigned long pre_recv_jif;
        int rv;
        void *buf    = connection->meta.rbuf;
        int received = 0;
        unsigned int header_size = drbd_header_size(connection);
        int expect   = header_size;
        bool ping_timeout_active = false;
-       struct net_conf *nc;
-       int ping_timeo, tcp_cork, ping_int;
        struct sched_param param = { .sched_priority = 2 };
 
        rv = sched_setscheduler(current, SCHED_RR, &param);
        if (rv < 0)
-               drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
+               drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
 
        while (get_t_state(thi) == RUNNING) {
                drbd_thread_current_set_cpu(thi);
 
-               rcu_read_lock();
-               nc = rcu_dereference(connection->net_conf);
-               ping_timeo = nc->ping_timeo;
-               tcp_cork = nc->tcp_cork;
-               ping_int = nc->ping_int;
-               rcu_read_unlock();
+               conn_reclaim_net_peer_reqs(connection);
 
                if (test_and_clear_bit(SEND_PING, &connection->flags)) {
                        if (drbd_send_ping(connection)) {
                                drbd_err(connection, "drbd_send_ping has failed\n");
                                goto reconnect;
                        }
-                       connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
+                       set_ping_timeout(connection);
                        ping_timeout_active = true;
                }
 
-               /* TODO: conditionally cork; it may hurt latency if we cork without
-                  much to send */
-               if (tcp_cork)
-                       drbd_tcp_cork(connection->meta.socket);
-               if (connection_finish_peer_reqs(connection)) {
-                       drbd_err(connection, "connection_finish_peer_reqs() failed\n");
-                       goto reconnect;
-               }
-               /* but unconditionally uncork unless disabled */
-               if (tcp_cork)
-                       drbd_tcp_uncork(connection->meta.socket);
-
-               /* short circuit, recv_msg would return EINTR anyways. */
-               if (signal_pending(current))
-                       continue;
-
+               pre_recv_jif = jiffies;
                rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
-               clear_bit(SIGNAL_ASENDER, &connection->flags);
-
-               flush_signals(current);
 
                /* Note:
                 * -EINTR        (on meta) we got a signal
@@ -5562,7 +5558,6 @@ int drbd_asender(struct drbd_thread *thi)
                 * rv <  expected: "woken" by signal during receive
                 * rv == 0       : "connection shut down by peer"
                 */
-received_more:
                if (likely(rv > 0)) {
                        received += rv;
                        buf      += rv;
@@ -5584,8 +5579,7 @@ received_more:
                } else if (rv == -EAGAIN) {
                        /* If the data socket received something meanwhile,
                         * that is good enough: peer is still alive. */
-                       if (time_after(connection->last_received,
-                               jiffies - connection->meta.socket->sk->sk_rcvtimeo))
+                       if (time_after(connection->last_received, pre_recv_jif))
                                continue;
                        if (ping_timeout_active) {
                                drbd_err(connection, "PingAck did not arrive in time.\n");
@@ -5594,6 +5588,10 @@ received_more:
                        set_bit(SEND_PING, &connection->flags);
                        continue;
                } else if (rv == -EINTR) {
+                       /* maybe drbd_thread_stop(): the while condition will notice.
+                        * maybe woken for send_ping: we'll send a ping above,
+                        * and change the rcvtimeo */
+                       flush_signals(current);
                        continue;
                } else {
                        drbd_err(connection, "sock_recvmsg returned %d\n", rv);
@@ -5603,8 +5601,8 @@ received_more:
                if (received == expect && cmd == NULL) {
                        if (decode_header(connection, connection->meta.rbuf, &pi))
                                goto reconnect;
-                       cmd = &asender_tbl[pi.cmd];
-                       if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
+                       cmd = &ack_receiver_tbl[pi.cmd];
+                       if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
                                drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
                                         cmdname(pi.cmd), pi.cmd);
                                goto disconnect;
@@ -5627,9 +5625,8 @@ received_more:
 
                        connection->last_received = jiffies;
 
-                       if (cmd == &asender_tbl[P_PING_ACK]) {
-                               /* restore idle timeout */
-                               connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
+                       if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
+                               set_idle_timeout(connection);
                                ping_timeout_active = false;
                        }
 
@@ -5638,11 +5635,6 @@ received_more:
                        expect   = header_size;
                        cmd      = NULL;
                }
-               if (test_bit(SEND_PING, &connection->flags))
-                       continue;
-               rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT);
-               if (rv > 0)
-                       goto received_more;
        }
 
        if (0) {
@@ -5654,9 +5646,41 @@ reconnect:
 disconnect:
                conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
        }
-       clear_bit(SIGNAL_ASENDER, &connection->flags);
 
-       drbd_info(connection, "asender terminated\n");
+       drbd_info(connection, "ack_receiver terminated\n");
 
        return 0;
 }
+
+void drbd_send_acks_wf(struct work_struct *ws)
+{
+       struct drbd_peer_device *peer_device =
+               container_of(ws, struct drbd_peer_device, send_acks_work);
+       struct drbd_connection *connection = peer_device->connection;
+       struct drbd_device *device = peer_device->device;
+       struct net_conf *nc;
+       int tcp_cork, err;
+
+       rcu_read_lock();
+       nc = rcu_dereference(connection->net_conf);
+       tcp_cork = nc->tcp_cork;
+       rcu_read_unlock();
+
+       if (tcp_cork)
+               drbd_tcp_cork(connection->meta.socket);
+
+       err = drbd_finish_peer_reqs(device);
+       kref_put(&device->kref, drbd_destroy_device);
+       /* The matching kref_get() is in drbd_endio_write_sec_final(); it keeps the
+          struct work_struct send_acks_work alive, which is embedded in the peer_device object */
+
+       if (err) {
+               conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
+               return;
+       }
+
+       if (tcp_cork)
+               drbd_tcp_uncork(connection->meta.socket);
+
+       return;
+}
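
drbd_tcp_cork()/drbd_tcp_uncork() bracket the batch of ACKs so that many small packets coalesce into fewer TCP segments; they wrap the TCP_CORK socket option. A userspace analogue of the same bracket, for illustration only:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void send_batched(int fd, const struct iovec *iov, int n)
{
	int on = 1, off = 0, i;

	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
	for (i = 0; i < n; i++)
		send(fd, iov[i].iov_base, iov[i].iov_len, 0);	/* small writes coalesce */
	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));	/* flush */
}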
index 3ae2c00865635f889e4040d0e218786c237ff3e6..2255dcfebd2b514d2373424718e1ad4831a0c3ab 100644
@@ -453,12 +453,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
                kref_get(&req->kref); /* wait for the DONE */
 
        if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
-               /* potentially already completed in the asender thread */
+               /* potentially already completed in the ack_receiver thread */
                if (!(s & RQ_NET_DONE)) {
                        atomic_add(req->i.size >> 9, &device->ap_in_flight);
                        set_if_null_req_not_net_done(peer_device, req);
                }
-               if (s & RQ_NET_PENDING)
+               if (req->rq_state & RQ_NET_PENDING)
                        set_if_null_req_ack_pending(peer_device, req);
        }
 
@@ -1095,6 +1095,24 @@ static bool do_remote_read(struct drbd_request *req)
        return false;
 }
 
+bool drbd_should_do_remote(union drbd_dev_state s)
+{
+       return s.pdsk == D_UP_TO_DATE ||
+               (s.pdsk >= D_INCONSISTENT &&
+                s.conn >= C_WF_BITMAP_T &&
+                s.conn < C_AHEAD);
+       /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
+          That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
+          states. */
+}
+
+static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
+{
+       return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
+       /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
+          since we enter state C_AHEAD only if proto >= 96 */
+}
+
 /* returns number of connections (== 1, for drbd 8.4)
  * expected to actually write this data,
  * which does NOT include those that we are L_AHEAD for. */
@@ -1149,7 +1167,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
         * stable storage, and this is a WRITE, we may not even submit
         * this bio. */
        if (get_ldev(device)) {
-               req->pre_submit_jif = jiffies;
                if (drbd_insert_fault(device,
                                      rw == WRITE ? DRBD_FAULT_DT_WR
                                    : rw == READ  ? DRBD_FAULT_DT_RD
@@ -1293,6 +1310,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
                        &device->pending_master_completion[rw == WRITE]);
        if (req->private_bio) {
                /* needs to be marked within the same spinlock */
+               req->pre_submit_jif = jiffies;
                list_add_tail(&req->req_pending_local,
                        &device->pending_completion[rw == WRITE]);
                _req_mod(req, TO_BE_SUBMITTED);
@@ -1513,6 +1531,78 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
        return BLK_QC_T_NONE;
 }
 
+static bool net_timeout_reached(struct drbd_request *net_req,
+               struct drbd_connection *connection,
+               unsigned long now, unsigned long ent,
+               unsigned int ko_count, unsigned int timeout)
+{
+       struct drbd_device *device = net_req->device;
+
+       if (!time_after(now, net_req->pre_send_jif + ent))
+               return false;
+
+       if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
+               return false;
+
+       if (net_req->rq_state & RQ_NET_PENDING) {
+               drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
+                       jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
+               return true;
+       }
+
+       /* We received an ACK already (or are using protocol A),
+        * but are waiting for the epoch closing barrier ack.
+        * Check if we sent the barrier already.  We should not blame the peer
+        * for being unresponsive, if we did not even ask it yet. */
+       if (net_req->epoch == connection->send.current_epoch_nr) {
+               drbd_warn(device,
+                       "We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
+                       jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
+               return false;
+       }
+
+       /* Worst case: we may have been blocked for whatever reason, then
+        * suddenly are able to send a lot of requests (and epoch separating
+        * barriers) in quick succession.
+        * The timestamp of the net_req may be much too old and not correspond
+        * to the sending time of the relevant unack'ed barrier packet, so
+        * would trigger a spurious timeout.  The latest barrier packet may
+        * have too recent a timestamp to trigger the timeout, so we could
+        * potentially miss a timeout.  Right now we don't have a place to
+        * conveniently store these timestamps.
+        * But in this particular situation, the application requests are still
+        * completed to upper layers, DRBD should still "feel" responsive.
+        * No need yet to kill this connection, it may still recover.
+        * If not, eventually we will have queued enough into the network for
+        * us to block. From that point of view, the timestamp of the last sent
+        * barrier packet is relevant enough.
+        */
+       if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
+               drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
+                       connection->send.last_sent_barrier_jif, now,
+                       jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
+               return true;
+       }
+       return false;
+}
+
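
For reference, the effective network timeout checked here is ent = timeout * HZ/10 * ko_count, with the configured timeout in tenths of a second. A worked example under assumed common defaults (timeout=60, ko-count=7, HZ=250):

/*	ent = timeout * HZ/10 * ko_count
 *	    = 60 * 25 * 7 = 10500 jiffies = 42 seconds
 * so a request left unacknowledged for more than 42 seconds (and not
 * excused by the barrier checks above) moves the connection to C_TIMEOUT. */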
+/* A request is considered timed out, if
+ * - we have some effective timeout from the configuration,
+ *   with some state restrictions applied,
+ * - the oldest request is waiting for a response from the network
+ *   resp. the local disk,
+ * - the oldest request is in fact older than the effective timeout,
+ * - the connection was established (resp. disk was attached)
+ *   for longer than the timeout already.
+ * Note that for 32bit jiffies and very stable connections/disks,
+ * we may have a wrap-around, which is caught by
+ *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
+ *
+ * Side effect: once per 32bit wrap-around interval, which means every
+ * ~198 days with 250 HZ, we have a window where the timeout would need
+ * to expire twice (worst case) to become effective. Good enough.
+ */
+
 void request_timer_fn(unsigned long data)
 {
        struct drbd_device *device = (struct drbd_device *) data;
@@ -1522,11 +1612,14 @@ void request_timer_fn(unsigned long data)
        unsigned long oldest_submit_jif;
        unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
        unsigned long now;
+       unsigned int ko_count = 0, timeout = 0;
 
        rcu_read_lock();
        nc = rcu_dereference(connection->net_conf);
-       if (nc && device->state.conn >= C_WF_REPORT_PARAMS)
-               ent = nc->timeout * HZ/10 * nc->ko_count;
+       if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
+               ko_count = nc->ko_count;
+               timeout = nc->timeout;
+       }
 
        if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
                dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
@@ -1534,6 +1627,8 @@ void request_timer_fn(unsigned long data)
        }
        rcu_read_unlock();
 
+
+       ent = timeout * HZ/10 * ko_count;
        et = min_not_zero(dt, ent);
 
        if (!et)
@@ -1545,11 +1640,22 @@ void request_timer_fn(unsigned long data)
        spin_lock_irq(&device->resource->req_lock);
        req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
        req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
-       req_peer = connection->req_not_net_done;
+
        /* maybe the oldest request waiting for the peer is in fact still
-        * blocking in tcp sendmsg */
-       if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
-               req_peer = connection->req_next;
+        * blocking in tcp sendmsg.  That's OK, though; it is handled via the
+        * socket send timeout, requesting a ping, and bumping ko-count in
+        * we_should_drop_the_connection().
+        */
+
+       /* check the oldest request we successfully sent,
+        * but which is still waiting for an ACK. */
+       req_peer = connection->req_ack_pending;
+
+       /* if we don't have such a request (e.g. protocol A),
+        * check the oldest request which is still waiting on its
+        * epoch-closing barrier ack. */
+       if (!req_peer)
+               req_peer = connection->req_not_net_done;
 
        /* evaluate the oldest peer request only in one timer! */
        if (req_peer && req_peer->device != device)
@@ -1566,28 +1672,9 @@ void request_timer_fn(unsigned long data)
                : req_write ? req_write->pre_submit_jif
                : req_read ? req_read->pre_submit_jif : now;
 
-       /* The request is considered timed out, if
-        * - we have some effective timeout from the configuration,
-        *   with above state restrictions applied,
-        * - the oldest request is waiting for a response from the network
-        *   resp. the local disk,
-        * - the oldest request is in fact older than the effective timeout,
-        * - the connection was established (resp. disk was attached)
-        *   for longer than the timeout already.
-        * Note that for 32bit jiffies and very stable connections/disks,
-        * we may have a wrap around, which is catched by
-        *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
-        *
-        * Side effect: once per 32bit wrap-around interval, which means every
-        * ~198 days with 250 HZ, we have a window where the timeout would need
-        * to expire twice (worst case) to become effective. Good enough.
-        */
-       if (ent && req_peer &&
-                time_after(now, req_peer->pre_send_jif + ent) &&
-               !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
-               drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
+       if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
                _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);
-       }
+
        if (dt && oldest_submit_jif != now &&
                 time_after(now, oldest_submit_jif + dt) &&
                !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
index 9f6a04080e9f76aadfdfedf8d0e1cb408dbcba2a..bb2ef78165e5fe3b0a9394bbdaec61e1e99e66d4 100644
@@ -331,21 +331,6 @@ static inline int req_mod(struct drbd_request *req,
        return rv;
 }
 
-static inline bool drbd_should_do_remote(union drbd_dev_state s)
-{
-       return s.pdsk == D_UP_TO_DATE ||
-               (s.pdsk >= D_INCONSISTENT &&
-                s.conn >= C_WF_BITMAP_T &&
-                s.conn < C_AHEAD);
-       /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
-          That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
-          states. */
-}
-static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
-{
-       return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
-       /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
-          since we enter state C_AHEAD only if proto >= 96 */
-}
+extern bool drbd_should_do_remote(union drbd_dev_state);
 
 #endif
index 2d7dd269b6a82a41f94ce044f4baf4c87c92d473..5a7ef7873b675b73023001478be61e746b3d1194 100644
@@ -29,6 +29,7 @@
 #include "drbd_int.h"
 #include "drbd_protocol.h"
 #include "drbd_req.h"
+#include "drbd_state_change.h"
 
 struct after_state_chg_work {
        struct drbd_work w;
@@ -37,6 +38,7 @@ struct after_state_chg_work {
        union drbd_state ns;
        enum chg_state_flags flags;
        struct completion *done;
+       struct drbd_state_change *state_change;
 };
 
 enum sanitize_state_warnings {
@@ -48,9 +50,248 @@ enum sanitize_state_warnings {
        IMPLICITLY_UPGRADED_PDSK,
 };
 
+static void count_objects(struct drbd_resource *resource,
+                         unsigned int *n_devices,
+                         unsigned int *n_connections)
+{
+       struct drbd_device *device;
+       struct drbd_connection *connection;
+       int vnr;
+
+       *n_devices = 0;
+       *n_connections = 0;
+
+       idr_for_each_entry(&resource->devices, device, vnr)
+               (*n_devices)++;
+       for_each_connection(connection, resource)
+               (*n_connections)++;
+}
+
+static struct drbd_state_change *alloc_state_change(unsigned int n_devices, unsigned int n_connections, gfp_t gfp)
+{
+       struct drbd_state_change *state_change;
+       unsigned int size, n;
+
+       size = sizeof(struct drbd_state_change) +
+              n_devices * sizeof(struct drbd_device_state_change) +
+              n_connections * sizeof(struct drbd_connection_state_change) +
+              n_devices * n_connections * sizeof(struct drbd_peer_device_state_change);
+       state_change = kmalloc(size, gfp);
+       if (!state_change)
+               return NULL;
+       state_change->n_devices = n_devices;
+       state_change->n_connections = n_connections;
+       state_change->devices = (void *)(state_change + 1);
+       state_change->connections = (void *)&state_change->devices[n_devices];
+       state_change->peer_devices = (void *)&state_change->connections[n_connections];
+       state_change->resource->resource = NULL;
+       for (n = 0; n < n_devices; n++)
+               state_change->devices[n].device = NULL;
+       for (n = 0; n < n_connections; n++)
+               state_change->connections[n].connection = NULL;
+       return state_change;
+}
+
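
alloc_state_change() above packs the header plus all three variable-length arrays into a single kmalloc(), carved apart with pointer arithmetic, so the one kfree() in forget_state_change() releases everything. The layout reduced to an illustrative sketch (struct a/struct b are stand-ins):

struct a { int x; };
struct b { int y; };

struct blob {
	unsigned int n_a, n_b;
	struct a *as;		/* both arrays live inside the same allocation */
	struct b *bs;
};

static struct blob *alloc_blob(unsigned int n_a, unsigned int n_b, gfp_t gfp)
{
	struct blob *x = kmalloc(sizeof(*x) +
				 n_a * sizeof(struct a) +
				 n_b * sizeof(struct b), gfp);

	if (!x)
		return NULL;
	x->n_a = n_a;
	x->n_b = n_b;
	x->as = (void *)(x + 1);	/* first array right after the header */
	x->bs = (void *)&x->as[n_a];	/* second array after the first */
	return x;
}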
+struct drbd_state_change *remember_old_state(struct drbd_resource *resource, gfp_t gfp)
+{
+       struct drbd_state_change *state_change;
+       struct drbd_device *device;
+       unsigned int n_devices;
+       struct drbd_connection *connection;
+       unsigned int n_connections;
+       int vnr;
+
+       struct drbd_device_state_change *device_state_change;
+       struct drbd_peer_device_state_change *peer_device_state_change;
+       struct drbd_connection_state_change *connection_state_change;
+
+       /* Caller holds req_lock spinlock.
+        * No state, no device IDR, no connection lists can change. */
+       count_objects(resource, &n_devices, &n_connections);
+       state_change = alloc_state_change(n_devices, n_connections, gfp);
+       if (!state_change)
+               return NULL;
+
+       kref_get(&resource->kref);
+       state_change->resource->resource = resource;
+       state_change->resource->role[OLD] =
+               conn_highest_role(first_connection(resource));
+       state_change->resource->susp[OLD] = resource->susp;
+       state_change->resource->susp_nod[OLD] = resource->susp_nod;
+       state_change->resource->susp_fen[OLD] = resource->susp_fen;
+
+       connection_state_change = state_change->connections;
+       for_each_connection(connection, resource) {
+               kref_get(&connection->kref);
+               connection_state_change->connection = connection;
+               connection_state_change->cstate[OLD] =
+                       connection->cstate;
+               connection_state_change->peer_role[OLD] =
+                       conn_highest_peer(connection);
+               connection_state_change++;
+       }
+
+       device_state_change = state_change->devices;
+       peer_device_state_change = state_change->peer_devices;
+       idr_for_each_entry(&resource->devices, device, vnr) {
+               kref_get(&device->kref);
+               device_state_change->device = device;
+               device_state_change->disk_state[OLD] = device->state.disk;
+
+               /* The peer_devices for each device have to be enumerated in
+                  the order of the connections. We may not use for_each_peer_device() here. */
+               for_each_connection(connection, resource) {
+                       struct drbd_peer_device *peer_device;
+
+                       peer_device = conn_peer_device(connection, device->vnr);
+                       peer_device_state_change->peer_device = peer_device;
+                       peer_device_state_change->disk_state[OLD] =
+                               device->state.pdsk;
+                       peer_device_state_change->repl_state[OLD] =
+                               max_t(enum drbd_conns,
+                                     C_WF_REPORT_PARAMS, device->state.conn);
+                       peer_device_state_change->resync_susp_user[OLD] =
+                               device->state.user_isp;
+                       peer_device_state_change->resync_susp_peer[OLD] =
+                               device->state.peer_isp;
+                       peer_device_state_change->resync_susp_dependency[OLD] =
+                               device->state.aftr_isp;
+                       peer_device_state_change++;
+               }
+               device_state_change++;
+       }
+
+       return state_change;
+}
+
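
Taken together, the snapshot helpers follow a fixed OLD/NEW protocol, visible in _drbd_set_state() and w_after_conn_state_ch() further down in this patch:

	state_change = remember_old_state(resource, GFP_ATOMIC);
	/* ... apply the state transition under req_lock ... */
	remember_new_state(state_change);
	/* ... later, from worker context, where sleeping is allowed ... */
	broadcast_state_change(state_change);	/* emit netlink notifications */
	forget_state_change(state_change);	/* drop the krefs, kfree() */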
+static void remember_new_state(struct drbd_state_change *state_change)
+{
+       struct drbd_resource_state_change *resource_state_change;
+       struct drbd_resource *resource;
+       unsigned int n;
+
+       if (!state_change)
+               return;
+
+       resource_state_change = &state_change->resource[0];
+       resource = resource_state_change->resource;
+
+       resource_state_change->role[NEW] =
+               conn_highest_role(first_connection(resource));
+       resource_state_change->susp[NEW] = resource->susp;
+       resource_state_change->susp_nod[NEW] = resource->susp_nod;
+       resource_state_change->susp_fen[NEW] = resource->susp_fen;
+
+       for (n = 0; n < state_change->n_devices; n++) {
+               struct drbd_device_state_change *device_state_change =
+                       &state_change->devices[n];
+               struct drbd_device *device = device_state_change->device;
+
+               device_state_change->disk_state[NEW] = device->state.disk;
+       }
+
+       for (n = 0; n < state_change->n_connections; n++) {
+               struct drbd_connection_state_change *connection_state_change =
+                       &state_change->connections[n];
+               struct drbd_connection *connection =
+                       connection_state_change->connection;
+
+               connection_state_change->cstate[NEW] = connection->cstate;
+               connection_state_change->peer_role[NEW] =
+                       conn_highest_peer(connection);
+       }
+
+       for (n = 0; n < state_change->n_devices * state_change->n_connections; n++) {
+               struct drbd_peer_device_state_change *peer_device_state_change =
+                       &state_change->peer_devices[n];
+               struct drbd_device *device =
+                       peer_device_state_change->peer_device->device;
+               union drbd_dev_state state = device->state;
+
+               peer_device_state_change->disk_state[NEW] = state.pdsk;
+               peer_device_state_change->repl_state[NEW] =
+                       max_t(enum drbd_conns, C_WF_REPORT_PARAMS, state.conn);
+               peer_device_state_change->resync_susp_user[NEW] =
+                       state.user_isp;
+               peer_device_state_change->resync_susp_peer[NEW] =
+                       state.peer_isp;
+               peer_device_state_change->resync_susp_dependency[NEW] =
+                       state.aftr_isp;
+       }
+}
+
+void copy_old_to_new_state_change(struct drbd_state_change *state_change)
+{
+       struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
+       unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
+
+#define OLD_TO_NEW(x) \
+       (x[NEW] = x[OLD])
+
+       OLD_TO_NEW(resource_state_change->role);
+       OLD_TO_NEW(resource_state_change->susp);
+       OLD_TO_NEW(resource_state_change->susp_nod);
+       OLD_TO_NEW(resource_state_change->susp_fen);
+
+       for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
+               struct drbd_connection_state_change *connection_state_change =
+                               &state_change->connections[n_connection];
+
+               OLD_TO_NEW(connection_state_change->peer_role);
+               OLD_TO_NEW(connection_state_change->cstate);
+       }
+
+       for (n_device = 0; n_device < state_change->n_devices; n_device++) {
+               struct drbd_device_state_change *device_state_change =
+                       &state_change->devices[n_device];
+
+               OLD_TO_NEW(device_state_change->disk_state);
+       }
+
+       n_peer_devices = state_change->n_devices * state_change->n_connections;
+       for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
+               struct drbd_peer_device_state_change *p =
+                       &state_change->peer_devices[n_peer_device];
+
+               OLD_TO_NEW(p->disk_state);
+               OLD_TO_NEW(p->repl_state);
+               OLD_TO_NEW(p->resync_susp_user);
+               OLD_TO_NEW(p->resync_susp_peer);
+               OLD_TO_NEW(p->resync_susp_dependency);
+       }
+
+#undef OLD_TO_NEW
+}
+
+void forget_state_change(struct drbd_state_change *state_change)
+{
+       unsigned int n;
+
+       if (!state_change)
+               return;
+
+       if (state_change->resource->resource)
+               kref_put(&state_change->resource->resource->kref, drbd_destroy_resource);
+       for (n = 0; n < state_change->n_devices; n++) {
+               struct drbd_device *device = state_change->devices[n].device;
+
+               if (device)
+                       kref_put(&device->kref, drbd_destroy_device);
+       }
+       for (n = 0; n < state_change->n_connections; n++) {
+               struct drbd_connection *connection =
+                       state_change->connections[n].connection;
+
+               if (connection)
+                       kref_put(&connection->kref, drbd_destroy_connection);
+       }
+       kfree(state_change);
+}
+
 static int w_after_state_ch(struct drbd_work *w, int unused);
 static void after_state_ch(struct drbd_device *device, union drbd_state os,
-                          union drbd_state ns, enum chg_state_flags flags);
+                          union drbd_state ns, enum chg_state_flags flags,
+                          struct drbd_state_change *);
 static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
 static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
 static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
@@ -93,6 +334,7 @@ static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
                return R_SECONDARY;
        return R_UNKNOWN;
 }
+
 static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
 {
        if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
@@ -937,7 +1179,7 @@ void drbd_resume_al(struct drbd_device *device)
                drbd_info(device, "Resumed AL updates\n");
 }
 
-/* helper for __drbd_set_state */
+/* helper for _drbd_set_state */
 static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
 {
        if (first_peer_device(device)->connection->agreed_pro_version < 90)
@@ -965,17 +1207,17 @@ static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
 }
 
 /**
- * __drbd_set_state() - Set a new DRBD state
+ * _drbd_set_state() - Set a new DRBD state
  * @device:    DRBD device.
  * @ns:                new state.
  * @flags:     Flags
  * @done:      Optional completion, that will get completed after the after_state_ch() finished
  *
- * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
+ * Caller needs to hold req_lock. Do not call directly.
  */
 enum drbd_state_rv
-__drbd_set_state(struct drbd_device *device, union drbd_state ns,
-                enum chg_state_flags flags, struct completion *done)
+_drbd_set_state(struct drbd_device *device, union drbd_state ns,
+               enum chg_state_flags flags, struct completion *done)
 {
        struct drbd_peer_device *peer_device = first_peer_device(device);
        struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
@@ -983,6 +1225,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
        enum drbd_state_rv rv = SS_SUCCESS;
        enum sanitize_state_warnings ssw;
        struct after_state_chg_work *ascw;
+       struct drbd_state_change *state_change;
 
        os = drbd_read_state(device);
 
@@ -1037,6 +1280,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
        if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
                clear_bit(RS_DONE, &device->flags);
 
+       /* FIXME: Have any flags been set earlier in this function already? */
+       state_change = remember_old_state(device->resource, GFP_ATOMIC);
+
        /* changes to local_cnt and device flags should be visible before
         * changes to state, which again should be visible before anything else
         * depending on that change happens. */
@@ -1047,6 +1293,8 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
        device->resource->susp_fen = ns.susp_fen;
        smp_wmb();
 
+       remember_new_state(state_change);
+
        /* put replicated vs not-replicated requests in separate epochs */
        if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
            drbd_should_do_remote((union drbd_dev_state)ns.i))
@@ -1184,6 +1432,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
                ascw->w.cb = w_after_state_ch;
                ascw->device = device;
                ascw->done = done;
+               ascw->state_change = state_change;
                drbd_queue_work(&connection->sender_work,
                                &ascw->w);
        } else {
@@ -1199,7 +1448,8 @@ static int w_after_state_ch(struct drbd_work *w, int unused)
                container_of(w, struct after_state_chg_work, w);
        struct drbd_device *device = ascw->device;
 
-       after_state_ch(device, ascw->os, ascw->ns, ascw->flags);
+       after_state_ch(device, ascw->os, ascw->ns, ascw->flags, ascw->state_change);
+       forget_state_change(ascw->state_change);
        if (ascw->flags & CS_WAIT_COMPLETE)
                complete(ascw->done);
        kfree(ascw);
@@ -1234,7 +1484,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
        D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
 
        /* open coded non-blocking drbd_suspend_io(device); */
-       set_bit(SUSPEND_IO, &device->flags);
+       atomic_inc(&device->suspend_cnt);
 
        drbd_bm_lock(device, why, flags);
        rv = io_fn(device);
@@ -1245,6 +1495,139 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
        return rv;
 }
 
+void notify_resource_state_change(struct sk_buff *skb,
+                                 unsigned int seq,
+                                 struct drbd_resource_state_change *resource_state_change,
+                                 enum drbd_notification_type type)
+{
+       struct drbd_resource *resource = resource_state_change->resource;
+       struct resource_info resource_info = {
+               .res_role = resource_state_change->role[NEW],
+               .res_susp = resource_state_change->susp[NEW],
+               .res_susp_nod = resource_state_change->susp_nod[NEW],
+               .res_susp_fen = resource_state_change->susp_fen[NEW],
+       };
+
+       notify_resource_state(skb, seq, resource, &resource_info, type);
+}
+
+void notify_connection_state_change(struct sk_buff *skb,
+                                   unsigned int seq,
+                                   struct drbd_connection_state_change *connection_state_change,
+                                   enum drbd_notification_type type)
+{
+       struct drbd_connection *connection = connection_state_change->connection;
+       struct connection_info connection_info = {
+               .conn_connection_state = connection_state_change->cstate[NEW],
+               .conn_role = connection_state_change->peer_role[NEW],
+       };
+
+       notify_connection_state(skb, seq, connection, &connection_info, type);
+}
+
+void notify_device_state_change(struct sk_buff *skb,
+                               unsigned int seq,
+                               struct drbd_device_state_change *device_state_change,
+                               enum drbd_notification_type type)
+{
+       struct drbd_device *device = device_state_change->device;
+       struct device_info device_info = {
+               .dev_disk_state = device_state_change->disk_state[NEW],
+       };
+
+       notify_device_state(skb, seq, device, &device_info, type);
+}
+
+void notify_peer_device_state_change(struct sk_buff *skb,
+                                    unsigned int seq,
+                                    struct drbd_peer_device_state_change *p,
+                                    enum drbd_notification_type type)
+{
+       struct drbd_peer_device *peer_device = p->peer_device;
+       struct peer_device_info peer_device_info = {
+               .peer_repl_state = p->repl_state[NEW],
+               .peer_disk_state = p->disk_state[NEW],
+               .peer_resync_susp_user = p->resync_susp_user[NEW],
+               .peer_resync_susp_peer = p->resync_susp_peer[NEW],
+               .peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
+       };
+
+       notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
+}
+
+static void broadcast_state_change(struct drbd_state_change *state_change)
+{
+       struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
+       bool resource_state_has_changed;
+       unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
+       void (*last_func)(struct sk_buff *, unsigned int, void *,
+                         enum drbd_notification_type) = NULL;
+       void *uninitialized_var(last_arg);
+
+#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
+#define FINAL_STATE_CHANGE(type) \
+       ({ if (last_func) \
+               last_func(NULL, 0, last_arg, type); \
+       })
+#define REMEMBER_STATE_CHANGE(func, arg, type) \
+       ({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \
+          last_func = (typeof(last_func))func; \
+          last_arg = arg; \
+        })
+
+       mutex_lock(&notification_mutex);
+
+       resource_state_has_changed =
+           HAS_CHANGED(resource_state_change->role) ||
+           HAS_CHANGED(resource_state_change->susp) ||
+           HAS_CHANGED(resource_state_change->susp_nod) ||
+           HAS_CHANGED(resource_state_change->susp_fen);
+
+       if (resource_state_has_changed)
+               REMEMBER_STATE_CHANGE(notify_resource_state_change,
+                                     resource_state_change, NOTIFY_CHANGE);
+
+       for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
+               struct drbd_connection_state_change *connection_state_change =
+                               &state_change->connections[n_connection];
+
+               if (HAS_CHANGED(connection_state_change->peer_role) ||
+                   HAS_CHANGED(connection_state_change->cstate))
+                       REMEMBER_STATE_CHANGE(notify_connection_state_change,
+                                             connection_state_change, NOTIFY_CHANGE);
+       }
+
+       for (n_device = 0; n_device < state_change->n_devices; n_device++) {
+               struct drbd_device_state_change *device_state_change =
+                       &state_change->devices[n_device];
+
+               if (HAS_CHANGED(device_state_change->disk_state))
+                       REMEMBER_STATE_CHANGE(notify_device_state_change,
+                                             device_state_change, NOTIFY_CHANGE);
+       }
+
+       n_peer_devices = state_change->n_devices * state_change->n_connections;
+       for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
+               struct drbd_peer_device_state_change *p =
+                       &state_change->peer_devices[n_peer_device];
+
+               if (HAS_CHANGED(p->disk_state) ||
+                   HAS_CHANGED(p->repl_state) ||
+                   HAS_CHANGED(p->resync_susp_user) ||
+                   HAS_CHANGED(p->resync_susp_peer) ||
+                   HAS_CHANGED(p->resync_susp_dependency))
+                       REMEMBER_STATE_CHANGE(notify_peer_device_state_change,
+                                             p, NOTIFY_CHANGE);
+       }
+
+       FINAL_STATE_CHANGE(NOTIFY_CHANGE);
+       mutex_unlock(&notification_mutex);
+
+#undef HAS_CHANGED
+#undef FINAL_STATE_CHANGE
+#undef REMEMBER_STATE_CHANGE
+}
+
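
broadcast_state_change() above delays each notification by one step in last_func/last_arg, so that every event except the last can be flagged NOTIFY_CONTINUES and only the final one goes out unflagged. The deferred-emit idiom in isolation, as a sketch (the emit callbacks are stand-ins):

void (*pending)(void *arg, bool more) = NULL;
void *pending_arg = NULL;

#define EMIT(fn, arg) do {			\
	if (pending)				\
		pending(pending_arg, true);	\
	pending = (fn);				\
	pending_arg = (arg);			\
} while (0)

/* ... EMIT(...) once per changed object ... */

if (pending)
	pending(pending_arg, false);	/* final event: no continuation flag */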
 /**
  * after_state_ch() - Perform after state change actions that may sleep
  * @device:    DRBD device.
@@ -1253,13 +1636,16 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
  * @flags:     Flags
  */
 static void after_state_ch(struct drbd_device *device, union drbd_state os,
-                          union drbd_state ns, enum chg_state_flags flags)
+                          union drbd_state ns, enum chg_state_flags flags,
+                          struct drbd_state_change *state_change)
 {
        struct drbd_resource *resource = device->resource;
        struct drbd_peer_device *peer_device = first_peer_device(device);
        struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
        struct sib_info sib;
 
+       broadcast_state_change(state_change);
+
        sib.sib_reason = SIB_STATE_CHANGE;
        sib.os = os;
        sib.ns = ns;
@@ -1377,7 +1763,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
        }
 
        if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
-               if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
+               if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
                    device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
                        drbd_uuid_new_current(device);
                        drbd_send_uuids(peer_device);
@@ -1444,7 +1830,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
        if (os.disk != D_FAILED && ns.disk == D_FAILED) {
                enum drbd_io_error_p eh = EP_PASS_ON;
                int was_io_error = 0;
-               /* corresponding get_ldev was in __drbd_set_state, to serialize
+               /* corresponding get_ldev was in _drbd_set_state, to serialize
                 * our cleanup here with the transition to D_DISKLESS.
                 * But it is still not safe to dereference ldev here, since
                 * we might come from a failed Attach before ldev was set. */
@@ -1455,6 +1841,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 
                        was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags);
 
+                       /* Intentionally call this handler first, before drbd_send_state().
+                        * See: 2932204 drbd: call local-io-error handler early
+                        * People may choose to hard-reset the box from this handler.
+                        * It is useful if this looks like a "regular node crash". */
                        if (was_io_error && eh == EP_CALL_HELPER)
                                drbd_khelper(device, "local-io-error");
 
@@ -1572,6 +1962,7 @@ struct after_conn_state_chg_work {
        union drbd_state ns_max; /* new, max state, over all devices */
        enum chg_state_flags flags;
        struct drbd_connection *connection;
+       struct drbd_state_change *state_change;
 };
 
 static int w_after_conn_state_ch(struct drbd_work *w, int unused)
@@ -1584,6 +1975,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
        struct drbd_peer_device *peer_device;
        int vnr;
 
+       broadcast_state_change(acscw->state_change);
+       forget_state_change(acscw->state_change);
        kfree(acscw);
 
        /* Upon network configuration, we need to start the receiver */
@@ -1593,6 +1986,13 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
        if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
                struct net_conf *old_conf;
 
+               mutex_lock(&notification_mutex);
+               idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+                       notify_peer_device_state(NULL, 0, peer_device, NULL,
+                                                NOTIFY_DESTROY | NOTIFY_CONTINUES);
+               notify_connection_state(NULL, 0, connection, NULL, NOTIFY_DESTROY);
+               mutex_unlock(&notification_mutex);
+
                mutex_lock(&connection->resource->conf_update);
                old_conf = connection->net_conf;
                connection->my_addr_len = 0;
@@ -1759,7 +2159,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
                if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
                        ns.disk = os.disk;
 
-               rv = __drbd_set_state(device, ns, flags, NULL);
+               rv = _drbd_set_state(device, ns, flags, NULL);
                if (rv < SS_SUCCESS)
                        BUG();
 
@@ -1823,6 +2223,7 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
        enum drbd_conns oc = connection->cstate;
        union drbd_state ns_max, ns_min, os;
        bool have_mutex = false;
+       struct drbd_state_change *state_change;
 
        if (mask.conn) {
                rv = is_valid_conn_transition(oc, val.conn);
@@ -1868,10 +2269,12 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
                        goto abort;
        }
 
+       state_change = remember_old_state(connection->resource, GFP_ATOMIC);
        conn_old_common_state(connection, &os, &flags);
        flags |= CS_DC_SUSP;
        conn_set_state(connection, mask, val, &ns_min, &ns_max, flags);
        conn_pr_state_change(connection, os, ns_max, flags);
+       remember_new_state(state_change);
 
        acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
        if (acscw) {
@@ -1882,6 +2285,7 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
                acscw->w.cb = w_after_conn_state_ch;
                kref_get(&connection->kref);
                acscw->connection = connection;
+               acscw->state_change = state_change;
                drbd_queue_work(&connection->sender_work, &acscw->w);
        } else {
                drbd_err(connection, "Could not kmalloc an acscw\n");
index 7f53c40823cd581e28279b9ef046a07af54a974e..bd989536f888fc05af96942858795dc34713045f 100644 (file)
@@ -122,9 +122,9 @@ extern enum drbd_state_rv
 _drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state,
                                        union drbd_state, enum chg_state_flags);
 
-extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state,
-                                          enum chg_state_flags,
-                                          struct completion *done);
+extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state,
+                                         enum chg_state_flags,
+                                         struct completion *done);
 extern void print_st_err(struct drbd_device *, union drbd_state,
                        union drbd_state, int);
 
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
new file mode 100644 (file)
index 0000000..9e503a1
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef DRBD_STATE_CHANGE_H
+#define DRBD_STATE_CHANGE_H
+
+struct drbd_resource_state_change {
+       struct drbd_resource *resource;
+       enum drbd_role role[2];
+       bool susp[2];
+       bool susp_nod[2];
+       bool susp_fen[2];
+};
+
+struct drbd_device_state_change {
+       struct drbd_device *device;
+       enum drbd_disk_state disk_state[2];
+};
+
+struct drbd_connection_state_change {
+       struct drbd_connection *connection;
+       enum drbd_conns cstate[2];  /* drbd9: enum drbd_conn_state */
+       enum drbd_role peer_role[2];
+};
+
+struct drbd_peer_device_state_change {
+       struct drbd_peer_device *peer_device;
+       enum drbd_disk_state disk_state[2];
+       enum drbd_conns repl_state[2];  /* drbd9: enum drbd_repl_state */
+       bool resync_susp_user[2];
+       bool resync_susp_peer[2];
+       bool resync_susp_dependency[2];
+};
+
+struct drbd_state_change {
+       struct list_head list;
+       unsigned int n_devices;
+       unsigned int n_connections;
+       struct drbd_resource_state_change resource[1];
+       struct drbd_device_state_change *devices;
+       struct drbd_connection_state_change *connections;
+       struct drbd_peer_device_state_change *peer_devices;
+};
+
+extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_t);
+extern void copy_old_to_new_state_change(struct drbd_state_change *);
+extern void forget_state_change(struct drbd_state_change *);
+
+extern void notify_resource_state_change(struct sk_buff *,
+                                        unsigned int,
+                                        struct drbd_resource_state_change *,
+                                        enum drbd_notification_type type);
+extern void notify_connection_state_change(struct sk_buff *,
+                                          unsigned int,
+                                          struct drbd_connection_state_change *,
+                                          enum drbd_notification_type type);
+extern void notify_device_state_change(struct sk_buff *,
+                                      unsigned int,
+                                      struct drbd_device_state_change *,
+                                      enum drbd_notification_type type);
+extern void notify_peer_device_state_change(struct sk_buff *,
+                                           unsigned int,
+                                           struct drbd_peer_device_state_change *,
+                                           enum drbd_notification_type type);
+
+#endif  /* DRBD_STATE_CHANGE_H */
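
Editor's note: read together with the call sites earlier in this diff, the header implies
the following snapshot life cycle (a reconstruction of the sequence, not verbatim driver
code):

    /* 1. Before the transition: allocate the snapshot and record
     *    the [0] ("old") values of every tracked object. */
    state_change = remember_old_state(resource, GFP_ATOMIC);

    /* 2. Apply the state transition. */

    /* 3. Record the [1] ("new") values into the same snapshot. */
    remember_new_state(state_change);

    /* 4. Later, from work context: walk the snapshot and emit one
     *    notify_*_state_change() per object whose tracked fields
     *    differ between [0] and [1]. */
    broadcast_state_change(state_change);

    /* 5. Release the snapshot. */
    forget_state_change(state_change);
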
index 5578c1477ba6610f27f80e4cffaa874939a05364..eff716c27b1fdf53caa7440ac5bad755c271ad43 100644 (file)
@@ -55,13 +55,6 @@ static int make_resync_request(struct drbd_device *, int);
  *
  */
 
-
-/* About the global_state_lock
-   Each state transition on an device holds a read lock. In case we have
-   to evaluate the resync after dependencies, we grab a write lock, because
-   we need stable states on all devices for that.  */
-rwlock_t global_state_lock;
-
 /* used for synchronous meta data and bitmap IO
  * submitted by drbd_md_sync_page_io()
  */
@@ -120,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
        unsigned long flags = 0;
        struct drbd_peer_device *peer_device = peer_req->peer_device;
        struct drbd_device *device = peer_device->device;
+       struct drbd_connection *connection = peer_device->connection;
        struct drbd_interval i;
        int do_wake;
        u64 block_id;
@@ -152,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
         * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
        if (peer_req->flags & EE_WAS_ERROR)
                __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
+
+       if (connection->cstate >= C_WF_REPORT_PARAMS) {
+               kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
+               if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
+                       kref_put(&device->kref, drbd_destroy_device);
+       }
        spin_unlock_irqrestore(&device->resource->req_lock, flags);
 
        if (block_id == ID_SYNCER)
@@ -163,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
        if (do_al_complete_io)
                drbd_al_complete_io(device, &i);
 
-       wake_asender(peer_device->connection);
        put_ldev(device);
 }
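
Editor's note: the kref_get()/queue_work() pairing added above is the usual guard for
work items that may already be queued: queue_work() returns false when the work was
already pending, and in that case the extra reference must be dropped at once because
the queued instance already holds one. The pattern in isolation (device type, workqueue
and destructor are placeholders):

    /* Take a reference on behalf of the work item. */
    kref_get(&dev->kref);
    if (!queue_work(wq, &dev->work))
        /* Already queued: that instance owns a reference,
         * so give this one back immediately. */
        kref_put(&dev->kref, dev_destroy);
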
 
@@ -195,6 +194,12 @@ void drbd_peer_request_endio(struct bio *bio)
        }
 }
 
+void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
+{
+       panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
+               device->minor, device->resource->name, device->vnr);
+}
+
 /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
  */
 void drbd_request_endio(struct bio *bio)
@@ -238,7 +243,7 @@ void drbd_request_endio(struct bio *bio)
                        drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
 
                if (!bio->bi_error)
-                       panic("possible random memory corruption caused by delayed completion of aborted local request\n");
+                       drbd_panic_after_delayed_completion_of_aborted_request(device);
        }
 
        /* to avoid recursion in __req_mod */
@@ -1291,6 +1296,7 @@ static int drbd_send_barrier(struct drbd_connection *connection)
        p->barrier = connection->send.current_epoch_nr;
        p->pad = 0;
        connection->send.current_epoch_writes = 0;
+       connection->send.last_sent_barrier_jif = jiffies;
 
        return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
 }
@@ -1315,6 +1321,7 @@ static void re_init_if_first_write(struct drbd_connection *connection, unsigned
                connection->send.seen_any_write_yet = true;
                connection->send.current_epoch_nr = epoch;
                connection->send.current_epoch_writes = 0;
+               connection->send.last_sent_barrier_jif = jiffies;
        }
 }
 
@@ -1456,70 +1463,73 @@ static int _drbd_may_sync_now(struct drbd_device *device)
 }
 
 /**
- * _drbd_pause_after() - Pause resync on all devices that may not resync now
+ * drbd_pause_after() - Pause resync on all devices that may not resync now
  * @device:    DRBD device.
  *
  * Called from process context only (admin command and after_state_ch).
  */
-static int _drbd_pause_after(struct drbd_device *device)
+static bool drbd_pause_after(struct drbd_device *device)
 {
+       bool changed = false;
        struct drbd_device *odev;
-       int i, rv = 0;
+       int i;
 
        rcu_read_lock();
        idr_for_each_entry(&drbd_devices, odev, i) {
                if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
                        continue;
-               if (!_drbd_may_sync_now(odev))
-                       rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
-                              != SS_NOTHING_TO_DO);
+               if (!_drbd_may_sync_now(odev) &&
+                   _drbd_set_state(_NS(odev, aftr_isp, 1),
+                                   CS_HARD, NULL) != SS_NOTHING_TO_DO)
+                       changed = true;
        }
        rcu_read_unlock();
 
-       return rv;
+       return changed;
 }
 
 /**
- * _drbd_resume_next() - Resume resync on all devices that may resync now
+ * drbd_resume_next() - Resume resync on all devices that may resync now
  * @device:    DRBD device.
  *
  * Called from process context only (admin command and worker).
  */
-static int _drbd_resume_next(struct drbd_device *device)
+static bool drbd_resume_next(struct drbd_device *device)
 {
+       bool changed = false;
        struct drbd_device *odev;
-       int i, rv = 0;
+       int i;
 
        rcu_read_lock();
        idr_for_each_entry(&drbd_devices, odev, i) {
                if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
                        continue;
                if (odev->state.aftr_isp) {
-                       if (_drbd_may_sync_now(odev))
-                               rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
-                                                       CS_HARD, NULL)
-                                      != SS_NOTHING_TO_DO) ;
+                       if (_drbd_may_sync_now(odev) &&
+                           _drbd_set_state(_NS(odev, aftr_isp, 0),
+                                           CS_HARD, NULL) != SS_NOTHING_TO_DO)
+                               changed = true;
                }
        }
        rcu_read_unlock();
-       return rv;
+       return changed;
 }
 
 void resume_next_sg(struct drbd_device *device)
 {
-       write_lock_irq(&global_state_lock);
-       _drbd_resume_next(device);
-       write_unlock_irq(&global_state_lock);
+       lock_all_resources();
+       drbd_resume_next(device);
+       unlock_all_resources();
 }
 
 void suspend_other_sg(struct drbd_device *device)
 {
-       write_lock_irq(&global_state_lock);
-       _drbd_pause_after(device);
-       write_unlock_irq(&global_state_lock);
+       lock_all_resources();
+       drbd_pause_after(device);
+       unlock_all_resources();
 }
 
-/* caller must hold global_state_lock */
+/* caller must lock_all_resources() */
 enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
 {
        struct drbd_device *odev;
@@ -1557,15 +1567,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_min
        }
 }
 
-/* caller must hold global_state_lock */
+/* caller must lock_all_resources() */
 void drbd_resync_after_changed(struct drbd_device *device)
 {
-       int changes;
+       int changed;
 
        do {
-               changes  = _drbd_pause_after(device);
-               changes |= _drbd_resume_next(device);
-       } while (changes);
+               changed  = drbd_pause_after(device);
+               changed |= drbd_resume_next(device);
+       } while (changed);
 }
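
Editor's note: drbd_resync_after_changed() iterates to a fixed point -- pausing one
device can change which other devices may or may not resync, so the loop repeats until
a full pass changes nothing. The same idiom in isolation (function names illustrative):

    bool changed;

    do {
        changed  = pause_dependents();  /* may set aftr_isp somewhere  */
        changed |= resume_ready();      /* may clear aftr_isp elsewhere */
    } while (changed);                  /* stop once a pass is a no-op  */
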
 
 void drbd_rs_controller_reset(struct drbd_device *device)
@@ -1685,19 +1695,14 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
        } else {
                mutex_lock(device->state_mutex);
        }
-       clear_bit(B_RS_H_DONE, &device->flags);
 
-       /* req_lock: serialize with drbd_send_and_submit() and others
-        * global_state_lock: for stable sync-after dependencies */
-       spin_lock_irq(&device->resource->req_lock);
-       write_lock(&global_state_lock);
+       lock_all_resources();
+       clear_bit(B_RS_H_DONE, &device->flags);
        /* Did some connection breakage or IO error race with us? */
        if (device->state.conn < C_CONNECTED
        || !get_ldev_if_state(device, D_NEGOTIATING)) {
-               write_unlock(&global_state_lock);
-               spin_unlock_irq(&device->resource->req_lock);
-               mutex_unlock(device->state_mutex);
-               return;
+               unlock_all_resources();
+               goto out;
        }
 
        ns = drbd_read_state(device);
@@ -1711,7 +1716,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
        else /* side == C_SYNC_SOURCE */
                ns.pdsk = D_INCONSISTENT;
 
-       r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
+       r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
        ns = drbd_read_state(device);
 
        if (ns.conn < C_CONNECTED)
@@ -1732,7 +1737,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
                        device->rs_mark_left[i] = tw;
                        device->rs_mark_time[i] = now;
                }
-               _drbd_pause_after(device);
+               drbd_pause_after(device);
                /* Forget potentially stale cached per resync extent bit-counts.
                 * Open coded drbd_rs_cancel_all(device), we already have IRQs
                 * disabled, and know the disk state is ok. */
@@ -1742,8 +1747,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
                device->resync_wenr = LC_FREE;
                spin_unlock(&device->al_lock);
        }
-       write_unlock(&global_state_lock);
-       spin_unlock_irq(&device->resource->req_lock);
+       unlock_all_resources();
 
        if (r == SS_SUCCESS) {
                wake_up(&device->al_wait); /* for lc_reset() above */
@@ -1807,6 +1811,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
                drbd_md_sync(device);
        }
        put_ldev(device);
+out:
        mutex_unlock(device->state_mutex);
 }
 
@@ -1836,7 +1841,7 @@ static void drbd_ldev_destroy(struct drbd_device *device)
        device->act_log = NULL;
 
        __acquire(local);
-       drbd_free_ldev(device->ldev);
+       drbd_backing_dev_free(device, device->ldev);
        device->ldev = NULL;
        __release(local);
 
index 9e251201dd48ef2a850b8069edf7ffc05be98a4e..84708a5f8c520cb86db4ad62a4b845a5ec83b88f 100644 (file)
@@ -866,7 +866,7 @@ static void set_fdc(int drive)
 }
 
 /* locks the driver */
-static int lock_fdc(int drive, bool interruptible)
+static int lock_fdc(int drive)
 {
        if (WARN(atomic_read(&usage_count) == 0,
                 "Trying to lock fdc while usage count=0\n"))
@@ -2173,7 +2173,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
 {
        int ret;
 
-       if (lock_fdc(drive, true))
+       if (lock_fdc(drive))
                return -EINTR;
 
        set_floppy(drive);
@@ -2960,7 +2960,7 @@ static int user_reset_fdc(int drive, int arg, bool interruptible)
 {
        int ret;
 
-       if (lock_fdc(drive, interruptible))
+       if (lock_fdc(drive))
                return -EINTR;
 
        if (arg == FD_RESET_ALWAYS)
@@ -3243,7 +3243,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
                mutex_lock(&open_lock);
-               if (lock_fdc(drive, true)) {
+               if (lock_fdc(drive)) {
                        mutex_unlock(&open_lock);
                        return -EINTR;
                }
@@ -3263,7 +3263,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
        } else {
                int oldStretch;
 
-               if (lock_fdc(drive, true))
+               if (lock_fdc(drive))
                        return -EINTR;
                if (cmd != FDDEFPRM) {
                        /* notice a disk change immediately, else
@@ -3349,7 +3349,7 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g)
        if (type)
                *g = &floppy_type[type];
        else {
-               if (lock_fdc(drive, false))
+               if (lock_fdc(drive))
                        return -EINTR;
                if (poll_drive(false, 0) == -EINTR)
                        return -EINTR;
@@ -3433,7 +3433,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                if (UDRS->fd_ref != 1)
                        /* somebody else has this drive open */
                        return -EBUSY;
-               if (lock_fdc(drive, true))
+               if (lock_fdc(drive))
                        return -EINTR;
 
                /* do the actual eject. Fails on
@@ -3445,7 +3445,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                process_fd_request();
                return ret;
        case FDCLRPRM:
-               if (lock_fdc(drive, true))
+               if (lock_fdc(drive))
                        return -EINTR;
                current_type[drive] = NULL;
                floppy_sizes[drive] = MAX_DISK_SIZE << 1;
@@ -3467,7 +3467,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                UDP->flags &= ~FTD_MSG;
                return 0;
        case FDFMTBEG:
-               if (lock_fdc(drive, true))
+               if (lock_fdc(drive))
                        return -EINTR;
                if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
                        return -EINTR;
@@ -3484,7 +3484,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                return do_format(drive, &inparam.f);
        case FDFMTEND:
        case FDFLUSH:
-               if (lock_fdc(drive, true))
+               if (lock_fdc(drive))
                        return -EINTR;
                return invalidate_drive(bdev);
        case FDSETEMSGTRESH:
@@ -3507,7 +3507,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                outparam = UDP;
                break;
        case FDPOLLDRVSTAT:
-               if (lock_fdc(drive, true))
+               if (lock_fdc(drive))
                        return -EINTR;
                if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
                        return -EINTR;
@@ -3530,7 +3530,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
        case FDRAWCMD:
                if (type)
                        return -EINVAL;
-               if (lock_fdc(drive, true))
+               if (lock_fdc(drive))
                        return -EINTR;
                set_floppy(drive);
                i = raw_cmd_ioctl(cmd, (void __user *)param);
@@ -3539,7 +3539,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                process_fd_request();
                return i;
        case FDTWADDLE:
-               if (lock_fdc(drive, true))
+               if (lock_fdc(drive))
                        return -EINTR;
                twaddle();
                process_fd_request();
@@ -3663,6 +3663,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
 
        opened_bdev[drive] = bdev;
 
+       if (!(mode & (FMODE_READ|FMODE_WRITE))) {
+               res = -EINVAL;
+               goto out;
+       }
+
        res = -ENXIO;
 
        if (!floppy_track_buffer) {
@@ -3706,21 +3711,20 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
        if (UFDCS->rawcmd == 1)
                UFDCS->rawcmd = 2;
 
-       if (!(mode & FMODE_NDELAY)) {
-               if (mode & (FMODE_READ|FMODE_WRITE)) {
-                       UDRS->last_checked = 0;
-                       clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
-                       check_disk_change(bdev);
-                       if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
-                               goto out;
-                       if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
-                               goto out;
-               }
-               res = -EROFS;
-               if ((mode & FMODE_WRITE) &&
-                   !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
-                       goto out;
-       }
+       UDRS->last_checked = 0;
+       clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+       check_disk_change(bdev);
+       if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+               goto out;
+       if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+               goto out;
+
+       res = -EROFS;
+
+       if ((mode & FMODE_WRITE) &&
+                       !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
+               goto out;
+
        mutex_unlock(&open_lock);
        mutex_unlock(&floppy_mutex);
        return 0;
@@ -3748,7 +3752,8 @@ static unsigned int floppy_check_events(struct gendisk *disk,
                return DISK_EVENT_MEDIA_CHANGE;
 
        if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
-               lock_fdc(drive, false);
+               if (lock_fdc(drive))
+                       return -EINTR;
                poll_drive(false, 0);
                process_fd_request();
        }
@@ -3847,7 +3852,9 @@ static int floppy_revalidate(struct gendisk *disk)
                         "VFS: revalidate called on non-open device.\n"))
                        return -EFAULT;
 
-               lock_fdc(drive, false);
+               res = lock_fdc(drive);
+               if (res)
+                       return res;
                cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
                      test_bit(FD_VERIFY_BIT, &UDRS->flags));
                if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) {
index 15bec407ac37c214855aeaa69e0c45ecbe25f0e6..9b180dbbd03cc822c776e636183614bf59a76d3d 100644 (file)
 /* Device instance number, incremented each time a device is probed. */
 static int instance;
 
-struct list_head online_list;
-struct list_head removing_list;
-spinlock_t dev_lock;
+static struct list_head online_list;
+static struct list_head removing_list;
+static spinlock_t dev_lock;
 
 /*
  * Global variable used to hold the major block device number
index 95dff91135ad6396aa2467ae1093d784b66aa020..64a7b5971b57046eafd210d726fa039b05bc705d 100644 (file)
@@ -436,9 +436,8 @@ static void null_del_dev(struct nullb *nullb)
 static void null_lnvm_end_io(struct request *rq, int error)
 {
        struct nvm_rq *rqd = rq->end_io_data;
-       struct nvm_dev *dev = rqd->dev;
 
-       dev->mt->end_io(rqd, error);
+       nvm_end_io(rqd, error);
 
        blk_put_request(rq);
 }
@@ -479,7 +478,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
        id->ver_id = 0x1;
        id->vmnt = 0;
        id->cgrps = 1;
-       id->cap = 0x3;
+       id->cap = 0x2;
        id->dom = 0x1;
 
        id->ppaf.blk_offset = 0;
@@ -495,17 +494,17 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
        id->ppaf.ch_offset = 56;
        id->ppaf.ch_len = 8;
 
-       do_div(size, bs); /* convert size to pages */
-       do_div(size, 256); /* concert size to pgs pr blk */
+       sector_div(size, bs); /* convert size to pages */
+       size >>= 8; /* convert size to pages per block */
        grp = &id->groups[0];
        grp->mtype = 0;
        grp->fmtype = 0;
        grp->num_ch = 1;
        grp->num_pg = 256;
        blksize = size;
-       do_div(size, (1 << 16));
+       size >>= 16;
        grp->num_lun = size + 1;
-       do_div(blksize, grp->num_lun);
+       sector_div(blksize, grp->num_lun);
        grp->num_blk = blksize;
        grp->num_pln = 1;
 
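
Editor's note: above, do_div() is replaced by sector_div() for the sector_t operand and
by plain shifts where the divisor is a power of two (256 == 1 << 8, 65536 == 1 << 16).
A userspace analogue of the arithmetic, with illustrative numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t size = 1ULL << 30; /* 1 GiB device, illustrative */
        uint32_t bs   = 4096;       /* bytes per flash page       */

        size /= bs;   /* size in pages (sector_div() in the kernel) */
        size >>= 8;   /* 256 pages per block, so shift by 8         */
        printf("blocks: %llu\n", (unsigned long long)size);
        return 0;
    }
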
@@ -708,9 +707,7 @@ static int null_add_dev(void)
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
        queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
 
-
        mutex_lock(&lock);
-       list_add_tail(&nullb->list, &nullb_list);
        nullb->index = nullb_indexes++;
        mutex_unlock(&lock);
 
@@ -744,6 +741,10 @@ static int null_add_dev(void)
        strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
 
        add_disk(disk);
+
+       mutex_lock(&lock);
+       list_add_tail(&nullb->list, &nullb_list);
+       mutex_unlock(&lock);
 done:
        return 0;
 
index 81ea69fee7ca183313b8e8322833062262279187..4a876785b68cd5c550dbbb1d2b63071446454081 100644 (file)
@@ -5185,8 +5185,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
 
 out_err:
        rbd_dev_unparent(rbd_dev);
-       if (parent)
-               rbd_dev_destroy(parent);
+       rbd_dev_destroy(parent);
        return ret;
 }
 
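
Editor's note: dropping the if (parent) guard relies on rbd_dev_destroy() tolerating a
NULL argument, the same convention kfree(NULL) follows. A generic sketch of a NULL-safe
destructor (names illustrative):

    static void obj_destroy(struct obj *o)
    {
        if (!o)         /* tolerate NULL so callers need no guard */
            return;
        kfree(o->name);
        kfree(o);
    }
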
index 59c91d49b14b649f839f8af5951772436d959da8..ba4bfe933276e34310a4208a10eb2e364dfefa7e 100644 (file)
@@ -23,7 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
-#include <linux/time.h>
+#include <linux/ktime.h>
 #include <linux/hdreg.h>
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
@@ -671,16 +671,15 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
 static unsigned int carm_fill_sync_time(struct carm_host *host,
                                        unsigned int idx, void *mem)
 {
-       struct timeval tv;
        struct carm_msg_sync_time *st = mem;
 
-       do_gettimeofday(&tv);
+       time64_t tv = ktime_get_real_seconds();
 
        memset(st, 0, sizeof(*st));
        st->type        = CARM_MSG_MISC;
        st->subtype     = MISC_SET_TIME;
        st->handle      = cpu_to_le32(TAG_ENCODE(idx));
-       st->timestamp   = cpu_to_le32(tv.tv_sec);
+       st->timestamp   = cpu_to_le32(tv);
 
        return sizeof(struct carm_msg_sync_time);
 }
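
Editor's note: ktime_get_real_seconds() returns a 64-bit seconds count, avoiding the
32-bit time_t that do_gettimeofday() carried; the remaining truncation comes from the
32-bit wire format, not from the time source. A minimal sketch of the replacement
pattern (the helper and destination field are hypothetical):

    #include <linux/ktime.h>
    #include <linux/kernel.h>
    #include <linux/types.h>

    static void fill_timestamp(__le32 *dst)
    {
        time64_t now = ktime_get_real_seconds(); /* y2038-safe */

        /* The message format is only 32 bits wide, so the value
         * is still truncated at the protocol boundary. */
        *dst = cpu_to_le32(lower_32_bits(now));
    }
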
index 41fb1a917b172dc9446495d84357060847c2b569..4809c1501d7eda82081d922fa1944d8cd7c76c84 100644 (file)
@@ -83,6 +83,16 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
 MODULE_PARM_DESC(max_persistent_grants,
                  "Maximum number of grants to map persistently");
 
+/*
+ * Maximum number of rings/queues blkback supports; allow as many queues as
+ * there are CPUs if the user has not specified a value.
+ */
+unsigned int xenblk_max_queues;
+module_param_named(max_queues, xenblk_max_queues, uint, 0644);
+MODULE_PARM_DESC(max_queues,
+                "Maximum number of hardware queues per virtual disk. " \
+                "By default it is the number of online CPUs.");
+
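
Editor's note: since the parameter is declared with module_param_named(), it can be set
at load time (e.g. modprobe xen-blkback max_queues=4 when blkback is built as a module)
or, for a built-in, on the kernel command line (xen_blkback.max_queues=4); the module
name and parameter prefix follow the usual convention and are not shown in this hunk.
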
 /*
  * Maximum order of pages to be used for the shared ring between front and
  * backend, 4KB page granularity is used.
@@ -113,71 +123,71 @@ module_param(log_stats, int, 0644);
 /* Number of free pages to remove on each call to gnttab_free_pages */
 #define NUM_BATCH_FREE_PAGES 10
 
-static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
+static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&blkif->free_pages_lock, flags);
-       if (list_empty(&blkif->free_pages)) {
-               BUG_ON(blkif->free_pages_num != 0);
-               spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+       spin_lock_irqsave(&ring->free_pages_lock, flags);
+       if (list_empty(&ring->free_pages)) {
+               BUG_ON(ring->free_pages_num != 0);
+               spin_unlock_irqrestore(&ring->free_pages_lock, flags);
                return gnttab_alloc_pages(1, page);
        }
-       BUG_ON(blkif->free_pages_num == 0);
-       page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+       BUG_ON(ring->free_pages_num == 0);
+       page[0] = list_first_entry(&ring->free_pages, struct page, lru);
        list_del(&page[0]->lru);
-       blkif->free_pages_num--;
-       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+       ring->free_pages_num--;
+       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
 
        return 0;
 }
 
-static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
                                   int num)
 {
        unsigned long flags;
        int i;
 
-       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+       spin_lock_irqsave(&ring->free_pages_lock, flags);
        for (i = 0; i < num; i++)
-               list_add(&page[i]->lru, &blkif->free_pages);
-       blkif->free_pages_num += num;
-       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+               list_add(&page[i]->lru, &ring->free_pages);
+       ring->free_pages_num += num;
+       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
 }
 
-static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
+static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
 {
        /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
        struct page *page[NUM_BATCH_FREE_PAGES];
        unsigned int num_pages = 0;
        unsigned long flags;
 
-       spin_lock_irqsave(&blkif->free_pages_lock, flags);
-       while (blkif->free_pages_num > num) {
-               BUG_ON(list_empty(&blkif->free_pages));
-               page[num_pages] = list_first_entry(&blkif->free_pages,
+       spin_lock_irqsave(&ring->free_pages_lock, flags);
+       while (ring->free_pages_num > num) {
+               BUG_ON(list_empty(&ring->free_pages));
+               page[num_pages] = list_first_entry(&ring->free_pages,
                                                   struct page, lru);
                list_del(&page[num_pages]->lru);
-               blkif->free_pages_num--;
+               ring->free_pages_num--;
                if (++num_pages == NUM_BATCH_FREE_PAGES) {
-                       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+                       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
                        gnttab_free_pages(num_pages, page);
-                       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+                       spin_lock_irqsave(&ring->free_pages_lock, flags);
                        num_pages = 0;
                }
        }
-       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
        if (num_pages != 0)
                gnttab_free_pages(num_pages, page);
 }
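
Editor's note: shrink_free_pagepool() releases pages in fixed-size batches and drops the
spinlock around each gnttab_free_pages() call, so the lock is never held across the slow
free path. The skeleton of that idiom (pool type and helpers hypothetical):

    spin_lock_irqsave(&pool->lock, flags);
    while (pool->nr > target) {
        batch[n++] = pop_page(pool);        /* hypothetical helper */
        if (n == BATCH) {
            spin_unlock_irqrestore(&pool->lock, flags);
            free_batch(batch, n);           /* slow path, lock dropped */
            spin_lock_irqsave(&pool->lock, flags);
            n = 0;
        }
    }
    spin_unlock_irqrestore(&pool->lock, flags);
    if (n)
        free_batch(batch, n);
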
 
 #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
-static int do_block_io_op(struct xen_blkif *blkif);
-static int dispatch_rw_block_io(struct xen_blkif *blkif,
+static int do_block_io_op(struct xen_blkif_ring *ring);
+static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
                                struct blkif_request *req,
                                struct pending_req *pending_req);
-static void make_response(struct xen_blkif *blkif, u64 id,
+static void make_response(struct xen_blkif_ring *ring, u64 id,
                          unsigned short op, int st);
 
 #define foreach_grant_safe(pos, n, rbtree, node) \
@@ -190,7 +200,7 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 
 /*
  * We don't need locking around the persistent grant helpers
- * because blkback uses a single-thread for each backed, so we
+ * because blkback uses a single thread for each backend, so we
  * can be sure that these functions will never be called recursively.
  *
  * The only exception to that is put_persistent_grant, that can be called
@@ -198,19 +208,20 @@ static void make_response(struct xen_blkif *blkif, u64 id,
  * bit operations to modify the flags of a persistent grant and to count
  * the number of used grants.
  */
-static int add_persistent_gnt(struct xen_blkif *blkif,
+static int add_persistent_gnt(struct xen_blkif_ring *ring,
                               struct persistent_gnt *persistent_gnt)
 {
        struct rb_node **new = NULL, *parent = NULL;
        struct persistent_gnt *this;
+       struct xen_blkif *blkif = ring->blkif;
 
-       if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+       if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
                if (!blkif->vbd.overflow_max_grants)
                        blkif->vbd.overflow_max_grants = 1;
                return -EBUSY;
        }
        /* Figure out where to put new node */
-       new = &blkif->persistent_gnts.rb_node;
+       new = &ring->persistent_gnts.rb_node;
        while (*new) {
                this = container_of(*new, struct persistent_gnt, node);
 
@@ -229,19 +240,19 @@ static int add_persistent_gnt(struct xen_blkif *blkif,
        set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
        /* Add new node and rebalance tree. */
        rb_link_node(&(persistent_gnt->node), parent, new);
-       rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
-       blkif->persistent_gnt_c++;
-       atomic_inc(&blkif->persistent_gnt_in_use);
+       rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
+       ring->persistent_gnt_c++;
+       atomic_inc(&ring->persistent_gnt_in_use);
        return 0;
 }
 
-static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
                                                 grant_ref_t gref)
 {
        struct persistent_gnt *data;
        struct rb_node *node = NULL;
 
-       node = blkif->persistent_gnts.rb_node;
+       node = ring->persistent_gnts.rb_node;
        while (node) {
                data = container_of(node, struct persistent_gnt, node);
 
@@ -255,24 +266,24 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
                                return NULL;
                        }
                        set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
-                       atomic_inc(&blkif->persistent_gnt_in_use);
+                       atomic_inc(&ring->persistent_gnt_in_use);
                        return data;
                }
        }
        return NULL;
 }
 
-static void put_persistent_gnt(struct xen_blkif *blkif,
+static void put_persistent_gnt(struct xen_blkif_ring *ring,
                                struct persistent_gnt *persistent_gnt)
 {
        if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
                pr_alert_ratelimited("freeing a grant already unused\n");
        set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
        clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
-       atomic_dec(&blkif->persistent_gnt_in_use);
+       atomic_dec(&ring->persistent_gnt_in_use);
 }
 
-static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
                                  unsigned int num)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -303,7 +314,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                        unmap_data.count = segs_to_unmap;
                        BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 
-                       put_free_pages(blkif, pages, segs_to_unmap);
+                       put_free_pages(ring, pages, segs_to_unmap);
                        segs_to_unmap = 0;
                }
 
@@ -320,15 +331,15 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct persistent_gnt *persistent_gnt;
        int segs_to_unmap = 0;
-       struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+       struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work);
        struct gntab_unmap_queue_data unmap_data;
 
        unmap_data.pages = pages;
        unmap_data.unmap_ops = unmap;
        unmap_data.kunmap_ops = NULL;
 
-       while(!list_empty(&blkif->persistent_purge_list)) {
-               persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+       while(!list_empty(&ring->persistent_purge_list)) {
+               persistent_gnt = list_first_entry(&ring->persistent_purge_list,
                                                  struct persistent_gnt,
                                                  remove_node);
                list_del(&persistent_gnt->remove_node);
@@ -343,7 +354,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
                if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
                        unmap_data.count = segs_to_unmap;
                        BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
-                       put_free_pages(blkif, pages, segs_to_unmap);
+                       put_free_pages(ring, pages, segs_to_unmap);
                        segs_to_unmap = 0;
                }
                kfree(persistent_gnt);
@@ -351,11 +362,11 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
        if (segs_to_unmap > 0) {
                unmap_data.count = segs_to_unmap;
                BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
-               put_free_pages(blkif, pages, segs_to_unmap);
+               put_free_pages(ring, pages, segs_to_unmap);
        }
 }
 
-static void purge_persistent_gnt(struct xen_blkif *blkif)
+static void purge_persistent_gnt(struct xen_blkif_ring *ring)
 {
        struct persistent_gnt *persistent_gnt;
        struct rb_node *n;
@@ -363,23 +374,23 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
        bool scan_used = false, clean_used = false;
        struct rb_root *root;
 
-       if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
-           (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
-           !blkif->vbd.overflow_max_grants)) {
-               return;
+       if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+           (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+           !ring->blkif->vbd.overflow_max_grants)) {
+               goto out;
        }
 
-       if (work_busy(&blkif->persistent_purge_work)) {
+       if (work_busy(&ring->persistent_purge_work)) {
                pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
-               return;
+               goto out;
        }
 
        num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
-       num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
-       num_clean = min(blkif->persistent_gnt_c, num_clean);
+       num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+       num_clean = min(ring->persistent_gnt_c, num_clean);
        if ((num_clean == 0) ||
-           (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
-               return;
+           (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
+               goto out;
 
        /*
         * At this point, we can assure that there will be no calls
@@ -394,8 +405,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
        pr_debug("Going to purge %u persistent grants\n", num_clean);
 
-       BUG_ON(!list_empty(&blkif->persistent_purge_list));
-       root = &blkif->persistent_gnts;
+       BUG_ON(!list_empty(&ring->persistent_purge_list));
+       root = &ring->persistent_gnts;
 purge_list:
        foreach_grant_safe(persistent_gnt, n, root, node) {
                BUG_ON(persistent_gnt->handle ==
@@ -414,7 +425,7 @@ purge_list:
 
                rb_erase(&persistent_gnt->node, root);
                list_add(&persistent_gnt->remove_node,
-                        &blkif->persistent_purge_list);
+                        &ring->persistent_purge_list);
                if (--num_clean == 0)
                        goto finished;
        }
@@ -435,30 +446,32 @@ finished:
                goto purge_list;
        }
 
-       blkif->persistent_gnt_c -= (total - num_clean);
-       blkif->vbd.overflow_max_grants = 0;
+       ring->persistent_gnt_c -= (total - num_clean);
+       ring->blkif->vbd.overflow_max_grants = 0;
 
        /* We can defer this work */
-       schedule_work(&blkif->persistent_purge_work);
+       schedule_work(&ring->persistent_purge_work);
        pr_debug("Purged %u/%u\n", (total - num_clean), total);
+
+out:
        return;
 }
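
Editor's note: working through the sizing above with illustrative numbers (neither
default is shown in this hunk) -- with xen_blkif_max_pgrants = 1000, LRU_PERCENT_CLEAN
= 5 and persistent_gnt_c = 1020, num_clean starts at (1000 / 100) * 5 = 50, becomes
1020 - 1000 + 50 = 70, and min(1020, 70) keeps it at 70. The purge therefore trims the
tree back below the limit plus a few percent of headroom, so it does not immediately
retrigger.
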
 
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
-static struct pending_req *alloc_req(struct xen_blkif *blkif)
+static struct pending_req *alloc_req(struct xen_blkif_ring *ring)
 {
        struct pending_req *req = NULL;
        unsigned long flags;
 
-       spin_lock_irqsave(&blkif->pending_free_lock, flags);
-       if (!list_empty(&blkif->pending_free)) {
-               req = list_entry(blkif->pending_free.next, struct pending_req,
+       spin_lock_irqsave(&ring->pending_free_lock, flags);
+       if (!list_empty(&ring->pending_free)) {
+               req = list_entry(ring->pending_free.next, struct pending_req,
                                 free_list);
                list_del(&req->free_list);
        }
-       spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
+       spin_unlock_irqrestore(&ring->pending_free_lock, flags);
        return req;
 }
 
@@ -466,17 +479,17 @@ static struct pending_req *alloc_req(struct xen_blkif *blkif)
  * Return the 'pending_req' structure back to the freepool. We also
  * wake up the thread if it was waiting for a free page.
  */
-static void free_req(struct xen_blkif *blkif, struct pending_req *req)
+static void free_req(struct xen_blkif_ring *ring, struct pending_req *req)
 {
        unsigned long flags;
        int was_empty;
 
-       spin_lock_irqsave(&blkif->pending_free_lock, flags);
-       was_empty = list_empty(&blkif->pending_free);
-       list_add(&req->free_list, &blkif->pending_free);
-       spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
+       spin_lock_irqsave(&ring->pending_free_lock, flags);
+       was_empty = list_empty(&ring->pending_free);
+       list_add(&req->free_list, &ring->pending_free);
+       spin_unlock_irqrestore(&ring->pending_free_lock, flags);
        if (was_empty)
-               wake_up(&blkif->pending_free_wq);
+               wake_up(&ring->pending_free_wq);
 }
 
 /*
@@ -556,10 +569,10 @@ abort:
 /*
  * Notification from the guest OS.
  */
-static void blkif_notify_work(struct xen_blkif *blkif)
+static void blkif_notify_work(struct xen_blkif_ring *ring)
 {
-       blkif->waiting_reqs = 1;
-       wake_up(&blkif->wq);
+       ring->waiting_reqs = 1;
+       wake_up(&ring->wq);
 }
 
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
@@ -572,31 +585,33 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
  * SCHEDULER FUNCTIONS
  */
 
-static void print_stats(struct xen_blkif *blkif)
+static void print_stats(struct xen_blkif_ring *ring)
 {
        pr_info("(%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
                 "  |  ds %4llu | pg: %4u/%4d\n",
-                current->comm, blkif->st_oo_req,
-                blkif->st_rd_req, blkif->st_wr_req,
-                blkif->st_f_req, blkif->st_ds_req,
-                blkif->persistent_gnt_c,
+                current->comm, ring->st_oo_req,
+                ring->st_rd_req, ring->st_wr_req,
+                ring->st_f_req, ring->st_ds_req,
+                ring->persistent_gnt_c,
                 xen_blkif_max_pgrants);
-       blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
-       blkif->st_rd_req = 0;
-       blkif->st_wr_req = 0;
-       blkif->st_oo_req = 0;
-       blkif->st_ds_req = 0;
+       ring->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+       ring->st_rd_req = 0;
+       ring->st_wr_req = 0;
+       ring->st_oo_req = 0;
+       ring->st_ds_req = 0;
 }
 
 int xen_blkif_schedule(void *arg)
 {
-       struct xen_blkif *blkif = arg;
+       struct xen_blkif_ring *ring = arg;
+       struct xen_blkif *blkif = ring->blkif;
        struct xen_vbd *vbd = &blkif->vbd;
        unsigned long timeout;
        int ret;
 
        xen_blkif_get(blkif);
 
+       set_freezable();
        while (!kthread_should_stop()) {
                if (try_to_freeze())
                        continue;
@@ -606,50 +621,50 @@ int xen_blkif_schedule(void *arg)
                timeout = msecs_to_jiffies(LRU_INTERVAL);
 
                timeout = wait_event_interruptible_timeout(
-                       blkif->wq,
-                       blkif->waiting_reqs || kthread_should_stop(),
+                       ring->wq,
+                       ring->waiting_reqs || kthread_should_stop(),
                        timeout);
                if (timeout == 0)
                        goto purge_gnt_list;
                timeout = wait_event_interruptible_timeout(
-                       blkif->pending_free_wq,
-                       !list_empty(&blkif->pending_free) ||
+                       ring->pending_free_wq,
+                       !list_empty(&ring->pending_free) ||
                        kthread_should_stop(),
                        timeout);
                if (timeout == 0)
                        goto purge_gnt_list;
 
-               blkif->waiting_reqs = 0;
+               ring->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */
 
-               ret = do_block_io_op(blkif);
+               ret = do_block_io_op(ring);
                if (ret > 0)
-                       blkif->waiting_reqs = 1;
+                       ring->waiting_reqs = 1;
                if (ret == -EACCES)
-                       wait_event_interruptible(blkif->shutdown_wq,
+                       wait_event_interruptible(ring->shutdown_wq,
                                                 kthread_should_stop());
 
 purge_gnt_list:
                if (blkif->vbd.feature_gnt_persistent &&
-                   time_after(jiffies, blkif->next_lru)) {
-                       purge_persistent_gnt(blkif);
-                       blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+                   time_after(jiffies, ring->next_lru)) {
+                       purge_persistent_gnt(ring);
+                       ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
                }
 
                /* Shrink if we have more than xen_blkif_max_buffer_pages */
-               shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
+               shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
 
-               if (log_stats && time_after(jiffies, blkif->st_print))
-                       print_stats(blkif);
+               if (log_stats && time_after(jiffies, ring->st_print))
+                       print_stats(ring);
        }
 
        /* Drain pending purge work */
-       flush_work(&blkif->persistent_purge_work);
+       flush_work(&ring->persistent_purge_work);
 
        if (log_stats)
-               print_stats(blkif);
+               print_stats(ring);
 
-       blkif->xenblkd = NULL;
+       ring->xenblkd = NULL;
        xen_blkif_put(blkif);
 
        return 0;
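
Editor's note: with the switch to per-ring threads, xen_blkif_schedule() keeps the
standard freezable-kthread shape -- mark the thread freezable, then loop on
kthread_should_stop() around interruptible, timed waits. A stripped-down skeleton of
that loop (the ring structure is a placeholder):

    #include <linux/kthread.h>
    #include <linux/freezer.h>
    #include <linux/wait.h>

    struct my_ring {                 /* hypothetical context */
        wait_queue_head_t wq;
        bool waiting_reqs;
    };

    static int worker_fn(void *arg)
    {
        struct my_ring *ring = arg;

        set_freezable();
        while (!kthread_should_stop()) {
            if (try_to_freeze())
                continue;
            wait_event_interruptible_timeout(ring->wq,
                ring->waiting_reqs || kthread_should_stop(),
                msecs_to_jiffies(100));
            ring->waiting_reqs = false;
            /* ... drain the ring here ... */
        }
        return 0;
    }
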
@@ -658,22 +673,22 @@ purge_gnt_list:
 /*
  * Remove persistent grants and empty the pool of free pages
  */
-void xen_blkbk_free_caches(struct xen_blkif *blkif)
+void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
 {
        /* Free all persistent grant pages */
-       if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-               free_persistent_gnts(blkif, &blkif->persistent_gnts,
-                       blkif->persistent_gnt_c);
+       if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
+               free_persistent_gnts(ring, &ring->persistent_gnts,
+                       ring->persistent_gnt_c);
 
-       BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-       blkif->persistent_gnt_c = 0;
+       BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+       ring->persistent_gnt_c = 0;
 
        /* Since we are shutting down remove all pages from the buffer */
-       shrink_free_pagepool(blkif, 0 /* All */);
+       shrink_free_pagepool(ring, 0 /* All */);
 }
 
 static unsigned int xen_blkbk_unmap_prepare(
-       struct xen_blkif *blkif,
+       struct xen_blkif_ring *ring,
        struct grant_page **pages,
        unsigned int num,
        struct gnttab_unmap_grant_ref *unmap_ops,
@@ -683,7 +698,7 @@ static unsigned int xen_blkbk_unmap_prepare(
 
        for (i = 0; i < num; i++) {
                if (pages[i]->persistent_gnt != NULL) {
-                       put_persistent_gnt(blkif, pages[i]->persistent_gnt);
+                       put_persistent_gnt(ring, pages[i]->persistent_gnt);
                        continue;
                }
                if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
@@ -700,17 +715,18 @@ static unsigned int xen_blkbk_unmap_prepare(
 
 static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
 {
-       struct pending_req* pending_req = (struct pending_req*) (data->data);
-       struct xen_blkif *blkif = pending_req->blkif;
+       struct pending_req *pending_req = (struct pending_req *)(data->data);
+       struct xen_blkif_ring *ring = pending_req->ring;
+       struct xen_blkif *blkif = ring->blkif;
 
        /* BUG_ON used to reproduce existing behaviour,
           but is this the best way to deal with this? */
        BUG_ON(result);
 
-       put_free_pages(blkif, data->pages, data->count);
-       make_response(blkif, pending_req->id,
+       put_free_pages(ring, data->pages, data->count);
+       make_response(ring, pending_req->id,
                      pending_req->operation, pending_req->status);
-       free_req(blkif, pending_req);
+       free_req(ring, pending_req);
        /*
         * Make sure the request is freed before releasing blkif,
         * or there could be a race between free_req and the
@@ -723,7 +739,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
         * pending_free_wq if there's a drain going on, but it has
         * to be taken into account if the current model is changed.
         */
-       if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+       if (atomic_dec_and_test(&ring->inflight) && atomic_read(&blkif->drain)) {
                complete(&blkif->drain_complete);
        }
        xen_blkif_put(blkif);
@@ -732,11 +748,11 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
 static void xen_blkbk_unmap_and_respond(struct pending_req *req)
 {
        struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data;
-       struct xen_blkif *blkif = req->blkif;
+       struct xen_blkif_ring *ring = req->ring;
        struct grant_page **pages = req->segments;
        unsigned int invcount;
 
-       invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
+       invcount = xen_blkbk_unmap_prepare(ring, pages, req->nr_segs,
                                           req->unmap, req->unmap_pages);
 
        work->data = req;
@@ -757,7 +773,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req)
  * of hypercalls, but since this is only used in error paths there's
  * no real need.
  */
-static void xen_blkbk_unmap(struct xen_blkif *blkif,
+static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
                             struct grant_page *pages[],
                             int num)
 {
@@ -768,20 +784,20 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
 
        while (num) {
                unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-               
-               invcount = xen_blkbk_unmap_prepare(blkif, pages, batch,
+
+               invcount = xen_blkbk_unmap_prepare(ring, pages, batch,
                                                   unmap, unmap_pages);
                if (invcount) {
                        ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
                        BUG_ON(ret);
-                       put_free_pages(blkif, unmap_pages, invcount);
+                       put_free_pages(ring, unmap_pages, invcount);
                }
                pages += batch;
                num -= batch;
        }
 }
 
-static int xen_blkbk_map(struct xen_blkif *blkif,
+static int xen_blkbk_map(struct xen_blkif_ring *ring,
                         struct grant_page *pages[],
                         int num, bool ro)
 {
@@ -794,6 +810,7 @@ static int xen_blkbk_map(struct xen_blkif *blkif,
        int ret = 0;
        int last_map = 0, map_until = 0;
        int use_persistent_gnts;
+       struct xen_blkif *blkif = ring->blkif;
 
        use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
@@ -806,10 +823,11 @@ again:
        for (i = map_until; i < num; i++) {
                uint32_t flags;
 
-               if (use_persistent_gnts)
+               if (use_persistent_gnts) {
                        persistent_gnt = get_persistent_gnt(
-                               blkif,
+                               ring,
                                pages[i]->gref);
+               }
 
                if (persistent_gnt) {
                        /*
@@ -819,7 +837,7 @@ again:
                        pages[i]->page = persistent_gnt->page;
                        pages[i]->persistent_gnt = persistent_gnt;
                } else {
-                       if (get_free_page(blkif, &pages[i]->page))
+                       if (get_free_page(ring, &pages[i]->page))
                                goto out_of_memory;
                        addr = vaddr(pages[i]->page);
                        pages_to_gnt[segs_to_map] = pages[i]->page;
@@ -852,7 +870,7 @@ again:
                        BUG_ON(new_map_idx >= segs_to_map);
                        if (unlikely(map[new_map_idx].status != 0)) {
                                pr_debug("invalid buffer -- could not remap it\n");
-                               put_free_pages(blkif, &pages[seg_idx]->page, 1);
+                               put_free_pages(ring, &pages[seg_idx]->page, 1);
                                pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
                                ret |= 1;
                                goto next;
@@ -862,7 +880,7 @@ again:
                        continue;
                }
                if (use_persistent_gnts &&
-                   blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+                   ring->persistent_gnt_c < xen_blkif_max_pgrants) {
                        /*
                         * We are using persistent grants, the grant is
                         * not mapped but we might have room for it.
@@ -880,7 +898,7 @@ again:
                        persistent_gnt->gnt = map[new_map_idx].ref;
                        persistent_gnt->handle = map[new_map_idx].handle;
                        persistent_gnt->page = pages[seg_idx]->page;
-                       if (add_persistent_gnt(blkif,
+                       if (add_persistent_gnt(ring,
                                               persistent_gnt)) {
                                kfree(persistent_gnt);
                                persistent_gnt = NULL;
@@ -888,7 +906,7 @@ again:
                        }
                        pages[seg_idx]->persistent_gnt = persistent_gnt;
                        pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
-                                persistent_gnt->gnt, blkif->persistent_gnt_c,
+                                persistent_gnt->gnt, ring->persistent_gnt_c,
                                 xen_blkif_max_pgrants);
                        goto next;
                }
@@ -913,7 +931,7 @@ next:
 
 out_of_memory:
        pr_alert("%s: out of memory\n", __func__);
-       put_free_pages(blkif, pages_to_gnt, segs_to_map);
+       put_free_pages(ring, pages_to_gnt, segs_to_map);
        return -ENOMEM;
 }
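
xen_blkbk_map() above follows a lookup-or-map pattern around the persistent-grant cache. A sketch of just that control flow, where cache_lookup(), map_fresh(), cache_has_room() and cache_insert() are hypothetical helpers standing in for get_persistent_gnt(), the grant-table mapping and add_persistent_gnt():

void *cache_lookup(unsigned int gref);          /* hypothetical helpers */
void *map_fresh(unsigned int gref);
int cache_has_room(void);
void cache_insert(unsigned int gref, void *mapping);

void *get_mapping(unsigned int gref)
{
        void *m = cache_lookup(gref);

        if (m)
                return m;               /* hit: no mapping hypercall */
        m = map_fresh(gref);            /* miss: map it now */
        if (m && cache_has_room())
                cache_insert(gref, m);  /* keep it for future requests */
        return m;
}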
 
@@ -921,7 +939,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req)
 {
        int rc;
 
-       rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
+       rc = xen_blkbk_map(pending_req->ring, pending_req->segments,
                           pending_req->nr_segs,
                           (pending_req->operation != BLKIF_OP_READ));
 
@@ -934,7 +952,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
                                    struct phys_req *preq)
 {
        struct grant_page **pages = pending_req->indirect_pages;
-       struct xen_blkif *blkif = pending_req->blkif;
+       struct xen_blkif_ring *ring = pending_req->ring;
        int indirect_grefs, rc, n, nseg, i;
        struct blkif_request_segment *segments = NULL;
 
@@ -945,7 +963,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
        for (i = 0; i < indirect_grefs; i++)
                pages[i]->gref = req->u.indirect.indirect_grefs[i];
 
-       rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
+       rc = xen_blkbk_map(ring, pages, indirect_grefs, true);
        if (rc)
                goto unmap;
 
@@ -977,15 +995,16 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 unmap:
        if (segments)
                kunmap_atomic(segments);
-       xen_blkbk_unmap(blkif, pages, indirect_grefs);
+       xen_blkbk_unmap(ring, pages, indirect_grefs);
        return rc;
 }
 
-static int dispatch_discard_io(struct xen_blkif *blkif,
+static int dispatch_discard_io(struct xen_blkif_ring *ring,
                                struct blkif_request *req)
 {
        int err = 0;
        int status = BLKIF_RSP_OKAY;
+       struct xen_blkif *blkif = ring->blkif;
        struct block_device *bdev = blkif->vbd.bdev;
        unsigned long secure;
        struct phys_req preq;
@@ -1002,7 +1021,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
                        preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
                goto fail_response;
        }
-       blkif->st_ds_req++;
+       ring->st_ds_req++;
 
        secure = (blkif->vbd.discard_secure &&
                 (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
@@ -1018,26 +1037,28 @@ fail_response:
        } else if (err)
                status = BLKIF_RSP_ERROR;
 
-       make_response(blkif, req->u.discard.id, req->operation, status);
+       make_response(ring, req->u.discard.id, req->operation, status);
        xen_blkif_put(blkif);
        return err;
 }
 
-static int dispatch_other_io(struct xen_blkif *blkif,
+static int dispatch_other_io(struct xen_blkif_ring *ring,
                             struct blkif_request *req,
                             struct pending_req *pending_req)
 {
-       free_req(blkif, pending_req);
-       make_response(blkif, req->u.other.id, req->operation,
+       free_req(ring, pending_req);
+       make_response(ring, req->u.other.id, req->operation,
                      BLKIF_RSP_EOPNOTSUPP);
        return -EIO;
 }
 
-static void xen_blk_drain_io(struct xen_blkif *blkif)
+static void xen_blk_drain_io(struct xen_blkif_ring *ring)
 {
+       struct xen_blkif *blkif = ring->blkif;
+
        atomic_set(&blkif->drain, 1);
        do {
-               if (atomic_read(&blkif->inflight) == 0)
+               if (atomic_read(&ring->inflight) == 0)
                        break;
                wait_for_completion_interruptible_timeout(
                                &blkif->drain_complete, HZ);
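
The drain above reduces to a flag plus a per-ring in-flight counter. A user-space sketch with C11 atomics (drain_io()/io_done() are hypothetical names; the driver sleeps on drain_complete with a timeout rather than spinning):

#include <stdatomic.h>

static atomic_int drain;
static atomic_int inflight;

static void drain_io(void)
{
        atomic_store(&drain, 1);
        while (atomic_load(&inflight) != 0)
                ;       /* wait_for_completion_interruptible_timeout() upstream */
        atomic_store(&drain, 0);
}

static void io_done(void)
{
        /* The last completer wakes the drainer, mirroring the
         * atomic_dec_and_test()/complete() pair above. */
        if (atomic_fetch_sub(&inflight, 1) == 1 && atomic_load(&drain))
                ;       /* complete(&drain_complete) upstream */
}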
@@ -1058,12 +1079,12 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
        if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
            (error == -EOPNOTSUPP)) {
                pr_debug("flush diskcache op failed, not supported\n");
-               xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
+               xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
        } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
                    (error == -EOPNOTSUPP)) {
                pr_debug("write barrier op failed, not supported\n");
-               xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+               xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
        } else if (error) {
                pr_debug("Buffer not up-to-date at end of operation,"
@@ -1097,9 +1118,9 @@ static void end_block_io_op(struct bio *bio)
  * and transmute it to the block API to hand it over to the proper block disk.
  */
 static int
-__do_block_io_op(struct xen_blkif *blkif)
+__do_block_io_op(struct xen_blkif_ring *ring)
 {
-       union blkif_back_rings *blk_rings = &blkif->blk_rings;
+       union blkif_back_rings *blk_rings = &ring->blk_rings;
        struct blkif_request req;
        struct pending_req *pending_req;
        RING_IDX rc, rp;
@@ -1112,7 +1133,7 @@ __do_block_io_op(struct xen_blkif *blkif)
        if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
                rc = blk_rings->common.rsp_prod_pvt;
                pr_warn("Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
-                       rp, rc, rp - rc, blkif->vbd.pdevice);
+                       rp, rc, rp - rc, ring->blkif->vbd.pdevice);
                return -EACCES;
        }
        while (rc != rp) {
@@ -1125,14 +1146,14 @@ __do_block_io_op(struct xen_blkif *blkif)
                        break;
                }
 
-               pending_req = alloc_req(blkif);
+               pending_req = alloc_req(ring);
                if (NULL == pending_req) {
-                       blkif->st_oo_req++;
+                       ring->st_oo_req++;
                        more_to_do = 1;
                        break;
                }
 
-               switch (blkif->blk_protocol) {
+               switch (ring->blkif->blk_protocol) {
                case BLKIF_PROTOCOL_NATIVE:
                        memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
                        break;
@@ -1156,16 +1177,16 @@ __do_block_io_op(struct xen_blkif *blkif)
                case BLKIF_OP_WRITE_BARRIER:
                case BLKIF_OP_FLUSH_DISKCACHE:
                case BLKIF_OP_INDIRECT:
-                       if (dispatch_rw_block_io(blkif, &req, pending_req))
+                       if (dispatch_rw_block_io(ring, &req, pending_req))
                                goto done;
                        break;
                case BLKIF_OP_DISCARD:
-                       free_req(blkif, pending_req);
-                       if (dispatch_discard_io(blkif, &req))
+                       free_req(ring, pending_req);
+                       if (dispatch_discard_io(ring, &req))
                                goto done;
                        break;
                default:
-                       if (dispatch_other_io(blkif, &req, pending_req))
+                       if (dispatch_other_io(ring, &req, pending_req))
                                goto done;
                        break;
                }
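
__do_block_io_op() is a textbook shared-ring consumer: snapshot the producer index, reject a bogus one, then copy each request out of the shared page before acting on it. A simplified sketch (hypothetical struct ring; the real code also issues a read barrier after the snapshot and handles three request layouts):

#include <stdint.h>
#include <string.h>

#define RING_SIZE 32                    /* power of two, for the masking */

struct req { uint8_t op; };

struct ring {
        struct req slots[RING_SIZE];
        volatile uint32_t req_prod;     /* advanced by the frontend */
        uint32_t req_cons;              /* private to the backend */
};

static int consume(struct ring *r, void (*dispatch)(struct req *))
{
        uint32_t rp = r->req_prod;      /* snapshot once */

        if (rp - r->req_cons > RING_SIZE)
                return -1;              /* bogus producer index: halt ring */
        while (r->req_cons != rp) {
                struct req copy;

                /* Copy out before validating: the frontend could
                 * rewrite the shared slot under us otherwise. */
                memcpy(&copy, &r->slots[r->req_cons % RING_SIZE],
                       sizeof(copy));
                r->req_cons++;
                dispatch(&copy);
        }
        return 0;
}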
@@ -1178,13 +1199,13 @@ done:
 }
 
 static int
-do_block_io_op(struct xen_blkif *blkif)
+do_block_io_op(struct xen_blkif_ring *ring)
 {
-       union blkif_back_rings *blk_rings = &blkif->blk_rings;
+       union blkif_back_rings *blk_rings = &ring->blk_rings;
        int more_to_do;
 
        do {
-               more_to_do = __do_block_io_op(blkif);
+               more_to_do = __do_block_io_op(ring);
                if (more_to_do)
                        break;
 
@@ -1197,7 +1218,7 @@ do_block_io_op(struct xen_blkif *blkif)
  * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
  * and call the 'submit_bio' to pass it to the underlying storage.
  */
-static int dispatch_rw_block_io(struct xen_blkif *blkif,
+static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
                                struct blkif_request *req,
                                struct pending_req *pending_req)
 {
@@ -1225,17 +1246,17 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
        switch (req_operation) {
        case BLKIF_OP_READ:
-               blkif->st_rd_req++;
+               ring->st_rd_req++;
                operation = READ;
                break;
        case BLKIF_OP_WRITE:
-               blkif->st_wr_req++;
+               ring->st_wr_req++;
                operation = WRITE_ODIRECT;
                break;
        case BLKIF_OP_WRITE_BARRIER:
                drain = true;
        case BLKIF_OP_FLUSH_DISKCACHE:
-               blkif->st_f_req++;
+               ring->st_f_req++;
                operation = WRITE_FLUSH;
                break;
        default:
@@ -1260,7 +1281,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
        preq.nr_sects      = 0;
 
-       pending_req->blkif     = blkif;
+       pending_req->ring      = ring;
        pending_req->id        = req->u.rw.id;
        pending_req->operation = req_operation;
        pending_req->status    = BLKIF_RSP_OKAY;
@@ -1287,12 +1308,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                        goto fail_response;
        }
 
-       if (xen_vbd_translate(&preq, blkif, operation) != 0) {
+       if (xen_vbd_translate(&preq, ring->blkif, operation) != 0) {
                pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n",
                         operation == READ ? "read" : "write",
                         preq.sector_number,
                         preq.sector_number + preq.nr_sects,
-                        blkif->vbd.pdevice);
+                        ring->blkif->vbd.pdevice);
                goto fail_response;
        }
 
@@ -1304,7 +1325,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                if (((int)preq.sector_number|(int)seg[i].nsec) &
                    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
                        pr_debug("Misaligned I/O request from domain %d\n",
-                                blkif->domid);
+                                ring->blkif->domid);
                        goto fail_response;
                }
        }
@@ -1313,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
         * issue the WRITE_FLUSH.
         */
        if (drain)
-               xen_blk_drain_io(pending_req->blkif);
+               xen_blk_drain_io(pending_req->ring);
 
        /*
         * If we have failed at this point, we need to undo the M2P override,
@@ -1328,8 +1349,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
         * This corresponding xen_blkif_put is done in __end_block_io_op, or
         * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
         */
-       xen_blkif_get(blkif);
-       atomic_inc(&blkif->inflight);
+       xen_blkif_get(ring->blkif);
+       atomic_inc(&ring->inflight);
 
        for (i = 0; i < nseg; i++) {
                while ((bio == NULL) ||
@@ -1377,19 +1398,19 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        blk_finish_plug(&plug);
 
        if (operation == READ)
-               blkif->st_rd_sect += preq.nr_sects;
+               ring->st_rd_sect += preq.nr_sects;
        else if (operation & WRITE)
-               blkif->st_wr_sect += preq.nr_sects;
+               ring->st_wr_sect += preq.nr_sects;
 
        return 0;
 
  fail_flush:
-       xen_blkbk_unmap(blkif, pending_req->segments,
+       xen_blkbk_unmap(ring, pending_req->segments,
                        pending_req->nr_segs);
  fail_response:
        /* Haven't submitted any bios yet. */
-       make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
-       free_req(blkif, pending_req);
+       make_response(ring, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
+       free_req(ring, pending_req);
        msleep(1); /* back off a bit */
        return -EIO;
 
@@ -1407,21 +1428,22 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 /*
  * Put a response on the ring on how the operation fared.
  */
-static void make_response(struct xen_blkif *blkif, u64 id,
+static void make_response(struct xen_blkif_ring *ring, u64 id,
                          unsigned short op, int st)
 {
        struct blkif_response  resp;
        unsigned long     flags;
-       union blkif_back_rings *blk_rings = &blkif->blk_rings;
+       union blkif_back_rings *blk_rings;
        int notify;
 
        resp.id        = id;
        resp.operation = op;
        resp.status    = st;
 
-       spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+       spin_lock_irqsave(&ring->blk_ring_lock, flags);
+       blk_rings = &ring->blk_rings;
        /* Place on the response ring for the relevant domain. */
-       switch (blkif->blk_protocol) {
+       switch (ring->blkif->blk_protocol) {
        case BLKIF_PROTOCOL_NATIVE:
                memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
                       &resp, sizeof(resp));
@@ -1439,9 +1461,9 @@ static void make_response(struct xen_blkif *blkif, u64 id,
        }
        blk_rings->common.rsp_prod_pvt++;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
-       spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+       spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
        if (notify)
-               notify_remote_via_irq(blkif->irq);
+               notify_remote_via_irq(ring->irq);
 }
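
make_response() is the matching producer side. A sketch of the same sequence with hypothetical lock()/unlock()/notify_peer() helpers; the notify test is what RING_PUSH_RESPONSES_AND_CHECK_NOTIFY computes, firing only when the peer's rsp_event falls in the newly published range:

#include <stdint.h>

#define RING_SIZE 32

struct resp { uint64_t id; int status; };

struct resp_ring {
        struct resp slots[RING_SIZE];
        uint32_t rsp_prod;              /* published to the frontend */
        uint32_t rsp_event;             /* frontend's "wake me at" index */
};

void lock(void);                        /* hypothetical helpers */
void unlock(void);
void notify_peer(void);

static void push_response(struct resp_ring *r, uint64_t id, int status)
{
        uint32_t old;
        int notify;

        lock();
        old = r->rsp_prod;
        r->slots[old % RING_SIZE] = (struct resp){ id, status };
        r->rsp_prod = old + 1;          /* a write barrier precedes this upstream */
        /* Notify only if the peer asked to be woken in (old, new]. */
        notify = (uint32_t)(r->rsp_prod - r->rsp_event) <
                 (uint32_t)(r->rsp_prod - old);
        unlock();
        if (notify)
                notify_peer();
}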
 
 static int __init xen_blkif_init(void)
@@ -1457,6 +1479,9 @@ static int __init xen_blkif_init(void)
                xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
        }
 
+       if (xenblk_max_queues == 0)
+               xenblk_max_queues = num_online_cpus();
+
        rc = xen_blkif_interface_init();
        if (rc)
                goto failed_init;
index c929ae22764c9dd4345195a1542df997c5a8e29b..dea61f6ab8cbdbaffedceb4c64bda239b51a63a4 100644 (file)
@@ -46,6 +46,7 @@
 #include <xen/interface/io/protocols.h>
 
 extern unsigned int xen_blkif_max_ring_order;
+extern unsigned int xenblk_max_queues;
 /*
  * This is the maximum number of segments that would be allowed in indirect
  * requests. This value will also be passed to the frontend.
@@ -269,68 +270,79 @@ struct persistent_gnt {
        struct list_head remove_node;
 };
 
-struct xen_blkif {
-       /* Unique identifier for this interface. */
-       domid_t                 domid;
-       unsigned int            handle;
+/* Per-ring information. */
+struct xen_blkif_ring {
        /* Physical parameters of the comms window. */
        unsigned int            irq;
-       /* Comms information. */
-       enum blkif_protocol     blk_protocol;
        union blkif_back_rings  blk_rings;
        void                    *blk_ring;
-       /* The VBD attached to this interface. */
-       struct xen_vbd          vbd;
-       /* Back pointer to the backend_info. */
-       struct backend_info     *be;
        /* Private fields. */
        spinlock_t              blk_ring_lock;
-       atomic_t                refcnt;
 
        wait_queue_head_t       wq;
-       /* for barrier (drain) requests */
-       struct completion       drain_complete;
-       atomic_t                drain;
        atomic_t                inflight;
-       /* One thread per one blkif. */
+       /* One thread per blkif ring. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;
 
-       /* tree to store persistent grants */
+       /* List of all 'pending_req' available */
+       struct list_head        pending_free;
+       /* And its spinlock. */
+       spinlock_t              pending_free_lock;
+       wait_queue_head_t       pending_free_wq;
+
+       /* Tree to store persistent grants. */
+       spinlock_t              pers_gnts_lock;
        struct rb_root          persistent_gnts;
        unsigned int            persistent_gnt_c;
        atomic_t                persistent_gnt_in_use;
        unsigned long           next_lru;
 
-       /* used by the kworker that offload work from the persistent purge */
+       /* Statistics. */
+       unsigned long           st_print;
+       unsigned long long      st_rd_req;
+       unsigned long long      st_wr_req;
+       unsigned long long      st_oo_req;
+       unsigned long long      st_f_req;
+       unsigned long long      st_ds_req;
+       unsigned long long      st_rd_sect;
+       unsigned long long      st_wr_sect;
+
+       /* Used by the kworker that offloads work from the persistent purge. */
        struct list_head        persistent_purge_list;
        struct work_struct      persistent_purge_work;
 
-       /* buffer of free pages to map grant refs */
+       /* Buffer of free pages to map grant refs. */
        spinlock_t              free_pages_lock;
        int                     free_pages_num;
        struct list_head        free_pages;
 
-       /* List of all 'pending_req' available */
-       struct list_head        pending_free;
-       /* And its spinlock. */
-       spinlock_t              pending_free_lock;
-       wait_queue_head_t       pending_free_wq;
-
-       /* statistics */
-       unsigned long           st_print;
-       unsigned long long                      st_rd_req;
-       unsigned long long                      st_wr_req;
-       unsigned long long                      st_oo_req;
-       unsigned long long                      st_f_req;
-       unsigned long long                      st_ds_req;
-       unsigned long long                      st_rd_sect;
-       unsigned long long                      st_wr_sect;
-
        struct work_struct      free_work;
        /* Thread shutdown wait queue. */
        wait_queue_head_t       shutdown_wq;
-       unsigned int nr_ring_pages;
+       struct xen_blkif        *blkif;
+};
+
+struct xen_blkif {
+       /* Unique identifier for this interface. */
+       domid_t                 domid;
+       unsigned int            handle;
+       /* Comms information. */
+       enum blkif_protocol     blk_protocol;
+       /* The VBD attached to this interface. */
+       struct xen_vbd          vbd;
+       /* Back pointer to the backend_info. */
+       struct backend_info     *be;
+       atomic_t                refcnt;
+       /* for barrier (drain) requests */
+       struct completion       drain_complete;
+       atomic_t                drain;
+
+       struct work_struct      free_work;
+       unsigned int            nr_ring_pages;
+       /* All rings for this device. */
+       struct xen_blkif_ring   *rings;
+       unsigned int            nr_rings;
 };
 
 struct seg_buf {
@@ -352,7 +364,7 @@ struct grant_page {
  * response queued for it, with the saved 'id' passed back.
  */
 struct pending_req {
-       struct xen_blkif        *blkif;
+       struct xen_blkif_ring   *ring;
        u64                     id;
        int                     nr_segs;
        atomic_t                pendcnt;
@@ -394,7 +406,7 @@ int xen_blkif_xenbus_init(void);
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
 int xen_blkif_purge_persistent(void *arg);
-void xen_blkbk_free_caches(struct xen_blkif *blkif);
+void xen_blkbk_free_caches(struct xen_blkif_ring *ring);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);
index f53cff42f8dab8891143ff2d6a3626273857eec6..876763f7f13e1a77f8f64738de967c1f21815440 100644 (file)
@@ -86,9 +86,11 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 {
        int err;
        char name[BLKBACK_NAME_LEN];
+       struct xen_blkif_ring *ring;
+       int i;
 
        /* Not ready to connect? */
-       if (!blkif->irq || !blkif->vbd.bdev)
+       if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
                return;
 
        /* Already connected? */
@@ -113,13 +115,55 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
        }
        invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
 
-       blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
-       if (IS_ERR(blkif->xenblkd)) {
-               err = PTR_ERR(blkif->xenblkd);
-               blkif->xenblkd = NULL;
-               xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
-               return;
+       for (i = 0; i < blkif->nr_rings; i++) {
+               ring = &blkif->rings[i];
+               ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
+               if (IS_ERR(ring->xenblkd)) {
+                       err = PTR_ERR(ring->xenblkd);
+                       ring->xenblkd = NULL;
+                       xenbus_dev_fatal(blkif->be->dev, err,
+                                       "start %s-%d xenblkd", name, i);
+                       goto out;
+               }
+       }
+       return;
+
+out:
+       while (--i >= 0) {
+               ring = &blkif->rings[i];
+               kthread_stop(ring->xenblkd);
+       }
+       return;
+}
+
+static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
+{
+       unsigned int r;
+
+       blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct xen_blkif_ring), GFP_KERNEL);
+       if (!blkif->rings)
+               return -ENOMEM;
+
+       for (r = 0; r < blkif->nr_rings; r++) {
+               struct xen_blkif_ring *ring = &blkif->rings[r];
+
+               spin_lock_init(&ring->blk_ring_lock);
+               init_waitqueue_head(&ring->wq);
+               INIT_LIST_HEAD(&ring->pending_free);
+               INIT_LIST_HEAD(&ring->persistent_purge_list);
+               INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
+               spin_lock_init(&ring->free_pages_lock);
+               INIT_LIST_HEAD(&ring->free_pages);
+
+               spin_lock_init(&ring->pending_free_lock);
+               init_waitqueue_head(&ring->pending_free_wq);
+               init_waitqueue_head(&ring->shutdown_wq);
+               ring->blkif = blkif;
+               ring->st_print = jiffies;
+               xen_blkif_get(blkif);
        }
+
+       return 0;
 }
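
Spawning one kthread per ring in the previous hunk uses the usual unwind-on-failure idiom; a sketch with hypothetical start_one()/stop_one() stand-ins for kthread_run()/kthread_stop():

int start_one(unsigned int i);          /* hypothetical stand-ins */
void stop_one(unsigned int i);

static int start_all(unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; i++) {
                if (start_one(i) < 0)
                        goto unwind;
        }
        return 0;

unwind:
        while (i-- > 0)                 /* stop only what was started */
                stop_one(i);
        return -1;
}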
 
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
@@ -133,41 +177,25 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
                return ERR_PTR(-ENOMEM);
 
        blkif->domid = domid;
-       spin_lock_init(&blkif->blk_ring_lock);
        atomic_set(&blkif->refcnt, 1);
-       init_waitqueue_head(&blkif->wq);
        init_completion(&blkif->drain_complete);
-       atomic_set(&blkif->drain, 0);
-       blkif->st_print = jiffies;
-       blkif->persistent_gnts.rb_node = NULL;
-       spin_lock_init(&blkif->free_pages_lock);
-       INIT_LIST_HEAD(&blkif->free_pages);
-       INIT_LIST_HEAD(&blkif->persistent_purge_list);
-       blkif->free_pages_num = 0;
-       atomic_set(&blkif->persistent_gnt_in_use, 0);
-       atomic_set(&blkif->inflight, 0);
-       INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
-
-       INIT_LIST_HEAD(&blkif->pending_free);
        INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-       spin_lock_init(&blkif->pending_free_lock);
-       init_waitqueue_head(&blkif->pending_free_wq);
-       init_waitqueue_head(&blkif->shutdown_wq);
 
        return blkif;
 }
 
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
                         unsigned int nr_grefs, unsigned int evtchn)
 {
        int err;
+       struct xen_blkif *blkif = ring->blkif;
 
        /* Already connected through? */
-       if (blkif->irq)
+       if (ring->irq)
                return 0;
 
        err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
-                                    &blkif->blk_ring);
+                                    &ring->blk_ring);
        if (err < 0)
                return err;
 
@@ -175,24 +203,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
        case BLKIF_PROTOCOL_NATIVE:
        {
                struct blkif_sring *sring;
-               sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+               sring = (struct blkif_sring *)ring->blk_ring;
+               BACK_RING_INIT(&ring->blk_rings.native, sring,
                               XEN_PAGE_SIZE * nr_grefs);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
-               sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+               sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
+               BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
                               XEN_PAGE_SIZE * nr_grefs);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
-               sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+               sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
+               BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
                               XEN_PAGE_SIZE * nr_grefs);
                break;
        }
@@ -202,13 +230,13 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
 
        err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
                                                    xen_blkif_be_int, 0,
-                                                   "blkif-backend", blkif);
+                                                   "blkif-backend", ring);
        if (err < 0) {
-               xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
-               blkif->blk_rings.common.sring = NULL;
+               xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+               ring->blk_rings.common.sring = NULL;
                return err;
        }
-       blkif->irq = err;
+       ring->irq = err;
 
        return 0;
 }
@@ -216,50 +244,69 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
 static int xen_blkif_disconnect(struct xen_blkif *blkif)
 {
        struct pending_req *req, *n;
-       int i = 0, j;
+       unsigned int j, r;
 
-       if (blkif->xenblkd) {
-               kthread_stop(blkif->xenblkd);
-               wake_up(&blkif->shutdown_wq);
-               blkif->xenblkd = NULL;
-       }
+       for (r = 0; r < blkif->nr_rings; r++) {
+               struct xen_blkif_ring *ring = &blkif->rings[r];
+               unsigned int i = 0;
 
-       /* The above kthread_stop() guarantees that at this point we
-        * don't have any discard_io or other_io requests. So, checking
-        * for inflight IO is enough.
-        */
-       if (atomic_read(&blkif->inflight) > 0)
-               return -EBUSY;
+               if (ring->xenblkd) {
+                       kthread_stop(ring->xenblkd);
+                       wake_up(&ring->shutdown_wq);
+                       ring->xenblkd = NULL;
+               }
 
-       if (blkif->irq) {
-               unbind_from_irqhandler(blkif->irq, blkif);
-               blkif->irq = 0;
-       }
+               /* The above kthread_stop() guarantees that at this point we
+                * don't have any discard_io or other_io requests. So, checking
+                * for inflight IO is enough.
+                */
+               if (atomic_read(&ring->inflight) > 0)
+                       return -EBUSY;
 
-       if (blkif->blk_rings.common.sring) {
-               xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
-               blkif->blk_rings.common.sring = NULL;
-       }
+               if (ring->irq) {
+                       unbind_from_irqhandler(ring->irq, ring);
+                       ring->irq = 0;
+               }
 
-       /* Remove all persistent grants and the cache of ballooned pages. */
-       xen_blkbk_free_caches(blkif);
+               if (ring->blk_rings.common.sring) {
+                       xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+                       ring->blk_rings.common.sring = NULL;
+               }
 
-       /* Check that there is no request in use */
-       list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
-               list_del(&req->free_list);
+               /* Remove all persistent grants and the cache of ballooned pages. */
+               xen_blkbk_free_caches(ring);
 
-               for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
-                       kfree(req->segments[j]);
+               /* Check that there is no request in use */
+               list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
+                       list_del(&req->free_list);
 
-               for (j = 0; j < MAX_INDIRECT_PAGES; j++)
-                       kfree(req->indirect_pages[j]);
+                       for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+                               kfree(req->segments[j]);
 
-               kfree(req);
-               i++;
-       }
+                       for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+                               kfree(req->indirect_pages[j]);
+
+                       kfree(req);
+                       i++;
+               }
 
-       WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+               BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
+               BUG_ON(!list_empty(&ring->persistent_purge_list));
+               BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+               BUG_ON(!list_empty(&ring->free_pages));
+               BUG_ON(ring->free_pages_num != 0);
+               BUG_ON(ring->persistent_gnt_c != 0);
+               WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+               xen_blkif_put(blkif);
+       }
        blkif->nr_ring_pages = 0;
+       /*
+        * blkif->rings was allocated in connect_ring, so we should free it
+        * here.
+        */
+       kfree(blkif->rings);
+       blkif->rings = NULL;
+       blkif->nr_rings = 0;
 
        return 0;
 }
@@ -271,13 +318,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
        xen_vbd_free(&blkif->vbd);
 
        /* Make sure everything is drained before shutting down */
-       BUG_ON(blkif->persistent_gnt_c != 0);
-       BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
-       BUG_ON(blkif->free_pages_num != 0);
-       BUG_ON(!list_empty(&blkif->persistent_purge_list));
-       BUG_ON(!list_empty(&blkif->free_pages));
-       BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-
        kmem_cache_free(xen_blkif_cachep, blkif);
 }
 
@@ -296,25 +336,38 @@ int __init xen_blkif_interface_init(void)
  *  sysfs interface for VBD I/O requests
  */
 
-#define VBD_SHOW(name, format, args...)                                        \
+#define VBD_SHOW_ALLRING(name, format)                                 \
        static ssize_t show_##name(struct device *_dev,                 \
                                   struct device_attribute *attr,       \
                                   char *buf)                           \
        {                                                               \
                struct xenbus_device *dev = to_xenbus_device(_dev);     \
                struct backend_info *be = dev_get_drvdata(&dev->dev);   \
+               struct xen_blkif *blkif = be->blkif;                    \
+               unsigned int i;                                         \
+               unsigned long long result = 0;                          \
                                                                        \
-               return sprintf(buf, format, ##args);                    \
+               if (!blkif->rings)                              \
+                       goto out;                                       \
+                                                                       \
+               for (i = 0; i < blkif->nr_rings; i++) {         \
+                       struct xen_blkif_ring *ring = &blkif->rings[i]; \
+                                                                       \
+                       result += ring->st_##name;                      \
+               }                                                       \
+                                                                       \
+out:                                                                   \
+               return sprintf(buf, format, result);                    \
        }                                                               \
        static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
 
-VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req);
-VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req);
-VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req);
-VBD_SHOW(f_req,  "%llu\n", be->blkif->st_f_req);
-VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req);
-VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
-VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);
+VBD_SHOW_ALLRING(oo_req,  "%llu\n");
+VBD_SHOW_ALLRING(rd_req,  "%llu\n");
+VBD_SHOW_ALLRING(wr_req,  "%llu\n");
+VBD_SHOW_ALLRING(f_req,  "%llu\n");
+VBD_SHOW_ALLRING(ds_req,  "%llu\n");
+VBD_SHOW_ALLRING(rd_sect, "%llu\n");
+VBD_SHOW_ALLRING(wr_sect, "%llu\n");
 
 static struct attribute *xen_vbdstat_attrs[] = {
        &dev_attr_oo_req.attr,
@@ -332,6 +385,18 @@ static struct attribute_group xen_vbdstat_group = {
        .attrs = xen_vbdstat_attrs,
 };
 
+#define VBD_SHOW(name, format, args...)                                        \
+       static ssize_t show_##name(struct device *_dev,                 \
+                                  struct device_attribute *attr,       \
+                                  char *buf)                           \
+       {                                                               \
+               struct xenbus_device *dev = to_xenbus_device(_dev);     \
+               struct backend_info *be = dev_get_drvdata(&dev->dev);   \
+                                                                       \
+               return sprintf(buf, format, ##args);                    \
+       }                                                               \
+       static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
 VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
 VBD_SHOW(mode, "%s\n", be->mode);
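
Each VBD_SHOW_ALLRING() expansion reduces to a sum over the per-ring counters; for st_rd_req it is roughly the following sketch, relying on the rings/nr_rings/st_* fields declared in the header hunk earlier:

static unsigned long long total_rd_req(const struct xen_blkif *blkif)
{
        unsigned long long result = 0;
        unsigned int i;

        if (!blkif->rings)              /* not connected yet */
                return 0;
        for (i = 0; i < blkif->nr_rings; i++)
                result += blkif->rings[i].st_rd_req;
        return result;
}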
 
@@ -440,11 +505,11 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 
        dev_set_drvdata(&dev->dev, NULL);
 
-       if (be->blkif) {
+       if (be->blkif)
                xen_blkif_disconnect(be->blkif);
-               xen_blkif_put(be->blkif);
-       }
 
+       /* Put the reference we set in xen_blkif_alloc(). */
+       xen_blkif_put(be->blkif);
        kfree(be->mode);
        kfree(be);
        return 0;
@@ -553,6 +618,12 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
                goto fail;
        }
 
+       /* Multi-queue: advertise how many queues we support. */
+       err = xenbus_printf(XBT_NIL, dev->nodename,
+                           "multi-queue-max-queues", "%u", xenblk_max_queues);
+       if (err)
+               pr_warn("Error writing multi-queue-max-queues\n");
+
        /* setup back pointer */
        be->blkif->be = be;
 
@@ -708,8 +779,14 @@ static void frontend_changed(struct xenbus_device *dev,
                }
 
                err = connect_ring(be);
-               if (err)
+               if (err) {
+                       /*
+                        * Clean up so that memory resources can be used by
+                        * other devices. connect_ring already reported the error.
+                        */
+                       xen_blkif_disconnect(be->blkif);
                        break;
+               }
                xen_update_blkif_status(be->blkif);
                break;
 
@@ -825,50 +902,43 @@ again:
        xenbus_transaction_end(xbt, 1);
 }
 
-
-static int connect_ring(struct backend_info *be)
+/*
+ * Each ring may use multiple pages, depending on "ring-page-order".
+ */
+static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
 {
-       struct xenbus_device *dev = be->dev;
        unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
-       unsigned int evtchn, nr_grefs, ring_page_order;
-       unsigned int pers_grants;
-       char protocol[64] = "";
        struct pending_req *req, *n;
        int err, i, j;
+       struct xen_blkif *blkif = ring->blkif;
+       struct xenbus_device *dev = blkif->be->dev;
+       unsigned int ring_page_order, nr_grefs, evtchn;
 
-       pr_debug("%s %s\n", __func__, dev->otherend);
-
-       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+       err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
                          &evtchn);
        if (err != 1) {
                err = -EINVAL;
-               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
-                                dev->otherend);
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
                return err;
        }
-       pr_info("event-channel %u\n", evtchn);
 
        err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
                          &ring_page_order);
        if (err != 1) {
-               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
-                                 "%u", &ring_ref[0]);
+               err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
                if (err != 1) {
                        err = -EINVAL;
-                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
-                                        dev->otherend);
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
                        return err;
                }
                nr_grefs = 1;
-               pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
-                       ring_ref[0]);
        } else {
                unsigned int i;
 
                if (ring_page_order > xen_blkif_max_ring_order) {
                        err = -EINVAL;
                        xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
-                                        dev->otherend, ring_page_order,
+                                        dir, ring_page_order,
                                         xen_blkif_max_ring_order);
                        return err;
                }
@@ -878,52 +948,23 @@ static int connect_ring(struct backend_info *be)
                        char ring_ref_name[RINGREF_NAME_LEN];
 
                        snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
-                       err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+                       err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
                                           "%u", &ring_ref[i]);
                        if (err != 1) {
                                err = -EINVAL;
                                xenbus_dev_fatal(dev, err, "reading %s/%s",
-                                                dev->otherend, ring_ref_name);
+                                                dir, ring_ref_name);
                                return err;
                        }
-                       pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
                }
        }
-
-       be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
-       err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
-                           "%63s", protocol, NULL);
-       if (err)
-               strcpy(protocol, "unspecified, assuming default");
-       else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
-               be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
-       else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
-               be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
-       else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
-               be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
-       else {
-               xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
-               return -1;
-       }
-       err = xenbus_gather(XBT_NIL, dev->otherend,
-                           "feature-persistent", "%u",
-                           &pers_grants, NULL);
-       if (err)
-               pers_grants = 0;
-
-       be->blkif->vbd.feature_gnt_persistent = pers_grants;
-       be->blkif->vbd.overflow_max_grants = 0;
-       be->blkif->nr_ring_pages = nr_grefs;
-
-       pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
-               nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
-               pers_grants ? "persistent grants" : "");
+       blkif->nr_ring_pages = nr_grefs;
 
        for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
                req = kzalloc(sizeof(*req), GFP_KERNEL);
                if (!req)
                        goto fail;
-               list_add_tail(&req->free_list, &be->blkif->pending_free);
+               list_add_tail(&req->free_list, &ring->pending_free);
                for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
                        req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
                        if (!req->segments[j])
@@ -938,7 +979,7 @@ static int connect_ring(struct backend_info *be)
        }
 
        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
+       err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
        if (err) {
                xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
                return err;
@@ -947,7 +988,7 @@ static int connect_ring(struct backend_info *be)
        return 0;
 
 fail:
-       list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+       list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
                list_del(&req->free_list);
                for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
                        if (!req->segments[j])
@@ -962,6 +1003,93 @@ fail:
                kfree(req);
        }
        return -ENOMEM;
+
+}
+
+static int connect_ring(struct backend_info *be)
+{
+       struct xenbus_device *dev = be->dev;
+       unsigned int pers_grants;
+       char protocol[64] = "";
+       int err, i;
+       char *xspath;
+       size_t xspathsize;
+       const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
+       unsigned int requested_num_queues = 0;
+
+       pr_debug("%s %s\n", __func__, dev->otherend);
+
+       be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
+       err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+                           "%63s", protocol, NULL);
+       if (err)
+               strcpy(protocol, "unspecified, assuming default");
+       else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+               be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+       else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+               be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+       else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+               be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+       else {
+               xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
+               return -ENOSYS;
+       }
+       err = xenbus_gather(XBT_NIL, dev->otherend,
+                           "feature-persistent", "%u",
+                           &pers_grants, NULL);
+       if (err)
+               pers_grants = 0;
+
+       be->blkif->vbd.feature_gnt_persistent = pers_grants;
+       be->blkif->vbd.overflow_max_grants = 0;
+
+       /*
+        * Read the number of hardware queues from frontend.
+        */
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "multi-queue-num-queues",
+                          "%u", &requested_num_queues);
+       if (err < 0) {
+               requested_num_queues = 1;
+       } else {
+               if (requested_num_queues > xenblk_max_queues
+                   || requested_num_queues == 0) {
+                       /* Buggy or malicious guest. */
+                       xenbus_dev_fatal(dev, err,
+                                       "guest requested %u queues, exceeding the maximum of %u.",
+                                       requested_num_queues, xenblk_max_queues);
+                       return -ENOSYS;
+               }
+       }
+       be->blkif->nr_rings = requested_num_queues;
+       if (xen_blkif_alloc_rings(be->blkif))
+               return -ENOMEM;
+
+       pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
+                be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
+                pers_grants ? "persistent grants" : "");
+
+       if (be->blkif->nr_rings == 1)
+               return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
+       else {
+               xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
+               xspath = kmalloc(xspathsize, GFP_KERNEL);
+               if (!xspath) {
+                       xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
+                       return -ENOMEM;
+               }
+
+               for (i = 0; i < be->blkif->nr_rings; i++) {
+                       memset(xspath, 0, xspathsize);
+                       snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
+                       err = read_per_ring_refs(&be->blkif->rings[i], xspath);
+                       if (err) {
+                               kfree(xspath);
+                               return err;
+                       }
+               }
+               kfree(xspath);
+       }
+       return 0;
 }
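
For multiple queues, each ring's references live under their own xenstore directory built as "%s/queue-%u". A stand-alone sketch of that path construction (queue_path() is a hypothetical helper; the 11 extra bytes cover "/queue-" plus a three-digit queue number and the NUL, matching xenstore_path_ext_size above):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *queue_path(const char *otherend, unsigned int queue)
{
        size_t len = strlen(otherend) + 11;     /* "/queue-NNN" + NUL */
        char *path = malloc(len);

        if (path)
                snprintf(path, len, "%s/queue-%u", otherend, queue);
        return path;
}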
 
 static const struct xenbus_device_id xen_blkbk_ids[] = {
index 2fee2eef988d2992480363ea2578ddb9edf17240..83eb9e6bf8b06673640ff5d5ef05db1555e4912f 100644 (file)
 
 #include <asm/xen/hypervisor.h>
 
+/*
+ * The minimal segment size supported by the block framework is PAGE_SIZE.
+ * When Linux uses a different page size than Xen, it may not be possible
+ * to put all the data in a single segment.
+ * This can happen when the backend doesn't support indirect descriptors and
+ * therefore the maximum amount of data that a request can carry is
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE = 44KB.
+ *
+ * Note that we only support one extra request. So the Linux page size
+ * should be <= (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) =
+ * 88KB.
+ */
+#define HAS_EXTRA_REQ (BLKIF_MAX_SEGMENTS_PER_REQUEST < XEN_PFN_PER_PAGE)
+
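
The 44KB and 88KB figures in the comment follow directly from the upstream constants (11 segments per request, 4KB Xen pages); spelled out as compile-time checks:

#define SEGS_PER_REQ    11              /* BLKIF_MAX_SEGMENTS_PER_REQUEST */
#define XEN_PAGE        4096            /* XEN_PAGE_SIZE */

_Static_assert(SEGS_PER_REQ * XEN_PAGE == 44 * 1024,
               "one request carries at most 44KB");
_Static_assert(2 * SEGS_PER_REQ * XEN_PAGE == 88 * 1024,
               "one request plus its extra sibling carry at most 88KB");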
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,6 +86,13 @@ struct grant {
        struct list_head node;
 };
 
+enum blk_req_status {
+       REQ_WAITING,
+       REQ_DONE,
+       REQ_ERROR,
+       REQ_EOPNOTSUPP,
+};
+
 struct blk_shadow {
        struct blkif_request req;
        struct request *request;
@@ -79,6 +100,14 @@ struct blk_shadow {
        struct grant **indirect_grants;
        struct scatterlist *sg;
        unsigned int num_sg;
+       enum blk_req_status status;
+
+       #define NO_ASSOCIATED_ID ~0UL
+       /*
+        * Id of the sibling if we ever need 2 requests when handling a
+        * block I/O request.
+        */
+       unsigned long associated_id;
 };
 
 struct split_bio {
@@ -99,6 +128,10 @@ static unsigned int xen_blkif_max_segments = 32;
 module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
 MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
 
+static unsigned int xen_blkif_max_queues = 4;
+module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);
+MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
+
 /*
  * Maximum order of pages to be used for the shared ring between front and
  * backend, 4KB page granularity is used.
@@ -114,10 +147,35 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the
        __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS)
 
 /*
- * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
- * characters are enough. Define to 20 to keep consist with backend.
+ * ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
+ * characters are enough. Define to 20 to keep consistent with backend.
  */
 #define RINGREF_NAME_LEN (20)
+/*
+ * queue-%u would take 7 + 10(UINT_MAX) = 17 characters.
+ */
+#define QUEUE_NAME_LEN (17)
+
+/*
+ *  Per-ring info.
+ *  Every blkfront device can be associated with one or more blkfront_ring_info
+ *  structures, depending on how many hardware queues/rings are used.
+ */
+struct blkfront_ring_info {
+       /* Lock to protect data in every ring buffer. */
+       spinlock_t ring_lock;
+       struct blkif_front_ring ring;
+       unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
+       unsigned int evtchn, irq;
+       struct work_struct work;
+       struct gnttab_free_callback callback;
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
+       struct list_head indirect_pages;
+       struct list_head grants;
+       unsigned int persistent_gnts_c;
+       unsigned long shadow_free;
+       struct blkfront_info *dev_info;
+};
 
 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -126,25 +184,15 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the
  */
 struct blkfront_info
 {
-       spinlock_t io_lock;
        struct mutex mutex;
        struct xenbus_device *xbdev;
        struct gendisk *gd;
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref[XENBUS_MAX_RING_GRANTS];
+       /* Number of pages per ring buffer. */
        unsigned int nr_ring_pages;
-       struct blkif_front_ring ring;
-       unsigned int evtchn, irq;
        struct request_queue *rq;
-       struct work_struct work;
-       struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
-       struct list_head grants;
-       struct list_head indirect_pages;
-       unsigned int persistent_gnts_c;
-       unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int feature_discard:1;
        unsigned int feature_secdiscard:1;
@@ -155,6 +203,8 @@ struct blkfront_info
        unsigned int max_indirect_segments;
        int is_ready;
        struct blk_mq_tag_set tag_set;
+       struct blkfront_ring_info *rinfo;
+       unsigned int nr_rings;
 };
 
 static unsigned int nr_minors;
@@ -198,38 +248,40 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define GREFS(_psegs)  ((_psegs) * GRANTS_PER_PSEG)
 
-static int blkfront_setup_indirect(struct blkfront_info *info);
-static int blkfront_gather_backend_features(struct blkfront_info *info);
+static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
+static void blkfront_gather_backend_features(struct blkfront_info *info);
 
-static int get_id_from_freelist(struct blkfront_info *info)
+static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
 {
-       unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE(info));
-       info->shadow_free = info->shadow[free].req.u.rw.id;
-       info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
+       unsigned long free = rinfo->shadow_free;
+
+       BUG_ON(free >= BLK_RING_SIZE(rinfo->dev_info));
+       rinfo->shadow_free = rinfo->shadow[free].req.u.rw.id;
+       rinfo->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
 }
 
-static int add_id_to_freelist(struct blkfront_info *info,
-                              unsigned long id)
+static int add_id_to_freelist(struct blkfront_ring_info *rinfo,
+                             unsigned long id)
 {
-       if (info->shadow[id].req.u.rw.id != id)
+       if (rinfo->shadow[id].req.u.rw.id != id)
                return -EINVAL;
-       if (info->shadow[id].request == NULL)
+       if (rinfo->shadow[id].request == NULL)
                return -EINVAL;
-       info->shadow[id].req.u.rw.id  = info->shadow_free;
-       info->shadow[id].request = NULL;
-       info->shadow_free = id;
+       rinfo->shadow[id].req.u.rw.id  = rinfo->shadow_free;
+       rinfo->shadow[id].request = NULL;
+       rinfo->shadow_free = id;
        return 0;
 }
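
get_id_from_freelist()/add_id_to_freelist() thread the free list through the shadow array itself: a free slot's req.u.rw.id field stores the index of the next free slot. A self-contained sketch (init_ids()/get_id()/put_id() are hypothetical names; the poison value is the driver's own):

#include <assert.h>

#define RING 32

struct shadow { unsigned long id; void *request; };

static struct shadow shadow[RING];
static unsigned long shadow_free;       /* index of the first free slot */

static void init_ids(void)
{
        unsigned long i;

        for (i = 0; i < RING; i++)
                shadow[i].id = i + 1;   /* slot i links to slot i + 1 */
        shadow_free = 0;
}

static unsigned long get_id(void)
{
        unsigned long free = shadow_free;

        assert(free < RING);
        shadow_free = shadow[free].id;  /* pop: follow the link */
        shadow[free].id = 0x0fffffee;   /* poison, as in the driver */
        return free;
}

static void put_id(unsigned long id)
{
        shadow[id].id = shadow_free;    /* push: link to the old head */
        shadow[id].request = NULL;
        shadow_free = id;
}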
 
-static int fill_grant_buffer(struct blkfront_info *info, int num)
+static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
 {
+       struct blkfront_info *info = rinfo->dev_info;
        struct page *granted_page;
        struct grant *gnt_list_entry, *n;
        int i = 0;
 
-       while(i < num) {
+       while (i < num) {
                gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
                if (!gnt_list_entry)
                        goto out_of_memory;
@@ -244,7 +296,7 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
                }
 
                gnt_list_entry->gref = GRANT_INVALID_REF;
-               list_add(&gnt_list_entry->node, &info->grants);
+               list_add(&gnt_list_entry->node, &rinfo->grants);
                i++;
        }
 
@@ -252,7 +304,7 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
 
 out_of_memory:
        list_for_each_entry_safe(gnt_list_entry, n,
-                                &info->grants, node) {
+                                &rinfo->grants, node) {
                list_del(&gnt_list_entry->node);
                if (info->feature_persistent)
                        __free_page(gnt_list_entry->page);
@@ -263,17 +315,17 @@ out_of_memory:
        return -ENOMEM;
 }
 
-static struct grant *get_free_grant(struct blkfront_info *info)
+static struct grant *get_free_grant(struct blkfront_ring_info *rinfo)
 {
        struct grant *gnt_list_entry;
 
-       BUG_ON(list_empty(&info->grants));
-       gnt_list_entry = list_first_entry(&info->grants, struct grant,
+       BUG_ON(list_empty(&rinfo->grants));
+       gnt_list_entry = list_first_entry(&rinfo->grants, struct grant,
                                          node);
        list_del(&gnt_list_entry->node);
 
        if (gnt_list_entry->gref != GRANT_INVALID_REF)
-               info->persistent_gnts_c--;
+               rinfo->persistent_gnts_c--;
 
        return gnt_list_entry;
 }
@@ -289,9 +341,10 @@ static inline void grant_foreign_access(const struct grant *gnt_list_entry,
 
 static struct grant *get_grant(grant_ref_t *gref_head,
                               unsigned long gfn,
-                              struct blkfront_info *info)
+                              struct blkfront_ring_info *rinfo)
 {
-       struct grant *gnt_list_entry = get_free_grant(info);
+       struct grant *gnt_list_entry = get_free_grant(rinfo);
+       struct blkfront_info *info = rinfo->dev_info;
 
        if (gnt_list_entry->gref != GRANT_INVALID_REF)
                return gnt_list_entry;
@@ -312,9 +365,10 @@ static struct grant *get_grant(grant_ref_t *gref_head,
 }
 
 static struct grant *get_indirect_grant(grant_ref_t *gref_head,
-                                       struct blkfront_info *info)
+                                       struct blkfront_ring_info *rinfo)
 {
-       struct grant *gnt_list_entry = get_free_grant(info);
+       struct grant *gnt_list_entry = get_free_grant(rinfo);
+       struct blkfront_info *info = rinfo->dev_info;
 
        if (gnt_list_entry->gref != GRANT_INVALID_REF)
                return gnt_list_entry;
@@ -326,8 +380,8 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
                struct page *indirect_page;
 
                /* Fetch a pre-allocated page to use for indirect grefs */
-               BUG_ON(list_empty(&info->indirect_pages));
-               indirect_page = list_first_entry(&info->indirect_pages,
+               BUG_ON(list_empty(&rinfo->indirect_pages));
+               indirect_page = list_first_entry(&rinfo->indirect_pages,
                                                 struct page, lru);
                list_del(&indirect_page->lru);
                gnt_list_entry->page = indirect_page;
@@ -403,8 +457,8 @@ static void xlbd_release_minors(unsigned int minor, unsigned int nr)
 
 static void blkif_restart_queue_callback(void *arg)
 {
-       struct blkfront_info *info = (struct blkfront_info *)arg;
-       schedule_work(&info->work);
+       struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)arg;
+       schedule_work(&rinfo->work);
 }
 
 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
@@ -456,16 +510,33 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
        return 0;
 }
 
-static int blkif_queue_discard_req(struct request *req)
+static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
+                                           struct request *req,
+                                           struct blkif_request **ring_req)
+{
+       unsigned long id;
+
+       *ring_req = RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt);
+       rinfo->ring.req_prod_pvt++;
+
+       id = get_id_from_freelist(rinfo);
+       rinfo->shadow[id].request = req;
+       rinfo->shadow[id].status = REQ_WAITING;
+       rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
+
+       (*ring_req)->u.rw.id = id;
+
+       return id;
+}
+
+static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo)
 {
-       struct blkfront_info *info = req->rq_disk->private_data;
+       struct blkfront_info *info = rinfo->dev_info;
        struct blkif_request *ring_req;
        unsigned long id;
 
        /* Fill out a communications ring structure. */
-       ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-       id = get_id_from_freelist(info);
-       info->shadow[id].request = req;
+       id = blkif_ring_get_request(rinfo, req, &ring_req);
 
        ring_req->operation = BLKIF_OP_DISCARD;
        ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
@@ -476,10 +547,8 @@ static int blkif_queue_discard_req(struct request *req)
        else
                ring_req->u.discard.flag = 0;
 
-       info->ring.req_prod_pvt++;
-
        /* Keep a private copy so we can reissue requests when recovering. */
-       info->shadow[id].req = *ring_req;
+       rinfo->shadow[id].req = *ring_req;
 
        return 0;
 }
@@ -487,7 +556,7 @@ static int blkif_queue_discard_req(struct request *req)
 struct setup_rw_req {
        unsigned int grant_idx;
        struct blkif_request_segment *segments;
-       struct blkfront_info *info;
+       struct blkfront_ring_info *rinfo;
        struct blkif_request *ring_req;
        grant_ref_t gref_head;
        unsigned int id;
@@ -495,6 +564,9 @@ struct setup_rw_req {
        bool need_copy;
        unsigned int bvec_off;
        char *bvec_data;
+
+       bool require_extra_req;
+       struct blkif_request *extra_ring_req;
 };
 
 static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
@@ -507,8 +579,24 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
        /* Convenient aliases */
        unsigned int grant_idx = setup->grant_idx;
        struct blkif_request *ring_req = setup->ring_req;
-       struct blkfront_info *info = setup->info;
-       struct blk_shadow *shadow = &info->shadow[setup->id];
+       struct blkfront_ring_info *rinfo = setup->rinfo;
+       /*
+        * We always use the shadow of the first request to store the list
+        * of grants associated with the block I/O request. This makes
+        * completion easier to handle even if the block I/O request is
+        * split.
+        */
+       struct blk_shadow *shadow = &rinfo->shadow[setup->id];
+
+       if (unlikely(setup->require_extra_req &&
+                    grant_idx >= BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+               /*
+                * We are using the second request; rebase grant_idx
+                * so it indexes into that request's segment array.
+                */
+               grant_idx -= BLKIF_MAX_SEGMENTS_PER_REQUEST;
+               ring_req = setup->extra_ring_req;
+       }
 
        if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
            (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) {
@@ -516,15 +604,19 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
                        kunmap_atomic(setup->segments);
 
                n = grant_idx / GRANTS_PER_INDIRECT_FRAME;
-               gnt_list_entry = get_indirect_grant(&setup->gref_head, info);
+               gnt_list_entry = get_indirect_grant(&setup->gref_head, rinfo);
                shadow->indirect_grants[n] = gnt_list_entry;
                setup->segments = kmap_atomic(gnt_list_entry->page);
                ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
        }
 
-       gnt_list_entry = get_grant(&setup->gref_head, gfn, info);
+       gnt_list_entry = get_grant(&setup->gref_head, gfn, rinfo);
        ref = gnt_list_entry->gref;
-       shadow->grants_used[grant_idx] = gnt_list_entry;
+       /*
+        * All the grants are stored in the shadow of the first
+        * request. Therefore we have to use the global index.
+        */
+       shadow->grants_used[setup->grant_idx] = gnt_list_entry;
 
        if (setup->need_copy) {
                void *shared_data;
@@ -566,16 +658,36 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
        (setup->grant_idx)++;
 }
 
-static int blkif_queue_rw_req(struct request *req)
+static void blkif_setup_extra_req(struct blkif_request *first,
+                                 struct blkif_request *second)
 {
-       struct blkfront_info *info = req->rq_disk->private_data;
-       struct blkif_request *ring_req;
-       unsigned long id;
+       uint16_t nr_segments = first->u.rw.nr_segments;
+
+       /*
+        * The second request is only present when the first request uses
+        * all its segments. It is always a continuation of the first one.
+        */
+       first->u.rw.nr_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+       second->u.rw.nr_segments = nr_segments - BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       second->u.rw.sector_number = first->u.rw.sector_number +
+               (BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512;
+
+       second->u.rw.handle = first->u.rw.handle;
+       second->operation = first->operation;
+}
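
The only non-obvious part of blkif_setup_extra_req() is the sector offset of
the second half: each segment covers one Xen page, so the continuation starts
(BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512 sectors after the
first request. A stand-alone sketch of that arithmetic, with the usual
constants hard-coded purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define XEN_PAGE_SIZE			4096
#define BLKIF_MAX_SEGMENTS_PER_REQUEST	11

int main(void)
{
	uint16_t nr_segments = 16;	/* total grants needed by the I/O */
	uint64_t first_sector = 2048;

	uint16_t second_segs = nr_segments - BLKIF_MAX_SEGMENTS_PER_REQUEST;
	uint64_t second_sector = first_sector +
		(BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512;

	/* first: 11 segments at sector 2048; second: 5 segments at 2136 */
	printf("second request: %u segments at sector %llu\n",
	       (unsigned)second_segs, (unsigned long long)second_sector);
	return 0;
}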
+
+static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *rinfo)
+{
+       struct blkfront_info *info = rinfo->dev_info;
+       struct blkif_request *ring_req, *extra_ring_req = NULL;
+       unsigned long id, extra_id = NO_ASSOCIATED_ID;
+       bool require_extra_req = false;
        int i;
        struct setup_rw_req setup = {
                .grant_idx = 0,
                .segments = NULL,
-               .info = info,
+               .rinfo = rinfo,
                .need_copy = rq_data_dir(req) && info->feature_persistent,
        };
 
@@ -584,7 +696,6 @@ static int blkif_queue_rw_req(struct request *req)
         * existing persistent grants, or if we have to get new grants,
         * as there are not sufficiently many free.
         */
-       bool new_persistent_gnts;
        struct scatterlist *sg;
        int num_sg, max_grefs, num_grant;
 
@@ -596,41 +707,36 @@ static int blkif_queue_rw_req(struct request *req)
                 */
                max_grefs += INDIRECT_GREFS(max_grefs);
 
-       /* Check if we have enough grants to allocate a requests */
-       if (info->persistent_gnts_c < max_grefs) {
-               new_persistent_gnts = 1;
-               if (gnttab_alloc_grant_references(
-                   max_grefs - info->persistent_gnts_c,
-                   &setup.gref_head) < 0) {
+       /*
+        * We have to reserve 'max_grefs' grants because persistent
+        * grants are shared by all rings.
+        */
+       if (max_grefs > 0)
+               if (gnttab_alloc_grant_references(max_grefs, &setup.gref_head) < 0) {
                        gnttab_request_free_callback(
-                               &info->callback,
+                               &rinfo->callback,
                                blkif_restart_queue_callback,
-                               info,
+                               rinfo,
                                max_grefs);
                        return 1;
                }
-       } else
-               new_persistent_gnts = 0;
 
        /* Fill out a communications ring structure. */
-       ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-       id = get_id_from_freelist(info);
-       info->shadow[id].request = req;
+       id = blkif_ring_get_request(rinfo, req, &ring_req);
 
-       BUG_ON(info->max_indirect_segments == 0 &&
-              GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST);
-       BUG_ON(info->max_indirect_segments &&
-              GREFS(req->nr_phys_segments) > info->max_indirect_segments);
-
-       num_sg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
+       num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
        num_grant = 0;
        /* Calculate the number of grant used */
-       for_each_sg(info->shadow[id].sg, sg, num_sg, i)
+       for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)
               num_grant += gnttab_count_grant(sg->offset, sg->length);
 
-       ring_req->u.rw.id = id;
-       info->shadow[id].num_sg = num_sg;
-       if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+       require_extra_req = info->max_indirect_segments == 0 &&
+               num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       BUG_ON(!HAS_EXTRA_REQ && require_extra_req);
+
+       rinfo->shadow[id].num_sg = num_sg;
+       if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST &&
+           likely(!require_extra_req)) {
                /*
                 * The indirect operation can only be a BLKIF_OP_READ or
                 * BLKIF_OP_WRITE
@@ -670,11 +776,31 @@ static int blkif_queue_rw_req(struct request *req)
                        }
                }
                ring_req->u.rw.nr_segments = num_grant;
+               if (unlikely(require_extra_req)) {
+                       extra_id = blkif_ring_get_request(rinfo, req,
+                                                         &extra_ring_req);
+                       /*
+                        * Only the first request contains the scatter-gather
+                        * list.
+                        */
+                       rinfo->shadow[extra_id].num_sg = 0;
+
+                       blkif_setup_extra_req(ring_req, extra_ring_req);
+
+                       /* Link the 2 requests together */
+                       rinfo->shadow[extra_id].associated_id = id;
+                       rinfo->shadow[id].associated_id = extra_id;
+               }
        }
 
        setup.ring_req = ring_req;
        setup.id = id;
-       for_each_sg(info->shadow[id].sg, sg, num_sg, i) {
+
+       setup.require_extra_req = require_extra_req;
+       if (unlikely(require_extra_req))
+               setup.extra_ring_req = extra_ring_req;
+
+       for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) {
                BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
                if (setup.need_copy) {
@@ -694,12 +820,12 @@ static int blkif_queue_rw_req(struct request *req)
        if (setup.segments)
                kunmap_atomic(setup.segments);
 
-       info->ring.req_prod_pvt++;
-
        /* Keep a private copy so we can reissue requests when recovering. */
-       info->shadow[id].req = *ring_req;
+       rinfo->shadow[id].req = *ring_req;
+       if (unlikely(require_extra_req))
+               rinfo->shadow[extra_id].req = *extra_ring_req;
 
-       if (new_persistent_gnts)
+       if (max_grefs > 0)
                gnttab_free_grant_references(setup.gref_head);
 
        return 0;
@@ -711,27 +837,25 @@ static int blkif_queue_rw_req(struct request *req)
  *
  * @req: a request struct
  */
-static int blkif_queue_request(struct request *req)
+static int blkif_queue_request(struct request *req, struct blkfront_ring_info *rinfo)
 {
-       struct blkfront_info *info = req->rq_disk->private_data;
-
-       if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+       if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED))
                return 1;
 
        if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE)))
-               return blkif_queue_discard_req(req);
+               return blkif_queue_discard_req(req, rinfo);
        else
-               return blkif_queue_rw_req(req);
+               return blkif_queue_rw_req(req, rinfo);
 }
 
-static inline void flush_requests(struct blkfront_info *info)
+static inline void flush_requests(struct blkfront_ring_info *rinfo)
 {
        int notify;
 
-       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rinfo->ring, notify);
 
        if (notify)
-               notify_remote_via_irq(info->irq);
+               notify_remote_via_irq(rinfo->irq);
 }
 
 static inline bool blkif_request_flush_invalid(struct request *req,
@@ -745,38 +869,50 @@ static inline bool blkif_request_flush_invalid(struct request *req,
 }
 
 static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
-                          const struct blk_mq_queue_data *qd)
+                         const struct blk_mq_queue_data *qd)
 {
-       struct blkfront_info *info = qd->rq->rq_disk->private_data;
+       unsigned long flags;
+       struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data;
 
        blk_mq_start_request(qd->rq);
-       spin_lock_irq(&info->io_lock);
-       if (RING_FULL(&info->ring))
+       spin_lock_irqsave(&rinfo->ring_lock, flags);
+       if (RING_FULL(&rinfo->ring))
                goto out_busy;
 
-       if (blkif_request_flush_invalid(qd->rq, info))
+       if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
                goto out_err;
 
-       if (blkif_queue_request(qd->rq))
+       if (blkif_queue_request(qd->rq, rinfo))
                goto out_busy;
 
-       flush_requests(info);
-       spin_unlock_irq(&info->io_lock);
+       flush_requests(rinfo);
+       spin_unlock_irqrestore(&rinfo->ring_lock, flags);
        return BLK_MQ_RQ_QUEUE_OK;
 
 out_err:
-       spin_unlock_irq(&info->io_lock);
+       spin_unlock_irqrestore(&rinfo->ring_lock, flags);
        return BLK_MQ_RQ_QUEUE_ERROR;
 
 out_busy:
-       spin_unlock_irq(&info->io_lock);
+       spin_unlock_irqrestore(&rinfo->ring_lock, flags);
        blk_mq_stop_hw_queue(hctx);
        return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
+static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+                           unsigned int index)
+{
+       struct blkfront_info *info = (struct blkfront_info *)data;
+
+       BUG_ON(info->nr_rings <= index);
+       hctx->driver_data = &info->rinfo[index];
+       return 0;
+}
+
 static struct blk_mq_ops blkfront_mq_ops = {
        .queue_rq = blkif_queue_rq,
        .map_queue = blk_mq_map_queue,
+       .init_hctx = blk_mq_init_hctx,
 };
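
With .init_hctx wired up, blk-mq binds hardware context index i to
info->rinfo[i] once, when the tag set is initialised, so blkif_queue_rq()
can reach its ring (and its per-ring lock) straight from hctx->driver_data
instead of going through the disk's private_data.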
 
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -788,19 +924,28 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 
        memset(&info->tag_set, 0, sizeof(info->tag_set));
        info->tag_set.ops = &blkfront_mq_ops;
-       info->tag_set.nr_hw_queues = 1;
-       info->tag_set.queue_depth =  BLK_RING_SIZE(info);
+       info->tag_set.nr_hw_queues = info->nr_rings;
+       if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) {
+               /*
+                * When indirect descriptors are not supported, the I/O
+                * request will be split across multiple requests in the
+                * ring. To avoid running out of ring slots when sending
+                * such a request, halve the queue depth.
+                */
+               info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2;
+       } else
+               info->tag_set.queue_depth = BLK_RING_SIZE(info);
        info->tag_set.numa_node = NUMA_NO_NODE;
        info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
        info->tag_set.cmd_size = 0;
        info->tag_set.driver_data = info;
 
        if (blk_mq_alloc_tag_set(&info->tag_set))
-               return -1;
+               return -EINVAL;
        rq = blk_mq_init_queue(&info->tag_set);
        if (IS_ERR(rq)) {
                blk_mq_free_tag_set(&info->tag_set);
-               return -1;
+               return PTR_ERR(rq);
        }
 
        queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
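
For a sense of scale: with one 4 KiB ring page a blkif ring typically holds
32 requests, and without indirect descriptors a large I/O may occupy two of
those slots, so a depth of 16 ensures that every request blk-mq dispatches
can claim both slots it may need (exact sizes depend on the page size and
the blkif request layout).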
@@ -1028,7 +1173,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
 static void xlvbd_release_gendisk(struct blkfront_info *info)
 {
-       unsigned int minor, nr_minors;
+       unsigned int minor, nr_minors, i;
 
        if (info->rq == NULL)
                return;
@@ -1036,11 +1181,15 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
        /* No more blkif_request(). */
        blk_mq_stop_hw_queues(info->rq);
 
-       /* No more gnttab callback work. */
-       gnttab_cancel_free_callback(&info->callback);
+       for (i = 0; i < info->nr_rings; i++) {
+               struct blkfront_ring_info *rinfo = &info->rinfo[i];
 
-       /* Flush gnttab callback work. Must be done with no locks held. */
-       flush_work(&info->work);
+               /* No more gnttab callback work. */
+               gnttab_cancel_free_callback(&rinfo->callback);
+
+               /* Flush gnttab callback work. Must be done with no locks held. */
+               flush_work(&rinfo->work);
+       }
 
        del_gendisk(info->gd);
 
@@ -1056,88 +1205,87 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
        info->gd = NULL;
 }
 
-/* Must be called with io_lock holded */
-static void kick_pending_request_queues(struct blkfront_info *info)
+/* The caller must hold rinfo->ring_lock. */
+static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo)
 {
-       if (!RING_FULL(&info->ring))
-               blk_mq_start_stopped_hw_queues(info->rq, true);
+       if (!RING_FULL(&rinfo->ring))
+               blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true);
 }
 
-static void blkif_restart_queue(struct work_struct *work)
+static void kick_pending_request_queues(struct blkfront_ring_info *rinfo)
 {
-       struct blkfront_info *info = container_of(work, struct blkfront_info, work);
+       unsigned long flags;
 
-       spin_lock_irq(&info->io_lock);
-       if (info->connected == BLKIF_STATE_CONNECTED)
-               kick_pending_request_queues(info);
-       spin_unlock_irq(&info->io_lock);
+       spin_lock_irqsave(&rinfo->ring_lock, flags);
+       kick_pending_request_queues_locked(rinfo);
+       spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 }
 
-static void blkif_free(struct blkfront_info *info, int suspend)
+static void blkif_restart_queue(struct work_struct *work)
 {
-       struct grant *persistent_gnt;
-       struct grant *n;
-       int i, j, segs;
+       struct blkfront_ring_info *rinfo = container_of(work, struct blkfront_ring_info, work);
 
-       /* Prevent new requests being issued until we fix things up. */
-       spin_lock_irq(&info->io_lock);
-       info->connected = suspend ?
-               BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
-       /* No more blkif_request(). */
-       if (info->rq)
-               blk_mq_stop_hw_queues(info->rq);
+       if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED)
+               kick_pending_request_queues(rinfo);
+}
 
-       /* Remove all persistent grants */
-       if (!list_empty(&info->grants)) {
-               list_for_each_entry_safe(persistent_gnt, n,
-                                        &info->grants, node) {
-                       list_del(&persistent_gnt->node);
-                       if (persistent_gnt->gref != GRANT_INVALID_REF) {
-                               gnttab_end_foreign_access(persistent_gnt->gref,
-                                                         0, 0UL);
-                               info->persistent_gnts_c--;
-                       }
-                       if (info->feature_persistent)
-                               __free_page(persistent_gnt->page);
-                       kfree(persistent_gnt);
-               }
-       }
-       BUG_ON(info->persistent_gnts_c != 0);
+static void blkif_free_ring(struct blkfront_ring_info *rinfo)
+{
+       struct grant *persistent_gnt, *n;
+       struct blkfront_info *info = rinfo->dev_info;
+       int i, j, segs;
 
        /*
         * Remove indirect pages, this only happens when using indirect
         * descriptors but not persistent grants
         */
-       if (!list_empty(&info->indirect_pages)) {
+       if (!list_empty(&rinfo->indirect_pages)) {
                struct page *indirect_page, *n;
 
                BUG_ON(info->feature_persistent);
-               list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
+               list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
                        list_del(&indirect_page->lru);
                        __free_page(indirect_page);
                }
        }
 
+       /* Remove all persistent grants. */
+       if (!list_empty(&rinfo->grants)) {
+               list_for_each_entry_safe(persistent_gnt, n,
+                                        &rinfo->grants, node) {
+                       list_del(&persistent_gnt->node);
+                       if (persistent_gnt->gref != GRANT_INVALID_REF) {
+                               gnttab_end_foreign_access(persistent_gnt->gref,
+                                                         0, 0UL);
+                               rinfo->persistent_gnts_c--;
+                       }
+                       if (info->feature_persistent)
+                               __free_page(persistent_gnt->page);
+                       kfree(persistent_gnt);
+               }
+       }
+       BUG_ON(rinfo->persistent_gnts_c != 0);
+
        for (i = 0; i < BLK_RING_SIZE(info); i++) {
                /*
                 * Clear persistent grants present in requests already
                 * on the shared ring
                 */
-               if (!info->shadow[i].request)
+               if (!rinfo->shadow[i].request)
                        goto free_shadow;
 
-               segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
-                      info->shadow[i].req.u.indirect.nr_segments :
-                      info->shadow[i].req.u.rw.nr_segments;
+               segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+                      rinfo->shadow[i].req.u.indirect.nr_segments :
+                      rinfo->shadow[i].req.u.rw.nr_segments;
                for (j = 0; j < segs; j++) {
-                       persistent_gnt = info->shadow[i].grants_used[j];
+                       persistent_gnt = rinfo->shadow[i].grants_used[j];
                        gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
                        if (info->feature_persistent)
                                __free_page(persistent_gnt->page);
                        kfree(persistent_gnt);
                }
 
-               if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+               if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT)
                        /*
                         * If this is not an indirect operation don't try to
                         * free indirect segments
@@ -1145,42 +1293,59 @@ static void blkif_free(struct blkfront_info *info, int suspend)
                        goto free_shadow;
 
                for (j = 0; j < INDIRECT_GREFS(segs); j++) {
-                       persistent_gnt = info->shadow[i].indirect_grants[j];
+                       persistent_gnt = rinfo->shadow[i].indirect_grants[j];
                        gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
                        __free_page(persistent_gnt->page);
                        kfree(persistent_gnt);
                }
 
 free_shadow:
-               kfree(info->shadow[i].grants_used);
-               info->shadow[i].grants_used = NULL;
-               kfree(info->shadow[i].indirect_grants);
-               info->shadow[i].indirect_grants = NULL;
-               kfree(info->shadow[i].sg);
-               info->shadow[i].sg = NULL;
+               kfree(rinfo->shadow[i].grants_used);
+               rinfo->shadow[i].grants_used = NULL;
+               kfree(rinfo->shadow[i].indirect_grants);
+               rinfo->shadow[i].indirect_grants = NULL;
+               kfree(rinfo->shadow[i].sg);
+               rinfo->shadow[i].sg = NULL;
        }
 
        /* No more gnttab callback work. */
-       gnttab_cancel_free_callback(&info->callback);
-       spin_unlock_irq(&info->io_lock);
+       gnttab_cancel_free_callback(&rinfo->callback);
 
        /* Flush gnttab callback work. Must be done with no locks held. */
-       flush_work(&info->work);
+       flush_work(&rinfo->work);
 
        /* Free resources associated with old device channel. */
        for (i = 0; i < info->nr_ring_pages; i++) {
-               if (info->ring_ref[i] != GRANT_INVALID_REF) {
-                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
-                       info->ring_ref[i] = GRANT_INVALID_REF;
+               if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0);
+                       rinfo->ring_ref[i] = GRANT_INVALID_REF;
                }
        }
-       free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
-       info->ring.sring = NULL;
+       free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+       rinfo->ring.sring = NULL;
+
+       if (rinfo->irq)
+               unbind_from_irqhandler(rinfo->irq, rinfo);
+       rinfo->evtchn = rinfo->irq = 0;
+}
+
+static void blkif_free(struct blkfront_info *info, int suspend)
+{
+       unsigned int i;
+
+       /* Prevent new requests being issued until we fix things up. */
+       info->connected = suspend ?
+               BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+       /* No more blkif_request(). */
+       if (info->rq)
+               blk_mq_stop_hw_queues(info->rq);
 
-       if (info->irq)
-               unbind_from_irqhandler(info->irq, info);
-       info->evtchn = info->irq = 0;
+       for (i = 0; i < info->nr_rings; i++)
+               blkif_free_ring(&info->rinfo[i]);
 
+       kfree(info->rinfo);
+       info->rinfo = NULL;
+       info->nr_rings = 0;
 }
 
 struct copy_from_grant {
@@ -1209,19 +1374,93 @@ static void blkif_copy_from_grant(unsigned long gfn, unsigned int offset,
        kunmap_atomic(shared_data);
 }
 
-static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+static enum blk_req_status blkif_rsp_to_req_status(int rsp)
+{
+       switch (rsp)
+       {
+       case BLKIF_RSP_OKAY:
+               return REQ_DONE;
+       case BLKIF_RSP_EOPNOTSUPP:
+               return REQ_EOPNOTSUPP;
+       case BLKIF_RSP_ERROR:
+               /* Fallthrough. */
+       default:
+               return REQ_ERROR;
+       }
+}
+
+/*
+ * Get the final status of the block request based on the two ring responses.
+ */
+static int blkif_get_final_status(enum blk_req_status s1,
+                                 enum blk_req_status s2)
+{
+       BUG_ON(s1 == REQ_WAITING);
+       BUG_ON(s2 == REQ_WAITING);
+
+       if (s1 == REQ_ERROR || s2 == REQ_ERROR)
+               return BLKIF_RSP_ERROR;
+       else if (s1 == REQ_EOPNOTSUPP || s2 == REQ_EOPNOTSUPP)
+               return BLKIF_RSP_EOPNOTSUPP;
+       return BLKIF_RSP_OKAY;
+}
+
+static bool blkif_completion(unsigned long *id,
+                            struct blkfront_ring_info *rinfo,
                             struct blkif_response *bret)
 {
        int i = 0;
        struct scatterlist *sg;
        int num_sg, num_grant;
+       struct blkfront_info *info = rinfo->dev_info;
+       struct blk_shadow *s = &rinfo->shadow[*id];
        struct copy_from_grant data = {
-               .s = s,
                .grant_idx = 0,
        };
 
        num_grant = s->req.operation == BLKIF_OP_INDIRECT ?
                s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
+
+       /* The I/O request may be split in two. */
+       if (unlikely(s->associated_id != NO_ASSOCIATED_ID)) {
+               struct blk_shadow *s2 = &rinfo->shadow[s->associated_id];
+
+               /* Keep the status of the current response in shadow. */
+               s->status = blkif_rsp_to_req_status(bret->status);
+
+               /* Wait for the second response if it is not here yet. */
+               if (s2->status == REQ_WAITING)
+                       return 0;
+
+               bret->status = blkif_get_final_status(s->status,
+                                                     s2->status);
+
+               /*
+                * All the grants are stored in the first shadow in order
+                * to make the completion code simpler.
+                */
+               num_grant += s2->req.u.rw.nr_segments;
+
+               /*
+                * The two responses may not arrive in order. Only the
+                * first request will store the scatter-gather list.
+                */
+               if (s2->num_sg != 0) {
+                       /* Update "id" with the ID of the first response. */
+                       *id = s->associated_id;
+                       s = s2;
+               }
+
+               /*
+                * We don't need the second request anymore, so recycle
+                * it now.
+                */
+               if (add_id_to_freelist(rinfo, s->associated_id))
+                       WARN(1, "%s: can't recycle the second part (id = %ld) of the request\n",
+                            info->gd->disk_name, s->associated_id);
+       }
+
+       data.s = s;
        num_sg = s->num_sg;
 
        if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
@@ -1252,8 +1491,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                        if (!info->feature_persistent)
                                pr_alert_ratelimited("backed has not unmapped grant: %u\n",
                                                     s->grants_used[i]->gref);
-                       list_add(&s->grants_used[i]->node, &info->grants);
-                       info->persistent_gnts_c++;
+                       list_add(&s->grants_used[i]->node, &rinfo->grants);
+                       rinfo->persistent_gnts_c++;
                } else {
                        /*
                         * If the grant is not mapped by the backend we end the
@@ -1263,7 +1502,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                         */
                        gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
                        s->grants_used[i]->gref = GRANT_INVALID_REF;
-                       list_add_tail(&s->grants_used[i]->node, &info->grants);
+                       list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
                }
        }
        if (s->req.operation == BLKIF_OP_INDIRECT) {
@@ -1272,8 +1511,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                                if (!info->feature_persistent)
                                        pr_alert_ratelimited("backed has not unmapped grant: %u\n",
                                                             s->indirect_grants[i]->gref);
-                               list_add(&s->indirect_grants[i]->node, &info->grants);
-                               info->persistent_gnts_c++;
+                               list_add(&s->indirect_grants[i]->node, &rinfo->grants);
+                               rinfo->persistent_gnts_c++;
                        } else {
                                struct page *indirect_page;
 
@@ -1284,13 +1523,15 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                                 */
                                if (!info->feature_persistent) {
                                        indirect_page = s->indirect_grants[i]->page;
-                                       list_add(&indirect_page->lru, &info->indirect_pages);
+                                       list_add(&indirect_page->lru, &rinfo->indirect_pages);
                                }
                                s->indirect_grants[i]->gref = GRANT_INVALID_REF;
-                               list_add_tail(&s->indirect_grants[i]->node, &info->grants);
+                               list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants);
                        }
                }
        }
+
+       return 1;
 }
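
The split-request completion above is order-independent: whichever response
arrives first merely parks its status in the shadow, and the one that arrives
second merges the two statuses and finishes the I/O. A minimal user-space
model of that hand-off (illustrative names only, not driver code):

#include <stdbool.h>
#include <stdio.h>

enum req_status { STATUS_WAITING, STATUS_DONE, STATUS_ERROR };

struct half {
	enum req_status status;
	struct half *associated;	/* the other half of the split I/O */
};

/* Returns true when the whole split request can be completed. */
static bool on_response(struct half *h, enum req_status status)
{
	h->status = status;
	if (h->associated->status == STATUS_WAITING)
		return false;		/* still waiting for the other response */
	return true;
}

int main(void)
{
	struct half a = { STATUS_WAITING, NULL };
	struct half b = { STATUS_WAITING, NULL };

	a.associated = &b;
	b.associated = &a;

	/* Responses may arrive in either order; only the second completes. */
	printf("after first response:  complete=%d\n", on_response(&b, STATUS_DONE));
	printf("after second response: complete=%d\n", on_response(&a, STATUS_DONE));
	return 0;
}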
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1299,24 +1540,22 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
        struct blkif_response *bret;
        RING_IDX i, rp;
        unsigned long flags;
-       struct blkfront_info *info = (struct blkfront_info *)dev_id;
+       struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
+       struct blkfront_info *info = rinfo->dev_info;
        int error;
 
-       spin_lock_irqsave(&info->io_lock, flags);
-
-       if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
-               spin_unlock_irqrestore(&info->io_lock, flags);
+       if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                return IRQ_HANDLED;
-       }
 
+       spin_lock_irqsave(&rinfo->ring_lock, flags);
  again:
-       rp = info->ring.sring->rsp_prod;
+       rp = rinfo->ring.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */
 
-       for (i = info->ring.rsp_cons; i != rp; i++) {
+       for (i = rinfo->ring.rsp_cons; i != rp; i++) {
                unsigned long id;
 
-               bret = RING_GET_RESPONSE(&info->ring, i);
+               bret = RING_GET_RESPONSE(&rinfo->ring, i);
                id   = bret->id;
                /*
                 * The backend has messed up and given us an id that we would
@@ -1330,12 +1569,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                         * the id is busted. */
                        continue;
                }
-               req  = info->shadow[id].request;
+               req  = rinfo->shadow[id].request;
 
-               if (bret->operation != BLKIF_OP_DISCARD)
-                       blkif_completion(&info->shadow[id], info, bret);
+               if (bret->operation != BLKIF_OP_DISCARD) {
+                       /*
+                        * We may need to wait for an extra response if the
+                        * I/O request is split in two.
+                        */
+                       if (!blkif_completion(&id, rinfo, bret))
+                               continue;
+               }
 
-               if (add_id_to_freelist(info, id)) {
+               if (add_id_to_freelist(rinfo, id)) {
                        WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
                             info->gd->disk_name, op_name(bret->operation), id);
                        continue;
@@ -1364,7 +1609,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                                error = -EOPNOTSUPP;
                        }
                        if (unlikely(bret->status == BLKIF_RSP_ERROR &&
-                                    info->shadow[id].req.u.rw.nr_segments == 0)) {
+                                    rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
                                printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
                                       info->gd->disk_name, op_name(bret->operation));
                                error = -EOPNOTSUPP;
@@ -1389,34 +1634,35 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                }
        }
 
-       info->ring.rsp_cons = i;
+       rinfo->ring.rsp_cons = i;
 
-       if (i != info->ring.req_prod_pvt) {
+       if (i != rinfo->ring.req_prod_pvt) {
                int more_to_do;
-               RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+               RING_FINAL_CHECK_FOR_RESPONSES(&rinfo->ring, more_to_do);
                if (more_to_do)
                        goto again;
        } else
-               info->ring.sring->rsp_event = i + 1;
+               rinfo->ring.sring->rsp_event = i + 1;
 
-       kick_pending_request_queues(info);
+       kick_pending_request_queues_locked(rinfo);
 
-       spin_unlock_irqrestore(&info->io_lock, flags);
+       spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 
        return IRQ_HANDLED;
 }
 
 
 static int setup_blkring(struct xenbus_device *dev,
-                        struct blkfront_info *info)
+                        struct blkfront_ring_info *rinfo)
 {
        struct blkif_sring *sring;
        int err, i;
+       struct blkfront_info *info = rinfo->dev_info;
        unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
        grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
 
        for (i = 0; i < info->nr_ring_pages; i++)
-               info->ring_ref[i] = GRANT_INVALID_REF;
+               rinfo->ring_ref[i] = GRANT_INVALID_REF;
 
        sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
                                                       get_order(ring_size));
@@ -1425,29 +1671,29 @@ static int setup_blkring(struct xenbus_device *dev,
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, ring_size);
+       FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
 
-       err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref);
+       err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
        if (err < 0) {
                free_pages((unsigned long)sring, get_order(ring_size));
-               info->ring.sring = NULL;
+               rinfo->ring.sring = NULL;
                goto fail;
        }
        for (i = 0; i < info->nr_ring_pages; i++)
-               info->ring_ref[i] = gref[i];
+               rinfo->ring_ref[i] = gref[i];
 
-       err = xenbus_alloc_evtchn(dev, &info->evtchn);
+       err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
        if (err)
                goto fail;
 
-       err = bind_evtchn_to_irqhandler(info->evtchn, blkif_interrupt, 0,
-                                       "blkif", info);
+       err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
+                                       "blkif", rinfo);
        if (err <= 0) {
                xenbus_dev_fatal(dev, err,
                                 "bind_evtchn_to_irqhandler failed");
                goto fail;
        }
-       info->irq = err;
+       rinfo->irq = err;
 
        return 0;
 fail:
@@ -1455,6 +1701,53 @@ fail:
        return err;
 }
 
+/*
+ * Write out the per-ring/queue nodes, including ring-ref and event-channel;
+ * each ring buffer may span multiple pages depending on ->nr_ring_pages.
+ */
+static int write_per_ring_nodes(struct xenbus_transaction xbt,
+                               struct blkfront_ring_info *rinfo, const char *dir)
+{
+       int err;
+       unsigned int i;
+       const char *message = NULL;
+       struct blkfront_info *info = rinfo->dev_info;
+
+       if (info->nr_ring_pages == 1) {
+               err = xenbus_printf(xbt, dir, "ring-ref", "%u", rinfo->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < info->nr_ring_pages; i++) {
+                       char ring_ref_name[RINGREF_NAME_LEN];
+
+                       snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+                       err = xenbus_printf(xbt, dir, ring_ref_name,
+                                           "%u", rinfo->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+       }
+
+       err = xenbus_printf(xbt, dir, "event-channel", "%u", rinfo->evtchn);
+       if (err) {
+               message = "writing event-channel";
+               goto abort_transaction;
+       }
+
+       return 0;
+
+abort_transaction:
+       xenbus_transaction_end(xbt, 1);
+       if (message)
+               xenbus_dev_fatal(info->xbdev, err, "%s", message);
+
+       return err;
+}
 
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_blkback(struct xenbus_device *dev,
@@ -1462,8 +1755,8 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
-       int err, i;
-       unsigned int max_page_order = 0;
+       int err;
+       unsigned int i, max_page_order = 0;
        unsigned int ring_page_order = 0;
 
        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
@@ -1475,10 +1768,14 @@ static int talk_to_blkback(struct xenbus_device *dev,
                info->nr_ring_pages = 1 << ring_page_order;
        }
 
-       /* Create shared ring, alloc event channel. */
-       err = setup_blkring(dev, info);
-       if (err)
-               goto out;
+       for (i = 0; i < info->nr_rings; i++) {
+               struct blkfront_ring_info *rinfo = &info->rinfo[i];
+
+               /* Create shared ring, alloc event channel. */
+               err = setup_blkring(dev, rinfo);
+               if (err)
+                       goto destroy_blkring;
+       }
 
 again:
        err = xenbus_transaction_start(&xbt);
@@ -1487,38 +1784,49 @@ again:
                goto destroy_blkring;
        }
 
-       if (info->nr_ring_pages == 1) {
-               err = xenbus_printf(xbt, dev->nodename,
-                                   "ring-ref", "%u", info->ring_ref[0]);
+       if (info->nr_ring_pages > 1) {
+               err = xenbus_printf(xbt, dev->nodename, "ring-page-order", "%u",
+                                   ring_page_order);
                if (err) {
-                       message = "writing ring-ref";
+                       message = "writing ring-page-order";
                        goto abort_transaction;
                }
+       }
+
+       /* We already got the number of queues/rings in _probe */
+       if (info->nr_rings == 1) {
+               err = write_per_ring_nodes(xbt, &info->rinfo[0], dev->nodename);
+               if (err)
+                       goto destroy_blkring;
        } else {
-               err = xenbus_printf(xbt, dev->nodename,
-                                   "ring-page-order", "%u", ring_page_order);
+               char *path;
+               size_t pathsize;
+
+               err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues", "%u",
+                                   info->nr_rings);
                if (err) {
-                       message = "writing ring-page-order";
+                       message = "writing multi-queue-num-queues";
                        goto abort_transaction;
                }
 
-               for (i = 0; i < info->nr_ring_pages; i++) {
-                       char ring_ref_name[RINGREF_NAME_LEN];
+               pathsize = strlen(dev->nodename) + QUEUE_NAME_LEN;
+               path = kmalloc(pathsize, GFP_KERNEL);
+               if (!path) {
+                       err = -ENOMEM;
+                       message = "ENOMEM while writing ring references";
+                       goto abort_transaction;
+               }
 
-                       snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
-                       err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
-                                           "%u", info->ring_ref[i]);
+               for (i = 0; i < info->nr_rings; i++) {
+                       memset(path, 0, pathsize);
+                       snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i);
+                       err = write_per_ring_nodes(xbt, &info->rinfo[i], path);
                        if (err) {
-                               message = "writing ring-ref";
-                               goto abort_transaction;
+                               kfree(path);
+                               goto destroy_blkring;
                        }
                }
-       }
-       err = xenbus_printf(xbt, dev->nodename,
-                           "event-channel", "%u", info->evtchn);
-       if (err) {
-               message = "writing event-channel";
-               goto abort_transaction;
+               kfree(path);
        }
        err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
                            XEN_IO_PROTO_ABI_NATIVE);
@@ -1540,9 +1848,14 @@ again:
                goto destroy_blkring;
        }
 
-       for (i = 0; i < BLK_RING_SIZE(info); i++)
-               info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+       for (i = 0; i < info->nr_rings; i++) {
+               unsigned int j;
+               struct blkfront_ring_info *rinfo = &info->rinfo[i];
+
+               for (j = 0; j < BLK_RING_SIZE(info); j++)
+                       rinfo->shadow[j].req.u.rw.id = j + 1;
+               rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+       }
        xenbus_switch_state(dev, XenbusStateInitialised);
 
        return 0;
@@ -1553,10 +1866,50 @@ again:
                xenbus_dev_fatal(dev, err, "%s", message);
  destroy_blkring:
        blkif_free(info, 0);
- out:
+
+       kfree(info);
+       dev_set_drvdata(&dev->dev, NULL);
+
        return err;
 }
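
Assuming two queues and a single ring page, the transaction built above
should leave the frontend's xenstore directory looking roughly like this
(paths and values are illustrative):

    <frontend>/multi-queue-num-queues = "2"
    <frontend>/queue-0/ring-ref = "<gref>"
    <frontend>/queue-0/event-channel = "<port>"
    <frontend>/queue-1/ring-ref = "<gref>"
    <frontend>/queue-1/event-channel = "<port>"

With a single queue, ring-ref and event-channel are written directly under
<frontend> instead, so backends that predate multi-queue keep working
unchanged.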
 
+static int negotiate_mq(struct blkfront_info *info)
+{
+       unsigned int backend_max_queues = 0;
+       int err;
+       unsigned int i;
+
+       BUG_ON(info->nr_rings);
+
+       /* Check if backend supports multiple queues. */
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "multi-queue-max-queues", "%u", &backend_max_queues);
+       if (err < 0)
+               backend_max_queues = 1;
+
+       info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+       /* We need at least one ring. */
+       if (!info->nr_rings)
+               info->nr_rings = 1;
+
+       info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
+       if (!info->rinfo) {
+               xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < info->nr_rings; i++) {
+               struct blkfront_ring_info *rinfo;
+
+               rinfo = &info->rinfo[i];
+               INIT_LIST_HEAD(&rinfo->indirect_pages);
+               INIT_LIST_HEAD(&rinfo->grants);
+               rinfo->dev_info = info;
+               INIT_WORK(&rinfo->work, blkif_restart_queue);
+               spin_lock_init(&rinfo->ring_lock);
+       }
+       return 0;
+}
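
The ring-count negotiation reduces to clamping the backend's advertised
maximum against the frontend's xen_blkif_max_queues limit, with a floor of
one. As a stand-alone restatement (illustrative names, not driver code):

#include <stdio.h>

static unsigned int nr_rings_for(unsigned int backend_max,
				 unsigned int frontend_max)
{
	unsigned int n = backend_max < frontend_max ? backend_max : frontend_max;

	return n ? n : 1;	/* we always need at least one ring */
}

int main(void)
{
	printf("%u\n", nr_rings_for(4, 8));	/* backend limits us to 4 */
	printf("%u\n", nr_rings_for(8, 4));	/* user asked for only 4 */
	printf("%u\n", nr_rings_for(1, 0));	/* floor of one ring */
	return 0;
}
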
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffer for communication with the backend, and
@@ -1617,15 +1970,16 @@ static int blkfront_probe(struct xenbus_device *dev,
                return -ENOMEM;
        }
 
-       mutex_init(&info->mutex);
-       spin_lock_init(&info->io_lock);
        info->xbdev = dev;
+       err = negotiate_mq(info);
+       if (err) {
+               kfree(info);
+               return err;
+       }
+
+       mutex_init(&info->mutex);
        info->vdevice = vdevice;
-       INIT_LIST_HEAD(&info->grants);
-       INIT_LIST_HEAD(&info->indirect_pages);
-       info->persistent_gnts_c = 0;
        info->connected = BLKIF_STATE_DISCONNECTED;
-       INIT_WORK(&info->work, blkif_restart_queue);
 
        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
@@ -1649,7 +2003,7 @@ static void split_bio_end(struct bio *bio)
 
 static int blkif_recover(struct blkfront_info *info)
 {
-       int i;
+       unsigned int i, r_index;
        struct request *req, *n;
        struct blk_shadow *copy;
        int rc;
@@ -1660,64 +2014,73 @@ static int blkif_recover(struct blkfront_info *info)
        struct split_bio *split_bio;
        struct list_head requests;
 
-       /* Stage 1: Make a safe copy of the shadow state. */
-       copy = kmemdup(info->shadow, sizeof(info->shadow),
-                      GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
-       if (!copy)
-               return -ENOMEM;
-
-       /* Stage 2: Set up free list. */
-       memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE(info); i++)
-               info->shadow[i].req.u.rw.id = i+1;
-       info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
-
-       rc = blkfront_gather_backend_features(info);
-       if (rc) {
-               kfree(copy);
-               return rc;
-       }
-
+       blkfront_gather_backend_features(info);
        segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
        blk_queue_max_segments(info->rq, segs);
        bio_list_init(&bio_list);
        INIT_LIST_HEAD(&requests);
-       for (i = 0; i < BLK_RING_SIZE(info); i++) {
-               /* Not in use? */
-               if (!copy[i].request)
-                       continue;
 
-               /*
-                * Get the bios in the request so we can re-queue them.
-                */
-               if (copy[i].request->cmd_flags &
-                   (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+       for (r_index = 0; r_index < info->nr_rings; r_index++) {
+               struct blkfront_ring_info *rinfo;
+
+               rinfo = &info->rinfo[r_index];
+               /* Stage 1: Make a safe copy of the shadow state. */
+               copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
+                              GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+               if (!copy)
+                       return -ENOMEM;
+
+               /* Stage 2: Set up free list. */
+               memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
+               for (i = 0; i < BLK_RING_SIZE(info); i++)
+                       rinfo->shadow[i].req.u.rw.id = i+1;
+               rinfo->shadow_free = rinfo->ring.req_prod_pvt;
+               rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+
+               rc = blkfront_setup_indirect(rinfo);
+               if (rc) {
+                       kfree(copy);
+                       return rc;
+               }
+
+               for (i = 0; i < BLK_RING_SIZE(info); i++) {
+                       /* Not in use? */
+                       if (!copy[i].request)
+                               continue;
+
                        /*
-                        * Flush operations don't contain bios, so
-                        * we need to requeue the whole request
+                        * Get the bios in the request so we can re-queue them.
                         */
-                       list_add(&copy[i].request->queuelist, &requests);
-                       continue;
+                       if (copy[i].request->cmd_flags &
+                           (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+                               /*
+                                * Flush operations don't contain bios, so
+                                * we need to requeue the whole request
+                                */
+                               list_add(&copy[i].request->queuelist, &requests);
+                               continue;
+                       }
+                       merge_bio.head = copy[i].request->bio;
+                       merge_bio.tail = copy[i].request->biotail;
+                       bio_list_merge(&bio_list, &merge_bio);
+                       copy[i].request->bio = NULL;
+                       blk_end_request_all(copy[i].request, 0);
                }
-               merge_bio.head = copy[i].request->bio;
-               merge_bio.tail = copy[i].request->biotail;
-               bio_list_merge(&bio_list, &merge_bio);
-               copy[i].request->bio = NULL;
-               blk_end_request_all(copy[i].request, 0);
-       }
-
-       kfree(copy);
 
+               kfree(copy);
+       }
        xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
-       spin_lock_irq(&info->io_lock);
-
        /* Now safe for us to use the shared ring */
        info->connected = BLKIF_STATE_CONNECTED;
 
-       /* Kick any other new requests queued since we resumed */
-       kick_pending_request_queues(info);
+       for (r_index = 0; r_index < info->nr_rings; r_index++) {
+               struct blkfront_ring_info *rinfo;
+
+               rinfo = &info->rinfo[r_index];
+               /* Kick any other new requests queued since we resumed */
+               kick_pending_request_queues(rinfo);
+       }
 
        list_for_each_entry_safe(req, n, &requests, queuelist) {
                /* Requeue pending requests (flush or discard) */
@@ -1725,7 +2088,6 @@ static int blkif_recover(struct blkfront_info *info)
                BUG_ON(req->nr_phys_segments > segs);
                blk_mq_requeue_request(req);
        }
-       spin_unlock_irq(&info->io_lock);
        blk_mq_kick_requeue_list(info->rq);
 
        while ((bio = bio_list_pop(&bio_list)) != NULL) {
@@ -1773,12 +2135,16 @@ static int blkif_recover(struct blkfront_info *info)
 static int blkfront_resume(struct xenbus_device *dev)
 {
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-       int err;
+       int err = 0;
 
        dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
 
        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
+       err = negotiate_mq(info);
+       if (err)
+               return err;
+
        err = talk_to_blkback(dev, info);
 
        /*
@@ -1790,8 +2156,7 @@ static int blkfront_resume(struct xenbus_device *dev)
        return err;
 }
 
-static void
-blkfront_closing(struct blkfront_info *info)
+static void blkfront_closing(struct blkfront_info *info)
 {
        struct xenbus_device *xbdev = info->xbdev;
        struct block_device *bdev = NULL;
@@ -1851,18 +2216,29 @@ static void blkfront_setup_discard(struct blkfront_info *info)
                info->feature_secdiscard = !!discard_secure;
 }
 
-static int blkfront_setup_indirect(struct blkfront_info *info)
+static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
 {
        unsigned int psegs, grants;
        int err, i;
+       struct blkfront_info *info = rinfo->dev_info;
 
-       if (info->max_indirect_segments == 0)
-               grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       if (info->max_indirect_segments == 0) {
+               if (!HAS_EXTRA_REQ)
+                       grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+               else {
+                       /*
+                                * When an extra req is required, the maximum
+                                * number of grants supported is tied to the
+                                * size of a Linux block segment.
+                        */
+                       grants = GRANTS_PER_PSEG;
+               }
+       }
        else
                grants = info->max_indirect_segments;
        psegs = grants / GRANTS_PER_PSEG;
 
-       err = fill_grant_buffer(info,
+       err = fill_grant_buffer(rinfo,
                                (grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info));
        if (err)
                goto out_of_memory;
@@ -1875,31 +2251,31 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
                 */
                int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
 
-               BUG_ON(!list_empty(&info->indirect_pages));
+               BUG_ON(!list_empty(&rinfo->indirect_pages));
                for (i = 0; i < num; i++) {
                        struct page *indirect_page = alloc_page(GFP_NOIO);
                        if (!indirect_page)
                                goto out_of_memory;
-                       list_add(&indirect_page->lru, &info->indirect_pages);
+                       list_add(&indirect_page->lru, &rinfo->indirect_pages);
                }
        }
 
        for (i = 0; i < BLK_RING_SIZE(info); i++) {
-               info->shadow[i].grants_used = kzalloc(
-                       sizeof(info->shadow[i].grants_used[0]) * grants,
+               rinfo->shadow[i].grants_used = kzalloc(
+                       sizeof(rinfo->shadow[i].grants_used[0]) * grants,
                        GFP_NOIO);
-               info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * psegs, GFP_NOIO);
+               rinfo->shadow[i].sg = kzalloc(sizeof(rinfo->shadow[i].sg[0]) * psegs, GFP_NOIO);
                if (info->max_indirect_segments)
-                       info->shadow[i].indirect_grants = kzalloc(
-                               sizeof(info->shadow[i].indirect_grants[0]) *
+                       rinfo->shadow[i].indirect_grants = kzalloc(
+                               sizeof(rinfo->shadow[i].indirect_grants[0]) *
                                INDIRECT_GREFS(grants),
                                GFP_NOIO);
-               if ((info->shadow[i].grants_used == NULL) ||
-                       (info->shadow[i].sg == NULL) ||
+               if ((rinfo->shadow[i].grants_used == NULL) ||
+                       (rinfo->shadow[i].sg == NULL) ||
                     (info->max_indirect_segments &&
-                    (info->shadow[i].indirect_grants == NULL)))
+                    (rinfo->shadow[i].indirect_grants == NULL)))
                        goto out_of_memory;
-               sg_init_table(info->shadow[i].sg, psegs);
+               sg_init_table(rinfo->shadow[i].sg, psegs);
        }
 
 
@@ -1907,16 +2283,16 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
 
 out_of_memory:
        for (i = 0; i < BLK_RING_SIZE(info); i++) {
-               kfree(info->shadow[i].grants_used);
-               info->shadow[i].grants_used = NULL;
-               kfree(info->shadow[i].sg);
-               info->shadow[i].sg = NULL;
-               kfree(info->shadow[i].indirect_grants);
-               info->shadow[i].indirect_grants = NULL;
-       }
-       if (!list_empty(&info->indirect_pages)) {
+               kfree(rinfo->shadow[i].grants_used);
+               rinfo->shadow[i].grants_used = NULL;
+               kfree(rinfo->shadow[i].sg);
+               rinfo->shadow[i].sg = NULL;
+               kfree(rinfo->shadow[i].indirect_grants);
+               rinfo->shadow[i].indirect_grants = NULL;
+       }
+       if (!list_empty(&rinfo->indirect_pages)) {
                struct page *indirect_page, *n;
-               list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
+               list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
                        list_del(&indirect_page->lru);
                        __free_page(indirect_page);
                }
@@ -1927,7 +2303,7 @@ out_of_memory:
 /*
  * Gather all backend feature-*
  */
-static int blkfront_gather_backend_features(struct blkfront_info *info)
+static void blkfront_gather_backend_features(struct blkfront_info *info)
 {
        int err;
        int barrier, flush, discard, persistent;
@@ -1982,8 +2358,6 @@ static int blkfront_gather_backend_features(struct blkfront_info *info)
        else
                info->max_indirect_segments = min(indirect_segments,
                                                  xen_blkif_max_segments);
-
-       return blkfront_setup_indirect(info);
 }
 
 /*
@@ -1996,7 +2370,7 @@ static void blkfront_connect(struct blkfront_info *info)
        unsigned long sector_size;
        unsigned int physical_sector_size;
        unsigned int binfo;
-       int err;
+       int err, i;
 
        switch (info->connected) {
        case BLKIF_STATE_CONNECTED:
@@ -2053,11 +2427,15 @@ static void blkfront_connect(struct blkfront_info *info)
        if (err != 1)
                physical_sector_size = sector_size;
 
-       err = blkfront_gather_backend_features(info);
-       if (err) {
-               xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
-                                info->xbdev->otherend);
-               return;
+       blkfront_gather_backend_features(info);
+       for (i = 0; i < info->nr_rings; i++) {
+               err = blkfront_setup_indirect(&info->rinfo[i]);
+               if (err) {
+                       xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+                                        info->xbdev->otherend);
+                       blkif_free(info, 0);
+                       break;
+               }
        }
 
        err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
@@ -2071,10 +2449,9 @@ static void blkfront_connect(struct blkfront_info *info)
        xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
        /* Kick pending requests. */
-       spin_lock_irq(&info->io_lock);
        info->connected = BLKIF_STATE_CONNECTED;
-       kick_pending_request_queues(info);
-       spin_unlock_irq(&info->io_lock);
+       for (i = 0; i < info->nr_rings; i++)
+               kick_pending_request_queues(&info->rinfo[i]);
 
        add_disk(info->gd);
 
@@ -2095,11 +2472,8 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateInitWait:
                if (dev->state != XenbusStateInitialising)
                        break;
-               if (talk_to_blkback(dev, info)) {
-                       kfree(info);
-                       dev_set_drvdata(&dev->dev, NULL);
+               if (talk_to_blkback(dev, info))
                        break;
-               }
        case XenbusStateInitialising:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
@@ -2108,6 +2482,10 @@ static void blkback_changed(struct xenbus_device *dev,
                break;
 
        case XenbusStateConnected:
+               if (dev->state != XenbusStateInitialised) {
+                       if (talk_to_blkback(dev, info))
+                               break;
+               }
                blkfront_connect(info);
                break;
 
@@ -2281,6 +2659,7 @@ static struct xenbus_driver blkfront_driver = {
 static int __init xlblk_init(void)
 {
        int ret;
+       int nr_cpus = num_online_cpus();
 
        if (!xen_domain())
                return -ENODEV;
@@ -2288,7 +2667,13 @@ static int __init xlblk_init(void)
        if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) {
                pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
                        xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER);
-               xen_blkif_max_ring_order = 0;
+               xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
+       }
+
+       if (xen_blkif_max_queues > nr_cpus) {
+               pr_info("Invalid max_queues (%d), will use default max: %d.\n",
+                       xen_blkif_max_queues, nr_cpus);
+               xen_blkif_max_queues = nr_cpus;
        }
 
        if (!xen_has_pv_disk_devices())
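/*
 * Illustrative sketch, not part of this commit: the same clamp-to-a-sane-
 * maximum idiom xlblk_init() applies to xen_blkif_max_queues above, shown
 * for a hypothetical module parameter.
 */
#include <linux/cpumask.h>
#include <linux/printk.h>

static unsigned int example_max_queues = 4;	/* hypothetical parameter */

static void example_clamp_queues(void)
{
	unsigned int nr_cpus = num_online_cpus();

	if (example_max_queues > nr_cpus) {
		pr_info("max_queues (%u) capped to %u online CPUs\n",
			example_max_queues, nr_cpus);
		example_max_queues = nr_cpus;
	}
}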
index 129d47bcc5fc8d1b9d0ef0e087bbf36cee1336be..9a92c072a485def0b2730340503d1984d5a6dc1d 100644 (file)
@@ -132,7 +132,7 @@ config SUNXI_RSB
          and AC100/AC200 ICs.
 
 config UNIPHIER_SYSTEM_BUS
-       bool "UniPhier System Bus driver"
+       tristate "UniPhier System Bus driver"
        depends on ARCH_UNIPHIER && OF
        default y
        help
index 6575c0fe6a4ea3a1e3bc1bae99010eefd9d7ff94..c3cb76b363c63c54d67343dacf4c9ec20032a95f 100644 (file)
@@ -192,8 +192,10 @@ static int __init vexpress_config_init(void)
        /* Need the config devices early, before the "normal" devices... */
        for_each_compatible_node(node, NULL, "arm,vexpress,config-bus") {
                err = vexpress_config_populate(node);
-               if (err)
+               if (err) {
+                       of_node_put(node);
                        break;
+               }
        }
 
        return err;
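/*
 * Illustrative sketch, not part of this commit: for_each_compatible_node()
 * holds a reference on the node it hands to the loop body and drops it on
 * the next iteration, so breaking out early must drop the last reference
 * by hand -- the leak the hunk above plugs. probe_one() is hypothetical.
 */
#include <linux/of.h>

static int example_probe_all(int (*probe_one)(struct device_node *))
{
	struct device_node *node;
	int err = 0;

	for_each_compatible_node(node, NULL, "vendor,example-bus") {
		err = probe_one(node);
		if (err) {
			of_node_put(node);	/* balance the iterator's get */
			break;
		}
	}
	return err;
}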
index 240b6cf1d97c66fc828f97df90726cb64d500fba..be54e5331a4517899e0e4ace0c48418cbb082a0e 100644 (file)
@@ -42,7 +42,7 @@
 /*
  * The High Precision Event Timer driver.
  * This driver is closely modelled after the rtc.c driver.
- * http://www.intel.com/hardwaredesign/hpetspec_1.pdf
+ * See HPET spec revision 1.
  */
 #define        HPET_USER_FREQ  (64)
 #define        HPET_DRIFT      (500)
index dbf22719462f9481006b5c874ca99200342b4bee..ff00331bff49bdb1890333b73790ded540dc8697 100644 (file)
@@ -372,6 +372,7 @@ config HW_RANDOM_XGENE
 config HW_RANDOM_STM32
        tristate "STMicroelectronics STM32 random number generator"
        depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
+       depends on HAS_IOMEM
        help
          This driver provides kernel-side support for the Random Number
          Generator hardware found on STM32 microcontrollers.
index 9fda22e3387e59030b4a16f4ab1712bb1555b0ce..7fddd8696211f0320011c964a88a37a16133c4ba 100644 (file)
@@ -68,6 +68,7 @@
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 
 #ifdef CONFIG_PARISC
 #include <asm/hardware.h>      /* for register_parisc_driver() stuff */
@@ -2054,8 +2055,6 @@ static int hardcode_find_bmc(void)
 
 #ifdef CONFIG_ACPI
 
-#include <linux/acpi.h>
-
 /*
  * Once we get an ACPI failure, we don't try any more, because we go
  * through the tables sequentially.  Once we don't find a table, there
index 6b1721f978c2945f4af716636c8373fe8807dca0..4f6f94c43412c0773e176cf8d67acd73fb6ca274 100644 (file)
@@ -689,7 +689,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
 {
        loff_t ret;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        switch (orig) {
        case SEEK_CUR:
                offset += file->f_pos;
@@ -706,7 +706,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
        default:
                ret = -EINVAL;
        }
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
        return ret;
 }
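/*
 * Illustrative sketch, not part of this commit: the inode_lock()/
 * inode_unlock() wrappers used above replace open-coded i_mutex locking,
 * so file operations serialize on the inode without naming the mutex.
 * example_llseek() is a hypothetical handler.
 */
#include <linux/fs.h>

static loff_t example_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	loff_t ret = -EINVAL;

	inode_lock(inode);		/* was: mutex_lock(&inode->i_mutex) */
	if (whence == SEEK_SET && offset >= 0) {
		file->f_pos = offset;
		ret = offset;
	}
	inode_unlock(inode);		/* was: mutex_unlock(...) */
	return ret;
}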
 
index f1d7fa45c2759b0ed97d9a54669d705f2547040c..f3f92d5fcda05b2a3692d9f100c7caa48445e5e1 100644 (file)
@@ -93,14 +93,11 @@ struct vma_data {
        spinlock_t lock;        /* Serialize access to this structure. */
        int count;              /* Number of pages allocated. */
        enum mspec_page_type type; /* Type of pages allocated. */
-       int flags;              /* See VMD_xxx below. */
        unsigned long vm_start; /* Original (unsplit) base. */
        unsigned long vm_end;   /* Original (unsplit) end. */
        unsigned long maddr[0]; /* Array of MSPEC addresses. */
 };
 
-#define VMD_VMALLOCED 0x1      /* vmalloc'd rather than kmalloc'd */
-
 /* used on shub2 to clear FOP cache in the HUB */
 static unsigned long scratch_page[MAX_NUMNODES];
 #define SH2_AMO_CACHE_ENTRIES  4
@@ -185,10 +182,7 @@ mspec_close(struct vm_area_struct *vma)
                               "failed to zero page %ld\n", my_page);
        }
 
-       if (vdata->flags & VMD_VMALLOCED)
-               vfree(vdata);
-       else
-               kfree(vdata);
+       kvfree(vdata);
 }
 
 /*
@@ -256,7 +250,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
                                        enum mspec_page_type type)
 {
        struct vma_data *vdata;
-       int pages, vdata_size, flags = 0;
+       int pages, vdata_size;
 
        if (vma->vm_pgoff != 0)
                return -EINVAL;
@@ -271,16 +265,13 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
        vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
        if (vdata_size <= PAGE_SIZE)
                vdata = kzalloc(vdata_size, GFP_KERNEL);
-       else {
+       else
                vdata = vzalloc(vdata_size);
-               flags = VMD_VMALLOCED;
-       }
        if (!vdata)
                return -ENOMEM;
 
        vdata->vm_start = vma->vm_start;
        vdata->vm_end = vma->vm_end;
-       vdata->flags = flags;
        vdata->type = type;
        spin_lock_init(&vdata->lock);
        atomic_set(&vdata->refcnt, 1);
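/*
 * Illustrative sketch, not part of this commit: kvfree() frees memory from
 * either kmalloc() or vmalloc(), which is what lets the hunk above delete
 * the VMD_VMALLOCED bookkeeping flag. alloc_table()/free_table() are
 * hypothetical names.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static void *alloc_table(size_t size)
{
	if (size <= PAGE_SIZE)			/* small: slab allocation */
		return kzalloc(size, GFP_KERNEL);
	return vzalloc(size);			/* large: vmalloc space */
}

static void free_table(void *table)
{
	kvfree(table);	/* correct for both allocation paths */
}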
index 0b311fa277ef097f485d9aa92da18c262a7f10dd..b526dc15c27113d70d67bf23464c4a2e1c8c4418 100644 (file)
@@ -290,9 +290,9 @@ static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datas
 {
        struct inode *inode = file_inode(file);
        int err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = ps3flash_writeback(ps3flash_dev);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index d0da5d852d41e5588bb9bd192431a403a9696848..b583e53366306db870a0d61918156f41985f075e 100644 (file)
@@ -1818,6 +1818,28 @@ unsigned int get_random_int(void)
 }
 EXPORT_SYMBOL(get_random_int);
 
+/*
+ * Same as get_random_int(), but returns unsigned long.
+ */
+unsigned long get_random_long(void)
+{
+       __u32 *hash;
+       unsigned long ret;
+
+       if (arch_get_random_long(&ret))
+               return ret;
+
+       hash = get_cpu_var(get_random_int_hash);
+
+       hash[0] += current->pid + jiffies + random_get_entropy();
+       md5_transform(hash, random_int_secret);
+       ret = *(unsigned long *)hash;
+       put_cpu_var(get_random_int_hash);
+
+       return ret;
+}
+EXPORT_SYMBOL(get_random_long);
+
 /*
  * randomize_range() returns a start address such that
  *
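/*
 * Illustrative sketch, not part of this commit: a typical consumer of the
 * get_random_long() helper added above -- choosing a page-aligned random
 * offset inside a range, as ASLR-style code does. The function name is
 * hypothetical.
 */
#include <linux/mm.h>
#include <linux/random.h>

static unsigned long example_random_offset(unsigned long span_pages)
{
	/* pseudo-uniform page index in [0, span_pages) */
	return (get_random_long() % span_pages) << PAGE_SHIFT;
}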
index b038e36660587aaf97d4c3b41de17d1175635694..bae4be6501dfb06a51dc0aaa3befb91b47156ca0 100644 (file)
@@ -43,7 +43,7 @@ obj-$(CONFIG_COMMON_CLK_SI514)                += clk-si514.o
 obj-$(CONFIG_COMMON_CLK_SI570)         += clk-si570.o
 obj-$(CONFIG_COMMON_CLK_CDCE925)       += clk-cdce925.o
 obj-$(CONFIG_ARCH_STM32)               += clk-stm32f4.o
-obj-$(CONFIG_ARCH_TANGOX)              += clk-tango4.o
+obj-$(CONFIG_ARCH_TANGO)              += clk-tango4.o
 obj-$(CONFIG_CLK_TWL6040)              += clk-twl6040.o
 obj-$(CONFIG_ARCH_U300)                        += clk-u300.o
 obj-$(CONFIG_ARCH_VT8500)              += clk-vt8500.o
index 19fed65587e8206c20f6ee13a1b736276e8922ae..7b09a265d79fc8595a31332ecd40480beaed7bc5 100644 (file)
@@ -289,7 +289,7 @@ static void __init of_gpio_clk_setup(struct device_node *node,
 
        num_parents = of_clk_get_parent_count(node);
        if (num_parents < 0)
-               return;
+               num_parents = 0;
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
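/*
 * Illustrative sketch, not part of this commit: in this kernel,
 * of_clk_get_parent_count() returns an int that can carry a negative
 * errno, so a caller that can cope with "no parents" normalizes the value
 * instead of bailing out -- the behavior change in the hunk above.
 */
#include <linux/clk-provider.h>
#include <linux/of.h>

static unsigned int example_parent_count(struct device_node *np)
{
	int n = of_clk_get_parent_count(np);

	return n < 0 ? 0 : n;	/* treat "unknown" as "none" */
}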
index cd0f2726f5e0dd0da33b06e4518f02fc231cf1f7..89e9ca78bb947ec2ff8242d5d826ccf8449d6259 100644 (file)
@@ -299,7 +299,7 @@ static int scpi_clocks_probe(struct platform_device *pdev)
        /* Add the virtual cpufreq device */
        cpufreq_dev = platform_device_register_simple("scpi-cpufreq",
                                                      -1, NULL, 0);
-       if (!cpufreq_dev)
+       if (IS_ERR(cpufreq_dev))
                pr_warn("unable to register cpufreq device");
 
        return 0;
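/*
 * Illustrative sketch, not part of this commit:
 * platform_device_register_simple() reports failure with ERR_PTR(), never
 * NULL, so the result must be checked with IS_ERR() -- exactly the mistake
 * the hunk above corrects. "example-dev" is a hypothetical device name.
 */
#include <linux/err.h>
#include <linux/platform_device.h>

static int example_register(void)
{
	struct platform_device *pdev;

	pdev = platform_device_register_simple("example-dev", -1, NULL, 0);
	if (IS_ERR(pdev))		/* not: if (!pdev) */
		return PTR_ERR(pdev);
	return 0;
}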
index d5c5bfa35a5aef734a47e6d199c410eebe237d75..3e0b52daa35f8814a413ccfe7599ac80a3f59e8f 100644 (file)
@@ -247,7 +247,7 @@ static struct clk_onecell_data dove_divider_data = {
 
 void __init dove_divider_clk_init(struct device_node *np)
 {
-       void *base;
+       void __iomem *base;
 
        base = of_iomap(np, 0);
        if (WARN_ON(!base))
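/*
 * Illustrative sketch, not part of this commit: of_iomap() hands back an
 * __iomem pointer; keeping the annotation, as the hunk above does, lets
 * sparse flag direct dereferences and forces MMIO access through the
 * readl()/writel() accessors. The function name is hypothetical.
 */
#include <linux/io.h>
#include <linux/of_address.h>

static u32 example_read_reg(struct device_node *np)
{
	void __iomem *base = of_iomap(np, 0);	/* not a plain void * */
	u32 val;

	if (!base)
		return 0;
	val = readl(base);
	iounmap(base);
	return val;
}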
index cf73e539e9f63aee727a5f3bab2e2e28fae4e1d9..070037a29ea5842ba6cd332563570fc1ffd8689c 100644 (file)
@@ -3587,7 +3587,6 @@ static const struct regmap_config gcc_apq8084_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x1fc0,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_apq8084_desc = {
index b692ae881d6a978a8d1e665dd09fa52820f96a90..dd5402bac62029824fd059a353d0c00d4e8576fc 100644 (file)
@@ -3005,7 +3005,6 @@ static const struct regmap_config gcc_ipq806x_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x3e40,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_ipq806x_desc = {
index f6a2b14dfec4ecd026282d01167278df0f2c6c3b..ad413036f7c783ee0ba8576bf512dfe3dea08dda 100644 (file)
@@ -2702,7 +2702,6 @@ static const struct regmap_config gcc_msm8660_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x363c,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_msm8660_desc = {
index e3bf09d7d0ef07f5672f9933dca6dd6896daff4c..8cc9b2868b41a83c1621d0b27bc18598888a6939 100644 (file)
@@ -3336,7 +3336,6 @@ static const struct regmap_config gcc_msm8916_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x80000,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_msm8916_desc = {
index f31111e32d44314f7c808aa56c3b6f78f906bbbb..983dd7dc89a7970e3c1c8d35a427a56b48959929 100644 (file)
@@ -3468,7 +3468,6 @@ static const struct regmap_config gcc_msm8960_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x3660,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct regmap_config gcc_apq8064_regmap_config = {
@@ -3477,7 +3476,6 @@ static const struct regmap_config gcc_apq8064_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x3880,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_msm8960_desc = {
index df164d618e34682ba4f99f9e517c7b1bdd8500fd..335952db309bd3260c4c86cd048417fb43ac9cbd 100644 (file)
@@ -2680,7 +2680,6 @@ static const struct regmap_config gcc_msm8974_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x1fc0,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_msm8974_desc = {
index 62e79fadd5f7aee5772bef7760f9c7e20e721501..db3998e5e2d83d77a5b07f8d1e29490b2e532f6d 100644 (file)
@@ -419,7 +419,6 @@ static const struct regmap_config lcc_ipq806x_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0xfc,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc lcc_ipq806x_desc = {
index bf95bb0ea1b8c536a5e058de40e33edffa412021..4fcf9d1d233c0a0c64c2455efcdd11015da89b18 100644 (file)
@@ -524,7 +524,6 @@ static const struct regmap_config lcc_msm8960_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0xfc,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc lcc_msm8960_desc = {
index 1e703fda8a0f8ee4f18a0434a7ed073c1762ed6a..30777f9f1a439eef1ad5e8d4446017d7f3999392 100644 (file)
@@ -3368,7 +3368,6 @@ static const struct regmap_config mmcc_apq8084_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x5104,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc mmcc_apq8084_desc = {
index d73a048d3b9dc59c62fc3b413a59121d3b47fa06..00e36192a1defe9131f1f1505f89b88b574f4284 100644 (file)
@@ -3029,7 +3029,6 @@ static const struct regmap_config mmcc_msm8960_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x334,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct regmap_config mmcc_apq8064_regmap_config = {
@@ -3038,7 +3037,6 @@ static const struct regmap_config mmcc_apq8064_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x350,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc mmcc_msm8960_desc = {
index bbe28ed936692da75079aace6aa1cbc1e2ce23e4..9d790bcadf25a75a360f8d5aebfdf4ab50237c61 100644 (file)
@@ -2594,7 +2594,6 @@ static const struct regmap_config mmcc_msm8974_regmap_config = {
        .val_bits       = 32,
        .max_register   = 0x5104,
        .fast_io        = true,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc mmcc_msm8974_desc = {
index ebce98033fbb76ea687d439b5842e707f62eeb80..bc7fbac83ab74dfca9e534401bc430f629d01357 100644 (file)
@@ -133,7 +133,7 @@ PNAME(mux_spdif_p)  = { "spdif_src", "spdif_frac", "xin12m" };
 PNAME(mux_uart0_p)     = { "uart0_src", "uart0_frac", "xin24m" };
 PNAME(mux_uart1_p)     = { "uart1_src", "uart1_frac", "xin24m" };
 PNAME(mux_uart2_p)     = { "uart2_src", "uart2_frac", "xin24m" };
-PNAME(mux_mac_p)       = { "mac_pll_src", "ext_gmac" };
+PNAME(mux_mac_p)       = { "mac_pll_src", "rmii_clkin" };
 PNAME(mux_dclk_p)      = { "dclk_lcdc", "dclk_cru" };
 
 static struct rockchip_pll_clock rk3036_pll_clks[] __initdata = {
@@ -224,16 +224,16 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
                        RK2928_CLKGATE_CON(2), 2, GFLAGS),
 
        COMPOSITE_NODIV(SCLK_TIMER0, "sclk_timer0", mux_timer_p, CLK_IGNORE_UNUSED,
-                       RK2928_CLKSEL_CON(2), 4, 1, DFLAGS,
+                       RK2928_CLKSEL_CON(2), 4, 1, MFLAGS,
                        RK2928_CLKGATE_CON(1), 0, GFLAGS),
        COMPOSITE_NODIV(SCLK_TIMER1, "sclk_timer1", mux_timer_p, CLK_IGNORE_UNUSED,
-                       RK2928_CLKSEL_CON(2), 5, 1, DFLAGS,
+                       RK2928_CLKSEL_CON(2), 5, 1, MFLAGS,
                        RK2928_CLKGATE_CON(1), 1, GFLAGS),
        COMPOSITE_NODIV(SCLK_TIMER2, "sclk_timer2", mux_timer_p, CLK_IGNORE_UNUSED,
-                       RK2928_CLKSEL_CON(2), 6, 1, DFLAGS,
+                       RK2928_CLKSEL_CON(2), 6, 1, MFLAGS,
                        RK2928_CLKGATE_CON(2), 4, GFLAGS),
        COMPOSITE_NODIV(SCLK_TIMER3, "sclk_timer3", mux_timer_p, CLK_IGNORE_UNUSED,
-                       RK2928_CLKSEL_CON(2), 7, 1, DFLAGS,
+                       RK2928_CLKSEL_CON(2), 7, 1, MFLAGS,
                        RK2928_CLKGATE_CON(2), 5, GFLAGS),
 
        MUX(0, "uart_pll_clk", mux_pll_src_apll_dpll_gpll_usb480m_p, 0,
@@ -242,11 +242,11 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
                        RK2928_CLKSEL_CON(13), 0, 7, DFLAGS,
                        RK2928_CLKGATE_CON(1), 8, GFLAGS),
        COMPOSITE_NOMUX(0, "uart1_src", "uart_pll_clk", 0,
-                       RK2928_CLKSEL_CON(13), 0, 7, DFLAGS,
-                       RK2928_CLKGATE_CON(1), 8, GFLAGS),
+                       RK2928_CLKSEL_CON(14), 0, 7, DFLAGS,
+                       RK2928_CLKGATE_CON(1), 10, GFLAGS),
        COMPOSITE_NOMUX(0, "uart2_src", "uart_pll_clk", 0,
-                       RK2928_CLKSEL_CON(13), 0, 7, DFLAGS,
-                       RK2928_CLKGATE_CON(1), 8, GFLAGS),
+                       RK2928_CLKSEL_CON(15), 0, 7, DFLAGS,
+                       RK2928_CLKGATE_CON(1), 12, GFLAGS),
        COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(17), 0,
                        RK2928_CLKGATE_CON(1), 9, GFLAGS,
@@ -279,13 +279,13 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
                        RK2928_CLKGATE_CON(3), 2, GFLAGS),
 
        COMPOSITE_NODIV(0, "sclk_sdmmc_src", mux_mmc_src_p, 0,
-                       RK2928_CLKSEL_CON(12), 8, 2, DFLAGS,
+                       RK2928_CLKSEL_CON(12), 8, 2, MFLAGS,
                        RK2928_CLKGATE_CON(2), 11, GFLAGS),
        DIV(SCLK_SDMMC, "sclk_sdmmc", "sclk_sdmmc_src", 0,
                        RK2928_CLKSEL_CON(11), 0, 7, DFLAGS),
 
        COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0,
-                       RK2928_CLKSEL_CON(12), 10, 2, DFLAGS,
+                       RK2928_CLKSEL_CON(12), 10, 2, MFLAGS,
                        RK2928_CLKGATE_CON(2), 13, GFLAGS),
        DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0,
                        RK2928_CLKSEL_CON(11), 8, 7, DFLAGS),
@@ -344,12 +344,12 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
                        RK2928_CLKGATE_CON(10), 5, GFLAGS),
 
        COMPOSITE_NOGATE(0, "mac_pll_src", mux_pll_src_3plls_p, 0,
-                       RK2928_CLKSEL_CON(21), 0, 2, MFLAGS, 4, 5, DFLAGS),
+                       RK2928_CLKSEL_CON(21), 0, 2, MFLAGS, 9, 5, DFLAGS),
        MUX(SCLK_MACREF, "mac_clk_ref", mux_mac_p, CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(21), 3, 1, MFLAGS),
 
        COMPOSITE_NOMUX(SCLK_MAC, "mac_clk", "mac_clk_ref", 0,
-                       RK2928_CLKSEL_CON(21), 9, 5, DFLAGS,
+                       RK2928_CLKSEL_CON(21), 4, 5, DFLAGS,
                        RK2928_CLKGATE_CON(2), 6, GFLAGS),
 
        MUX(SCLK_HDMI, "dclk_hdmi", mux_dclk_p, 0,
index be0ede52226994a0ae7248c8098285e0ebfd4a86..21f3ea909fabdb7c4f753cd4ed817035c15a31e0 100644 (file)
@@ -780,13 +780,13 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
        GATE(PCLK_TSADC, "pclk_tsadc", "pclk_peri", 0, RK3368_CLKGATE_CON(20), 0, GFLAGS),
 
        /* pclk_pd_alive gates */
-       GATE(PCLK_TIMER1, "pclk_timer1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 8, GFLAGS),
-       GATE(PCLK_TIMER0, "pclk_timer0", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 7, GFLAGS),
-       GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(14), 12, GFLAGS),
-       GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(14), 11, GFLAGS),
-       GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 3, GFLAGS),
-       GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 2, GFLAGS),
-       GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 1, GFLAGS),
+       GATE(PCLK_TIMER1, "pclk_timer1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 13, GFLAGS),
+       GATE(PCLK_TIMER0, "pclk_timer0", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 12, GFLAGS),
+       GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(22), 9, GFLAGS),
+       GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(22), 8, GFLAGS),
+       GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 3, GFLAGS),
+       GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 2, GFLAGS),
+       GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 1, GFLAGS),
 
        /*
         * pclk_vio gates
@@ -796,12 +796,12 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
        GATE(0, "pclk_dphytx", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(14), 8, GFLAGS),
 
        /* pclk_pd_pmu gates */
-       GATE(PCLK_PMUGRF, "pclk_pmugrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 0, GFLAGS),
-       GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3368_CLKGATE_CON(17), 4, GFLAGS),
-       GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 3, GFLAGS),
-       GATE(0, "pclk_pmu_noc", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 2, GFLAGS),
-       GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 1, GFLAGS),
-       GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 2, GFLAGS),
+       GATE(PCLK_PMUGRF, "pclk_pmugrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 5, GFLAGS),
+       GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3368_CLKGATE_CON(23), 4, GFLAGS),
+       GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 3, GFLAGS),
+       GATE(0, "pclk_pmu_noc", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 2, GFLAGS),
+       GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 1, GFLAGS),
+       GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 0, GFLAGS),
 
        /* timer gates */
        GATE(0, "sclk_timer15", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 11, GFLAGS),
index e1fe8f35d45c47c553997845657e09d3b71ca9a1..74e7544f861ba083f63f1d5aa886a7a90b0f295f 100644 (file)
@@ -450,8 +450,10 @@ static int load_timings_from_dt(struct tegra_clk_emc *tegra,
                struct emc_timing *timing = tegra->timings + (i++);
 
                err = load_one_timing_from_dt(tegra, timing, child);
-               if (err)
+               if (err) {
+                       of_node_put(child);
                        return err;
+               }
 
                timing->ram_code = ram_code;
        }
@@ -499,9 +501,9 @@ struct clk *tegra_clk_register_emc(void __iomem *base, struct device_node *np,
                 * fuses until the apbmisc driver is loaded.
                 */
                err = load_timings_from_dt(tegra, node, node_ram_code);
+               of_node_put(node);
                if (err)
                        return ERR_PTR(err);
-               of_node_put(node);
                break;
        }
 
index 19ce0738ee764bb13e9dfde5ca542fd2cf472e9a..62ea38187b715814ac5d7b5265cbd4cd934a22c3 100644 (file)
@@ -11,6 +11,7 @@ enum clk_id {
        tegra_clk_afi,
        tegra_clk_amx,
        tegra_clk_amx1,
+       tegra_clk_apb2ape,
        tegra_clk_apbdma,
        tegra_clk_apbif,
        tegra_clk_ape,
index a534bfab30b39ee53c81a89c3a5c5b1d2979144e..6ac3f843e7caa3a8abb6513a0eb9fc2c829d3a39 100644 (file)
 #define PLLE_SS_DISABLE (PLLE_SS_CNTL_BYPASS_SS | PLLE_SS_CNTL_INTERP_RESET |\
                                PLLE_SS_CNTL_SSC_BYP)
 #define PLLE_SS_MAX_MASK 0x1ff
-#define PLLE_SS_MAX_VAL 0x25
+#define PLLE_SS_MAX_VAL_TEGRA114 0x25
+#define PLLE_SS_MAX_VAL_TEGRA210 0x21
 #define PLLE_SS_INC_MASK (0xff << 16)
 #define PLLE_SS_INC_VAL (0x1 << 16)
 #define PLLE_SS_INCINTRV_MASK (0x3f << 24)
-#define PLLE_SS_INCINTRV_VAL (0x20 << 24)
+#define PLLE_SS_INCINTRV_VAL_TEGRA114 (0x20 << 24)
+#define PLLE_SS_INCINTRV_VAL_TEGRA210 (0x23 << 24)
 #define PLLE_SS_COEFFICIENTS_MASK \
        (PLLE_SS_MAX_MASK | PLLE_SS_INC_MASK | PLLE_SS_INCINTRV_MASK)
-#define PLLE_SS_COEFFICIENTS_VAL \
-       (PLLE_SS_MAX_VAL | PLLE_SS_INC_VAL | PLLE_SS_INCINTRV_VAL)
+#define PLLE_SS_COEFFICIENTS_VAL_TEGRA114 \
+       (PLLE_SS_MAX_VAL_TEGRA114 | PLLE_SS_INC_VAL |\
+        PLLE_SS_INCINTRV_VAL_TEGRA114)
+#define PLLE_SS_COEFFICIENTS_VAL_TEGRA210 \
+       (PLLE_SS_MAX_VAL_TEGRA210 | PLLE_SS_INC_VAL |\
+        PLLE_SS_INCINTRV_VAL_TEGRA210)
 
 #define PLLE_AUX_PLLP_SEL      BIT(2)
 #define PLLE_AUX_USE_LOCKDET   BIT(3)
@@ -880,7 +886,7 @@ static int clk_plle_training(struct tegra_clk_pll *pll)
 static int clk_plle_enable(struct clk_hw *hw)
 {
        struct tegra_clk_pll *pll = to_clk_pll(hw);
-       unsigned long input_rate = clk_get_rate(clk_get_parent(hw->clk));
+       unsigned long input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
        struct tegra_clk_pll_freq_table sel;
        u32 val;
        int err;
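/*
 * Illustrative sketch, not part of this commit: inside a clock provider,
 * the clk_hw_*() accessors used above query rates directly on the hw
 * objects, avoiding the hw->clk detour through the consumer API that these
 * hunks remove. The helper name is hypothetical.
 */
#include <linux/clk-provider.h>

static unsigned long example_parent_rate(struct clk_hw *hw)
{
	struct clk_hw *parent = clk_hw_get_parent(hw);

	return parent ? clk_hw_get_rate(parent) : 0;
}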
@@ -1378,7 +1384,7 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw)
        u32 val;
        int ret;
        unsigned long flags = 0;
-       unsigned long input_rate = clk_get_rate(clk_get_parent(hw->clk));
+       unsigned long input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
 
        if (_get_table_rate(hw, &sel, pll->params->fixed_rate, input_rate))
                return -EINVAL;
@@ -1401,7 +1407,7 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw)
        val |= PLLE_MISC_IDDQ_SW_CTRL;
        val &= ~PLLE_MISC_IDDQ_SW_VALUE;
        val |= PLLE_MISC_PLLE_PTS;
-       val |= PLLE_MISC_VREG_BG_CTRL_MASK | PLLE_MISC_VREG_CTRL_MASK;
+       val &= ~(PLLE_MISC_VREG_BG_CTRL_MASK | PLLE_MISC_VREG_CTRL_MASK);
        pll_writel_misc(val, pll);
        udelay(5);
 
@@ -1428,7 +1434,7 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw)
        val = pll_readl(PLLE_SS_CTRL, pll);
        val &= ~(PLLE_SS_CNTL_CENTER | PLLE_SS_CNTL_INVERT);
        val &= ~PLLE_SS_COEFFICIENTS_MASK;
-       val |= PLLE_SS_COEFFICIENTS_VAL;
+       val |= PLLE_SS_COEFFICIENTS_VAL_TEGRA114;
        pll_writel(val, PLLE_SS_CTRL, pll);
        val &= ~(PLLE_SS_CNTL_SSC_BYP | PLLE_SS_CNTL_BYPASS_SS);
        pll_writel(val, PLLE_SS_CTRL, pll);
@@ -2012,9 +2018,9 @@ static int clk_plle_tegra210_enable(struct clk_hw *hw)
        struct tegra_clk_pll *pll = to_clk_pll(hw);
        struct tegra_clk_pll_freq_table sel;
        u32 val;
-       int ret;
+       int ret = 0;
        unsigned long flags = 0;
-       unsigned long input_rate = clk_get_rate(clk_get_parent(hw->clk));
+       unsigned long input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
 
        if (_get_table_rate(hw, &sel, pll->params->fixed_rate, input_rate))
                return -EINVAL;
@@ -2022,22 +2028,20 @@ static int clk_plle_tegra210_enable(struct clk_hw *hw)
        if (pll->lock)
                spin_lock_irqsave(pll->lock, flags);
 
+       val = pll_readl(pll->params->aux_reg, pll);
+       if (val & PLLE_AUX_SEQ_ENABLE)
+               goto out;
+
        val = pll_readl_base(pll);
        val &= ~BIT(30); /* Disable lock override */
        pll_writel_base(val, pll);
 
-       val = pll_readl(pll->params->aux_reg, pll);
-       val |= PLLE_AUX_ENABLE_SWCTL;
-       val &= ~PLLE_AUX_SEQ_ENABLE;
-       pll_writel(val, pll->params->aux_reg, pll);
-       udelay(1);
-
        val = pll_readl_misc(pll);
        val |= PLLE_MISC_LOCK_ENABLE;
        val |= PLLE_MISC_IDDQ_SW_CTRL;
        val &= ~PLLE_MISC_IDDQ_SW_VALUE;
        val |= PLLE_MISC_PLLE_PTS;
-       val |= PLLE_MISC_VREG_BG_CTRL_MASK | PLLE_MISC_VREG_CTRL_MASK;
+       val &= ~(PLLE_MISC_VREG_BG_CTRL_MASK | PLLE_MISC_VREG_CTRL_MASK);
        pll_writel_misc(val, pll);
        udelay(5);
 
@@ -2067,7 +2071,7 @@ static int clk_plle_tegra210_enable(struct clk_hw *hw)
        val = pll_readl(PLLE_SS_CTRL, pll);
        val &= ~(PLLE_SS_CNTL_CENTER | PLLE_SS_CNTL_INVERT);
        val &= ~PLLE_SS_COEFFICIENTS_MASK;
-       val |= PLLE_SS_COEFFICIENTS_VAL;
+       val |= PLLE_SS_COEFFICIENTS_VAL_TEGRA210;
        pll_writel(val, PLLE_SS_CTRL, pll);
        val &= ~(PLLE_SS_CNTL_SSC_BYP | PLLE_SS_CNTL_BYPASS_SS);
        pll_writel(val, PLLE_SS_CTRL, pll);
@@ -2104,15 +2108,25 @@ static void clk_plle_tegra210_disable(struct clk_hw *hw)
        if (pll->lock)
                spin_lock_irqsave(pll->lock, flags);
 
+       /* If PLLE HW sequencer is enabled, SW should not disable PLLE */
+       val = pll_readl(pll->params->aux_reg, pll);
+       if (val & PLLE_AUX_SEQ_ENABLE)
+               goto out;
+
        val = pll_readl_base(pll);
        val &= ~PLLE_BASE_ENABLE;
        pll_writel_base(val, pll);
 
+       val = pll_readl(pll->params->aux_reg, pll);
+       val |= PLLE_AUX_ENABLE_SWCTL | PLLE_AUX_SS_SWCTL;
+       pll_writel(val, pll->params->aux_reg, pll);
+
        val = pll_readl_misc(pll);
        val |= PLLE_MISC_IDDQ_SW_CTRL | PLLE_MISC_IDDQ_SW_VALUE;
        pll_writel_misc(val, pll);
        udelay(1);
 
+out:
        if (pll->lock)
                spin_unlock_irqrestore(pll->lock, flags);
 }
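/*
 * Illustrative sketch, not part of this commit: the guard added in the two
 * hunks above -- while holding the lock, back off as soon as the hardware
 * sequencer owns the PLL, funneling every exit through one unlock path.
 * The register layout and names here are hypothetical.
 */
#include <linux/io.h>
#include <linux/spinlock.h>

static void example_pll_disable(spinlock_t *lock, void __iomem *aux_reg,
				u32 seq_enable_bit)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	if (readl(aux_reg) & seq_enable_bit)
		goto out;	/* HW sequencer in control: hands off */

	/* ... software-controlled disable sequence would go here ... */
out:
	spin_unlock_irqrestore(lock, flags);
}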
index 6ad381a888a6176801494c60bcc4f057f23ebe82..ea2b9cbf9e70b0c10204d5d34dc96ab929d36427 100644 (file)
@@ -773,7 +773,7 @@ static struct tegra_periph_init_data periph_clks[] = {
        XUSB("xusb_dev_src", mux_clkm_pllp_pllc_pllre, CLK_SOURCE_XUSB_DEV_SRC, 95, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_xusb_dev_src),
        XUSB("xusb_dev_src", mux_clkm_pllp_pllre, CLK_SOURCE_XUSB_DEV_SRC, 95, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_xusb_dev_src_8),
        MUX8("dbgapb", mux_pllp_clkm_2, CLK_SOURCE_DBGAPB, 185, TEGRA_PERIPH_NO_RESET, tegra_clk_dbgapb),
-       MUX8("msenc", mux_pllc2_c_c3_pllp_plla1_clkm, CLK_SOURCE_NVENC, 219, 0, tegra_clk_nvenc),
+       MUX8("nvenc", mux_pllc2_c_c3_pllp_plla1_clkm, CLK_SOURCE_NVENC, 219, 0, tegra_clk_nvenc),
        MUX8("nvdec", mux_pllc2_c_c3_pllp_plla1_clkm, CLK_SOURCE_NVDEC, 194, 0, tegra_clk_nvdec),
        MUX8("nvjpg", mux_pllc2_c_c3_pllp_plla1_clkm, CLK_SOURCE_NVJPG, 195, 0, tegra_clk_nvjpg),
        MUX8("ape", mux_plla_pllc4_out0_pllc_pllc4_out1_pllp_pllc4_out2_clkm, CLK_SOURCE_APE, 198, TEGRA_PERIPH_ON_APB, tegra_clk_ape),
@@ -782,7 +782,7 @@ static struct tegra_periph_init_data periph_clks[] = {
        NODIV("sor1", mux_clkm_sor1_brick_sor1_src, CLK_SOURCE_SOR1, 15, MASK(1), 183, 0, tegra_clk_sor1, &sor1_lock),
        MUX8("sdmmc_legacy", mux_pllp_out3_clkm_pllp_pllc4, CLK_SOURCE_SDMMC_LEGACY, 193, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_sdmmc_legacy),
        MUX8("qspi", mux_pllp_pllc_pllc_out1_pllc4_out2_pllc4_out1_clkm_pllc4_out0, CLK_SOURCE_QSPI, 211, TEGRA_PERIPH_ON_APB, tegra_clk_qspi),
-       MUX("vii2c", mux_pllp_pllc_clkm, CLK_SOURCE_VI_I2C, 208, TEGRA_PERIPH_ON_APB, tegra_clk_vi_i2c),
+       I2C("vii2c", mux_pllp_pllc_clkm, CLK_SOURCE_VI_I2C, 208, tegra_clk_vi_i2c),
        MUX("mipibif", mux_pllp_clkm, CLK_SOURCE_MIPIBIF, 173, TEGRA_PERIPH_ON_APB, tegra_clk_mipibif),
        MUX("uartape", mux_pllp_pllc_clkm, CLK_SOURCE_UARTAPE, 212, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_uartape),
        MUX8("tsecb", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_TSECB, 206, 0, tegra_clk_tsecb),
@@ -829,6 +829,7 @@ static struct tegra_periph_init_data gate_clks[] = {
        GATE("xusb_gate", "osc", 143, 0, tegra_clk_xusb_gate, 0),
        GATE("pll_p_out_cpu", "pll_p", 223, 0, tegra_clk_pll_p_out_cpu, 0),
        GATE("pll_p_out_adsp", "pll_p", 187, 0, tegra_clk_pll_p_out_adsp, 0),
+       GATE("apb2ape", "clk_m", 107, 0, tegra_clk_apb2ape, 0),
 };
 
 static struct tegra_periph_init_data div_clks[] = {
index 4559a20e3af6e424e52c7b08930cd7664965fea2..474de0f0c26d80b0f9b20ff845575ce049165038 100644 (file)
@@ -67,7 +67,7 @@ static const char *cclk_lp_parents[] = { "clk_m", "pll_c", "clk_32k", "pll_m",
                                         "pll_p", "pll_p_out4", "unused",
                                         "unused", "pll_x", "pll_x_out0" };
 
-const struct tegra_super_gen_info tegra_super_gen_info_gen4 = {
+static const struct tegra_super_gen_info tegra_super_gen_info_gen4 = {
        .gen = gen4,
        .sclk_parents = sclk_parents,
        .cclk_g_parents = cclk_g_parents,
@@ -93,7 +93,7 @@ static const char *cclk_lp_parents_gen5[] = { "clk_m", "unused", "clk_32k", "unu
                                        "unused", "unused", "unused", "unused",
                                        "dfllCPU_out" };
 
-const struct tegra_super_gen_info tegra_super_gen_info_gen5 = {
+static const struct tegra_super_gen_info tegra_super_gen_info_gen5 = {
        .gen = gen5,
        .sclk_parents = sclk_parents_gen5,
        .cclk_g_parents = cclk_g_parents_gen5,
@@ -171,7 +171,7 @@ static void __init tegra_sclk_init(void __iomem *clk_base,
        *dt_clk = clk;
 }
 
-void __init tegra_super_clk_init(void __iomem *clk_base,
+static void __init tegra_super_clk_init(void __iomem *clk_base,
                                void __iomem *pmc_base,
                                struct tegra_clk *tegra_clks,
                                struct tegra_clk_pll_params *params,
index 58514c44ea830c476b8b23606e962ecf8ef8ef7a..637041fd53ad11b95cd305952cb0a80eaf0d3b61 100644 (file)
@@ -59,8 +59,8 @@
 #define PLLC3_MISC3 0x50c
 
 #define PLLM_BASE 0x90
-#define PLLM_MISC0 0x9c
 #define PLLM_MISC1 0x98
+#define PLLM_MISC2 0x9c
 #define PLLP_BASE 0xa0
 #define PLLP_MISC0 0xac
 #define PLLP_MISC1 0x680
@@ -99,7 +99,7 @@
 #define PLLC4_MISC0 0x5a8
 #define PLLC4_OUT 0x5e4
 #define PLLMB_BASE 0x5e8
-#define PLLMB_MISC0 0x5ec
+#define PLLMB_MISC1 0x5ec
 #define PLLA1_BASE 0x6a4
 #define PLLA1_MISC0 0x6a8
 #define PLLA1_MISC1 0x6ac
@@ -243,7 +243,8 @@ static unsigned long tegra210_input_freq[] = {
 };
 
 static const char *mux_pllmcp_clkm[] = {
-       "pll_m", "pll_c", "pll_p", "clk_m", "pll_m_ud", "pll_c2", "pll_c3",
+       "pll_m", "pll_c", "pll_p", "clk_m", "pll_m_ud", "pll_mb", "pll_mb",
+       "pll_p",
 };
 #define mux_pllmcp_clkm_idx NULL
 
@@ -367,12 +368,12 @@ static const char *mux_pllmcp_clkm[] = {
 /* PLLMB */
 #define PLLMB_BASE_LOCK                        (1 << 27)
 
-#define PLLMB_MISC0_LOCK_OVERRIDE      (1 << 18)
-#define PLLMB_MISC0_IDDQ               (1 << 17)
-#define PLLMB_MISC0_LOCK_ENABLE                (1 << 16)
+#define PLLMB_MISC1_LOCK_OVERRIDE      (1 << 18)
+#define PLLMB_MISC1_IDDQ               (1 << 17)
+#define PLLMB_MISC1_LOCK_ENABLE                (1 << 16)
 
-#define PLLMB_MISC0_DEFAULT_VALUE      0x00030000
-#define PLLMB_MISC0_WRITE_MASK         0x0007ffff
+#define PLLMB_MISC1_DEFAULT_VALUE      0x00030000
+#define PLLMB_MISC1_WRITE_MASK         0x0007ffff
 
 /* PLLP */
 #define PLLP_BASE_OVERRIDE             (1 << 28)
@@ -457,7 +458,8 @@ static void pllcx_check_defaults(struct tegra_clk_pll_params *params)
                        PLLCX_MISC3_WRITE_MASK);
 }
 
-void tegra210_pllcx_set_defaults(const char *name, struct tegra_clk_pll *pllcx)
+static void tegra210_pllcx_set_defaults(const char *name,
+                                       struct tegra_clk_pll *pllcx)
 {
        pllcx->params->defaults_set = true;
 
@@ -482,22 +484,22 @@ void tegra210_pllcx_set_defaults(const char *name, struct tegra_clk_pll *pllcx)
        udelay(1);
 }
 
-void _pllc_set_defaults(struct tegra_clk_pll *pllcx)
+static void _pllc_set_defaults(struct tegra_clk_pll *pllcx)
 {
        tegra210_pllcx_set_defaults("PLL_C", pllcx);
 }
 
-void _pllc2_set_defaults(struct tegra_clk_pll *pllcx)
+static void _pllc2_set_defaults(struct tegra_clk_pll *pllcx)
 {
        tegra210_pllcx_set_defaults("PLL_C2", pllcx);
 }
 
-void _pllc3_set_defaults(struct tegra_clk_pll *pllcx)
+static void _pllc3_set_defaults(struct tegra_clk_pll *pllcx)
 {
        tegra210_pllcx_set_defaults("PLL_C3", pllcx);
 }
 
-void _plla1_set_defaults(struct tegra_clk_pll *pllcx)
+static void _plla1_set_defaults(struct tegra_clk_pll *pllcx)
 {
        tegra210_pllcx_set_defaults("PLL_A1", pllcx);
 }
@@ -507,7 +509,7 @@ void _plla1_set_defaults(struct tegra_clk_pll *pllcx)
  * PLL with dynamic ramp and fractional SDM. Dynamic ramp is not used.
  * Fractional SDM is allowed to provide exact audio rates.
  */
-void tegra210_plla_set_defaults(struct tegra_clk_pll *plla)
+static void tegra210_plla_set_defaults(struct tegra_clk_pll *plla)
 {
        u32 mask;
        u32 val = readl_relaxed(clk_base + plla->params->base_reg);
@@ -559,7 +561,7 @@ void tegra210_plla_set_defaults(struct tegra_clk_pll *plla)
  * PLLD
  * PLL with fractional SDM.
  */
-void tegra210_plld_set_defaults(struct tegra_clk_pll *plld)
+static void tegra210_plld_set_defaults(struct tegra_clk_pll *plld)
 {
        u32 val;
        u32 mask = 0xffff;
@@ -698,7 +700,7 @@ static void plldss_defaults(const char *pll_name, struct tegra_clk_pll *plldss,
        udelay(1);
 }
 
-void tegra210_plld2_set_defaults(struct tegra_clk_pll *plld2)
+static void tegra210_plld2_set_defaults(struct tegra_clk_pll *plld2)
 {
        plldss_defaults("PLL_D2", plld2, PLLD2_MISC0_DEFAULT_VALUE,
                        PLLD2_MISC1_CFG_DEFAULT_VALUE,
@@ -706,7 +708,7 @@ void tegra210_plld2_set_defaults(struct tegra_clk_pll *plld2)
                        PLLD2_MISC3_CTRL2_DEFAULT_VALUE);
 }
 
-void tegra210_plldp_set_defaults(struct tegra_clk_pll *plldp)
+static void tegra210_plldp_set_defaults(struct tegra_clk_pll *plldp)
 {
        plldss_defaults("PLL_DP", plldp, PLLDP_MISC0_DEFAULT_VALUE,
                        PLLDP_MISC1_CFG_DEFAULT_VALUE,
@@ -719,7 +721,7 @@ void tegra210_plldp_set_defaults(struct tegra_clk_pll *plldp)
  * Base and misc0 layout is the same as PLLD2/PLLDP, but no SDM/SSC support.
  * VCO is exposed to the clock tree via fixed 1/3 and 1/5 dividers.
  */
-void tegra210_pllc4_set_defaults(struct tegra_clk_pll *pllc4)
+static void tegra210_pllc4_set_defaults(struct tegra_clk_pll *pllc4)
 {
        plldss_defaults("PLL_C4", pllc4, PLLC4_MISC0_DEFAULT_VALUE, 0, 0, 0);
 }
@@ -728,7 +730,7 @@ void tegra210_pllc4_set_defaults(struct tegra_clk_pll *pllc4)
  * PLLRE
  * VCO is exposed to the clock tree directly along with post-divider output
  */
-void tegra210_pllre_set_defaults(struct tegra_clk_pll *pllre)
+static void tegra210_pllre_set_defaults(struct tegra_clk_pll *pllre)
 {
        u32 mask;
        u32 val = readl_relaxed(clk_base + pllre->params->base_reg);
@@ -780,13 +782,13 @@ static void pllx_get_dyn_steps(struct clk_hw *hw, u32 *step_a, u32 *step_b)
 {
        unsigned long input_rate;
 
-       if (!IS_ERR_OR_NULL(hw->clk)) {
+       /* cf rate */
+       if (!IS_ERR_OR_NULL(hw->clk))
                input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
-               /* cf rate */
-               input_rate /= tegra_pll_get_fixed_mdiv(hw, input_rate);
-       } else {
+       else
                input_rate = 38400000;
-       }
+
+       input_rate /= tegra_pll_get_fixed_mdiv(hw, input_rate);
 
        switch (input_rate) {
        case 12000000:
@@ -841,7 +843,7 @@ static void pllx_check_defaults(struct tegra_clk_pll *pll)
                        PLLX_MISC5_WRITE_MASK);
 }
 
-void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
+static void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
 {
        u32 val;
        u32 step_a, step_b;
@@ -901,7 +903,7 @@ void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
 }
 
 /* PLLMB */
-void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
+static void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
 {
        u32 mask, val = readl_relaxed(clk_base + pllmb->params->base_reg);
 
@@ -914,15 +916,15 @@ void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
                 * PLL is ON: check if defaults already set, then set those
                 * that can be updated in flight.
                 */
-               val = PLLMB_MISC0_DEFAULT_VALUE & (~PLLMB_MISC0_IDDQ);
-               mask = PLLMB_MISC0_LOCK_ENABLE | PLLMB_MISC0_LOCK_OVERRIDE;
+               val = PLLMB_MISC1_DEFAULT_VALUE & (~PLLMB_MISC1_IDDQ);
+               mask = PLLMB_MISC1_LOCK_ENABLE | PLLMB_MISC1_LOCK_OVERRIDE;
                _pll_misc_chk_default(clk_base, pllmb->params, 0, val,
-                               ~mask & PLLMB_MISC0_WRITE_MASK);
+                               ~mask & PLLMB_MISC1_WRITE_MASK);
 
                /* Enable lock detect */
                val = readl_relaxed(clk_base + pllmb->params->ext_misc_reg[0]);
                val &= ~mask;
-               val |= PLLMB_MISC0_DEFAULT_VALUE & mask;
+               val |= PLLMB_MISC1_DEFAULT_VALUE & mask;
                writel_relaxed(val, clk_base + pllmb->params->ext_misc_reg[0]);
                udelay(1);
 
@@ -930,7 +932,7 @@ void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
        }
 
        /* set IDDQ, enable lock detect */
-       writel_relaxed(PLLMB_MISC0_DEFAULT_VALUE,
+       writel_relaxed(PLLMB_MISC1_DEFAULT_VALUE,
                        clk_base + pllmb->params->ext_misc_reg[0]);
        udelay(1);
 }
@@ -960,7 +962,7 @@ static void pllp_check_defaults(struct tegra_clk_pll *pll, bool enabled)
                        ~mask & PLLP_MISC1_WRITE_MASK);
 }
 
-void tegra210_pllp_set_defaults(struct tegra_clk_pll *pllp)
+static void tegra210_pllp_set_defaults(struct tegra_clk_pll *pllp)
 {
        u32 mask;
        u32 val = readl_relaxed(clk_base + pllp->params->base_reg);
@@ -1022,7 +1024,7 @@ static void pllu_check_defaults(struct tegra_clk_pll *pll, bool hw_control)
                        ~mask & PLLU_MISC1_WRITE_MASK);
 }
 
-void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
+static void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
 {
        u32 val = readl_relaxed(clk_base + pllu->params->base_reg);
 
@@ -1212,8 +1214,9 @@ static void tegra210_clk_pll_set_gain(struct tegra_clk_pll_freq_table *cfg)
        cfg->m *= PLL_SDM_COEFF;
 }
 
-unsigned long tegra210_clk_adjust_vco_min(struct tegra_clk_pll_params *params,
-                                         unsigned long parent_rate)
+static unsigned long
+tegra210_clk_adjust_vco_min(struct tegra_clk_pll_params *params,
+                           unsigned long parent_rate)
 {
        unsigned long vco_min = params->vco_min;
 
@@ -1386,7 +1389,7 @@ static struct tegra_clk_pll_params pll_c_params = {
        .mdiv_default = 3,
        .div_nmp = &pllc_nmp,
        .freq_table = pll_cx_freq_table,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK,
        .set_defaults = _pllc_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1425,7 +1428,7 @@ static struct tegra_clk_pll_params pll_c2_params = {
        .ext_misc_reg[2] = PLLC2_MISC2,
        .ext_misc_reg[3] = PLLC2_MISC3,
        .freq_table = pll_cx_freq_table,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK,
        .set_defaults = _pllc2_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1455,7 +1458,7 @@ static struct tegra_clk_pll_params pll_c3_params = {
        .ext_misc_reg[2] = PLLC3_MISC2,
        .ext_misc_reg[3] = PLLC3_MISC3,
        .freq_table = pll_cx_freq_table,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK,
        .set_defaults = _pllc3_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1505,7 +1508,6 @@ static struct tegra_clk_pll_params pll_c4_vco_params = {
        .base_reg = PLLC4_BASE,
        .misc_reg = PLLC4_MISC0,
        .lock_mask = PLL_BASE_LOCK,
-       .lock_enable_bit_idx = PLLSS_MISC_LOCK_ENABLE,
        .lock_delay = 300,
        .max_p = PLL_QLIN_PDIV_MAX,
        .ext_misc_reg[0] = PLLC4_MISC0,
@@ -1517,8 +1519,7 @@ static struct tegra_clk_pll_params pll_c4_vco_params = {
        .div_nmp = &pllss_nmp,
        .freq_table = pll_c4_vco_freq_table,
        .set_defaults = tegra210_pllc4_set_defaults,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE |
-                TEGRA_PLL_VCO_OUT,
+       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_VCO_OUT,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
 
@@ -1559,15 +1560,15 @@ static struct tegra_clk_pll_params pll_m_params = {
        .vco_min = 800000000,
        .vco_max = 1866000000,
        .base_reg = PLLM_BASE,
-       .misc_reg = PLLM_MISC1,
+       .misc_reg = PLLM_MISC2,
        .lock_mask = PLL_BASE_LOCK,
        .lock_enable_bit_idx = PLLM_MISC_LOCK_ENABLE,
        .lock_delay = 300,
-       .iddq_reg = PLLM_MISC0,
+       .iddq_reg = PLLM_MISC2,
        .iddq_bit_idx = PLLM_IDDQ_BIT,
        .max_p = PLL_QLIN_PDIV_MAX,
-       .ext_misc_reg[0] = PLLM_MISC0,
-       .ext_misc_reg[0] = PLLM_MISC1,
+       .ext_misc_reg[0] = PLLM_MISC2,
+       .ext_misc_reg[1] = PLLM_MISC1,
        .round_p_to_pdiv = pll_qlin_p_to_pdiv,
        .pdiv_tohw = pll_qlin_pdiv_to_hw,
        .div_nmp = &pllm_nmp,
@@ -1586,19 +1587,18 @@ static struct tegra_clk_pll_params pll_mb_params = {
        .vco_min = 800000000,
        .vco_max = 1866000000,
        .base_reg = PLLMB_BASE,
-       .misc_reg = PLLMB_MISC0,
+       .misc_reg = PLLMB_MISC1,
        .lock_mask = PLL_BASE_LOCK,
-       .lock_enable_bit_idx = PLLMB_MISC_LOCK_ENABLE,
        .lock_delay = 300,
-       .iddq_reg = PLLMB_MISC0,
+       .iddq_reg = PLLMB_MISC1,
        .iddq_bit_idx = PLLMB_IDDQ_BIT,
        .max_p = PLL_QLIN_PDIV_MAX,
-       .ext_misc_reg[0] = PLLMB_MISC0,
+       .ext_misc_reg[0] = PLLMB_MISC1,
        .round_p_to_pdiv = pll_qlin_p_to_pdiv,
        .pdiv_tohw = pll_qlin_pdiv_to_hw,
        .div_nmp = &pllm_nmp,
        .freq_table = pll_m_freq_table,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK,
        .set_defaults = tegra210_pllmb_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1671,7 +1671,6 @@ static struct tegra_clk_pll_params pll_re_vco_params = {
        .base_reg = PLLRE_BASE,
        .misc_reg = PLLRE_MISC0,
        .lock_mask = PLLRE_MISC_LOCK,
-       .lock_enable_bit_idx = PLLRE_MISC_LOCK_ENABLE,
        .lock_delay = 300,
        .max_p = PLL_QLIN_PDIV_MAX,
        .ext_misc_reg[0] = PLLRE_MISC0,
@@ -1681,8 +1680,7 @@ static struct tegra_clk_pll_params pll_re_vco_params = {
        .pdiv_tohw = pll_qlin_pdiv_to_hw,
        .div_nmp = &pllre_nmp,
        .freq_table = pll_re_vco_freq_table,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_LOCK_MISC |
-                TEGRA_PLL_HAS_LOCK_ENABLE | TEGRA_PLL_VCO_OUT,
+       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_LOCK_MISC | TEGRA_PLL_VCO_OUT,
        .set_defaults = tegra210_pllre_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1712,7 +1710,6 @@ static struct tegra_clk_pll_params pll_p_params = {
        .base_reg = PLLP_BASE,
        .misc_reg = PLLP_MISC0,
        .lock_mask = PLL_BASE_LOCK,
-       .lock_enable_bit_idx = PLLP_MISC_LOCK_ENABLE,
        .lock_delay = 300,
        .iddq_reg = PLLP_MISC0,
        .iddq_bit_idx = PLLXP_IDDQ_BIT,
@@ -1721,8 +1718,7 @@ static struct tegra_clk_pll_params pll_p_params = {
        .div_nmp = &pllp_nmp,
        .freq_table = pll_p_freq_table,
        .fixed_rate = 408000000,
-       .flags = TEGRA_PLL_FIXED | TEGRA_PLL_USE_LOCK |
-                TEGRA_PLL_HAS_LOCK_ENABLE | TEGRA_PLL_VCO_OUT,
+       .flags = TEGRA_PLL_FIXED | TEGRA_PLL_USE_LOCK | TEGRA_PLL_VCO_OUT,
        .set_defaults = tegra210_pllp_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1750,7 +1746,7 @@ static struct tegra_clk_pll_params pll_a1_params = {
        .ext_misc_reg[2] = PLLA1_MISC2,
        .ext_misc_reg[3] = PLLA1_MISC3,
        .freq_table = pll_cx_freq_table,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK,
        .set_defaults = _plla1_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1787,7 +1783,6 @@ static struct tegra_clk_pll_params pll_a_params = {
        .base_reg = PLLA_BASE,
        .misc_reg = PLLA_MISC0,
        .lock_mask = PLL_BASE_LOCK,
-       .lock_enable_bit_idx = PLLA_MISC_LOCK_ENABLE,
        .lock_delay = 300,
        .round_p_to_pdiv = pll_qlin_p_to_pdiv,
        .pdiv_tohw = pll_qlin_pdiv_to_hw,
@@ -1802,8 +1797,7 @@ static struct tegra_clk_pll_params pll_a_params = {
        .ext_misc_reg[1] = PLLA_MISC1,
        .ext_misc_reg[2] = PLLA_MISC2,
        .freq_table = pll_a_freq_table,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_MDIV_NEW |
-                TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK | TEGRA_MDIV_NEW,
        .set_defaults = tegra210_plla_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
        .set_gain = tegra210_clk_pll_set_gain,
@@ -1836,7 +1830,6 @@ static struct tegra_clk_pll_params pll_d_params = {
        .base_reg = PLLD_BASE,
        .misc_reg = PLLD_MISC0,
        .lock_mask = PLL_BASE_LOCK,
-       .lock_enable_bit_idx = PLLD_MISC_LOCK_ENABLE,
        .lock_delay = 1000,
        .iddq_reg = PLLD_MISC0,
        .iddq_bit_idx = PLLD_IDDQ_BIT,
@@ -1850,7 +1843,7 @@ static struct tegra_clk_pll_params pll_d_params = {
        .ext_misc_reg[0] = PLLD_MISC0,
        .ext_misc_reg[1] = PLLD_MISC1,
        .freq_table = pll_d_freq_table,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK,
        .mdiv_default = 1,
        .set_defaults = tegra210_plld_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
@@ -1876,7 +1869,6 @@ static struct tegra_clk_pll_params pll_d2_params = {
        .base_reg = PLLD2_BASE,
        .misc_reg = PLLD2_MISC0,
        .lock_mask = PLL_BASE_LOCK,
-       .lock_enable_bit_idx = PLLSS_MISC_LOCK_ENABLE,
        .lock_delay = 300,
        .iddq_reg = PLLD2_BASE,
        .iddq_bit_idx = PLLSS_IDDQ_BIT,
@@ -1897,7 +1889,7 @@ static struct tegra_clk_pll_params pll_d2_params = {
        .mdiv_default = 1,
        .freq_table = tegra210_pll_d2_freq_table,
        .set_defaults = tegra210_plld2_set_defaults,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
        .set_gain = tegra210_clk_pll_set_gain,
        .adjust_vco = tegra210_clk_adjust_vco_min,
@@ -1920,7 +1912,6 @@ static struct tegra_clk_pll_params pll_dp_params = {
        .base_reg = PLLDP_BASE,
        .misc_reg = PLLDP_MISC,
        .lock_mask = PLL_BASE_LOCK,
-       .lock_enable_bit_idx = PLLSS_MISC_LOCK_ENABLE,
        .lock_delay = 300,
        .iddq_reg = PLLDP_BASE,
        .iddq_bit_idx = PLLSS_IDDQ_BIT,
@@ -1941,7 +1932,7 @@ static struct tegra_clk_pll_params pll_dp_params = {
        .mdiv_default = 1,
        .freq_table = pll_dp_freq_table,
        .set_defaults = tegra210_plldp_set_defaults,
-       .flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+       .flags = TEGRA_PLL_USE_LOCK,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
        .set_gain = tegra210_clk_pll_set_gain,
        .adjust_vco = tegra210_clk_adjust_vco_min,
@@ -1973,7 +1964,6 @@ static struct tegra_clk_pll_params pll_u_vco_params = {
        .base_reg = PLLU_BASE,
        .misc_reg = PLLU_MISC0,
        .lock_mask = PLL_BASE_LOCK,
-       .lock_enable_bit_idx = PLLU_MISC_LOCK_ENABLE,
        .lock_delay = 1000,
        .iddq_reg = PLLU_MISC0,
        .iddq_bit_idx = PLLU_IDDQ_BIT,
@@ -1983,8 +1973,7 @@ static struct tegra_clk_pll_params pll_u_vco_params = {
        .pdiv_tohw = pll_qlin_pdiv_to_hw,
        .div_nmp = &pllu_nmp,
        .freq_table = pll_u_freq_table,
-       .flags = TEGRA_PLLU | TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE |
-                TEGRA_PLL_VCO_OUT,
+       .flags = TEGRA_PLLU | TEGRA_PLL_USE_LOCK | TEGRA_PLL_VCO_OUT,
        .set_defaults = tegra210_pllu_set_defaults,
        .calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -2218,6 +2207,7 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
        [tegra_clk_pll_c4_out1] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT1, .present = true },
        [tegra_clk_pll_c4_out2] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT2, .present = true },
        [tegra_clk_pll_c4_out3] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT3, .present = true },
+       [tegra_clk_apb2ape] = { .dt_id = TEGRA210_CLK_APB2APE, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
@@ -2519,7 +2509,7 @@ static void __init tegra210_pll_init(void __iomem *clk_base,
 
        /* PLLU_VCO */
        val = readl(clk_base + pll_u_vco_params.base_reg);
-       val &= ~BIT(24); /* disable PLLU_OVERRIDE */
+       val &= ~PLLU_BASE_OVERRIDE; /* disable PLLU_OVERRIDE */
        writel(val, clk_base + pll_u_vco_params.base_reg);
 
        clk = tegra_clk_register_pllre("pll_u_vco", "pll_ref", clk_base, pmc,
@@ -2738,8 +2728,6 @@ static struct tegra_clk_init_table init_table[] __initdata = {
        { TEGRA210_CLK_DFLL_REF, TEGRA210_CLK_PLL_P, 51000000, 1 },
        { TEGRA210_CLK_SBC4, TEGRA210_CLK_PLL_P, 12000000, 1 },
        { TEGRA210_CLK_PLL_RE_VCO, TEGRA210_CLK_CLK_MAX, 672000000, 1 },
-       { TEGRA210_CLK_PLL_U_OUT1, TEGRA210_CLK_CLK_MAX, 48000000, 1 },
-       { TEGRA210_CLK_PLL_U_OUT2, TEGRA210_CLK_CLK_MAX, 60000000, 1 },
        { TEGRA210_CLK_XUSB_GATE, TEGRA210_CLK_CLK_MAX, 0, 1 },
        { TEGRA210_CLK_XUSB_SS_SRC, TEGRA210_CLK_PLL_U_480M, 120000000, 0 },
        { TEGRA210_CLK_XUSB_FS_SRC, TEGRA210_CLK_PLL_U_48M, 48000000, 0 },
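
The PLLU hunk above replaces a bare BIT(24) with the named PLLU_BASE_OVERRIDE, so the read-modify-write documents itself. A minimal user-space sketch of that pattern; the register value and the constant's position are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define PLLU_BASE_OVERRIDE (1u << 24)   /* assumed equal to the old BIT(24) */

int main(void)
{
        uint32_t val = 0x01400000;      /* stand-in for readl(base_reg) */

        val &= ~PLLU_BASE_OVERRIDE;     /* disable PLLU_OVERRIDE */
        printf("base = 0x%08x\n", val); /* bit 24 is now clear */
        return 0;
}
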
index 1c300388782ba19d1377bdbbbf9ab05a8939791d..cc739291a3ce47466e87023c2bdfd56951e7bf2f 100644 (file)
@@ -460,7 +460,8 @@ int omap3_noncore_dpll_enable(struct clk_hw *hw)
 
        parent = clk_hw_get_parent(hw);
 
-       if (clk_hw_get_rate(hw) == clk_get_rate(dd->clk_bypass)) {
+       if (clk_hw_get_rate(hw) ==
+           clk_hw_get_rate(__clk_get_hw(dd->clk_bypass))) {
                WARN_ON(parent != __clk_get_hw(dd->clk_bypass));
                r = _omap3_noncore_dpll_bypass(clk);
        } else {
index e62f8cb2c9b53ec4f22811549878ad844e24b612..3bca438ecd19dabbc3df761dedb81c96a463b449 100644 (file)
@@ -78,6 +78,9 @@ static int vco_set(struct clk_icst *icst, struct icst_vco vco)
        ret = regmap_read(icst->map, icst->vcoreg_off, &val);
        if (ret)
                return ret;
+
+       /* Mask the 19 bits used by the VCO */
+       val &= ~0x7ffff;
        val |= vco.v | (vco.r << 9) | (vco.s << 16);
 
        /* This magic unlocks the VCO so it can be controlled */
index 56777f04d2d96381593b4344ac3306bb5dcafbb4..33db7406c0e274e8c39c7b0d71509a117c324022 100644 (file)
@@ -30,6 +30,8 @@ config CLKSRC_MMIO
 config DIGICOLOR_TIMER
        bool "Digicolor timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       select CLKSRC_MMIO
+       depends on HAS_IOMEM
        help
          Enables the support for the digicolor timer driver.
 
@@ -55,6 +57,7 @@ config ARMADA_370_XP_TIMER
        bool "Armada 370 and XP timer driver" if COMPILE_TEST
        depends on ARM
        select CLKSRC_OF
+       select CLKSRC_MMIO
        help
          Enables the support for the Armada 370 and XP timer driver.
 
@@ -76,6 +79,7 @@ config ORION_TIMER
 config SUN4I_TIMER
        bool "Sun4i timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          Enables support for the Sun4i timer.
@@ -89,6 +93,7 @@ config SUN5I_HSTIMER
 
 config TEGRA_TIMER
        bool "Tegra timer driver" if COMPILE_TEST
+       select CLKSRC_MMIO
        depends on ARM
        help
          Enables support for the Tegra driver.
@@ -96,6 +101,7 @@ config TEGRA_TIMER
 config VT8500_TIMER
        bool "VT8500 timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          Enables support for the VT8500 driver.
 
@@ -131,6 +137,7 @@ config CLKSRC_NOMADIK_MTU_SCHED_CLOCK
 config CLKSRC_DBX500_PRCMU
        bool "Clocksource PRCMU Timer" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          Use the always on PRCMU Timer as clocksource
 
@@ -248,6 +255,7 @@ config CLKSRC_EXYNOS_MCT
 config CLKSRC_SAMSUNG_PWM
        bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          This is a new clocksource driver for the PWM timer found in
          Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver
@@ -257,12 +265,14 @@ config CLKSRC_SAMSUNG_PWM
 config FSL_FTM_TIMER
        bool "Freescale FlexTimer Module driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          Support for Freescale FlexTimer Module (FTM) timer.
 
 config VF_PIT_TIMER
        bool
+       select CLKSRC_MMIO
        help
          Support for Period Interrupt Timer on Freescale Vybrid Family SoCs.
 
@@ -360,6 +370,7 @@ config CLKSRC_TANGO_XTAL
 config CLKSRC_PXA
        bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          This enables OST0 support available on PXA and SA-11x0
@@ -394,6 +405,7 @@ config CLKSRC_ST_LPC
        bool "Low power clocksource found in the LPC" if COMPILE_TEST
        select CLKSRC_OF if OF
        depends on HAS_IOMEM
+       select CLKSRC_MMIO
        help
          Enable this option to use the Low Power controller timer
          as clocksource.
index 6ee91401918eba99a33c67b554da853747a0c5fa..4da2af9694a23b267cebdfa52da624e9f43b61fe 100644 (file)
@@ -98,7 +98,8 @@ static int tc_shutdown(struct clock_event_device *d)
 
        __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
        __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
-       clk_disable(tcd->clk);
+       if (!clockevent_state_detached(d))
+               clk_disable(tcd->clk);
 
        return 0;
 }
index 9bc37c437874a6ba185799680a093598cb213948..0ca74d07005830cec5abaf92a84c59af17325616 100644 (file)
@@ -142,15 +142,16 @@ static int allocate_resources(int cpu, struct device **cdev,
 
 try_again:
        cpu_reg = regulator_get_optional(cpu_dev, reg);
-       if (IS_ERR(cpu_reg)) {
+       ret = PTR_ERR_OR_ZERO(cpu_reg);
+       if (ret) {
                /*
                 * If cpu's regulator supply node is present, but regulator is
                 * not yet registered, we should try deferring probe.
                 */
-               if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) {
+               if (ret == -EPROBE_DEFER) {
                        dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
                                cpu);
-                       return -EPROBE_DEFER;
+                       return ret;
                }
 
                /* Try with "cpu-supply" */
@@ -159,18 +160,16 @@ try_again:
                        goto try_again;
                }
 
-               dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
-                       cpu, PTR_ERR(cpu_reg));
+               dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
        }
 
        cpu_clk = clk_get(cpu_dev, NULL);
-       if (IS_ERR(cpu_clk)) {
+       ret = PTR_ERR_OR_ZERO(cpu_clk);
+       if (ret) {
                /* put regulator */
                if (!IS_ERR(cpu_reg))
                        regulator_put(cpu_reg);
 
-               ret = PTR_ERR(cpu_clk);
-
                /*
                 * If cpu's clk node is present, but clock is not yet
                 * registered, we should try deferring probe.
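
The regulator and clk hunks above switch to PTR_ERR_OR_ZERO() so a single signed ret can be tested and returned directly. A self-contained sketch of the helper's semantics; the error-pointer encoding mirrors the kernel's scheme, but the constants here are illustrative:

#include <stdio.h>

#define MAX_ERRNO 4095
#define EPROBE_DEFER 517

static int is_err(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static int ptr_err_or_zero(const void *ptr)
{
        return is_err(ptr) ? (int)(long)ptr : 0;
}

int main(void)
{
        int obj = 0;
        void *good = &obj;
        void *bad = (void *)(long)-EPROBE_DEFER;

        printf("%d %d\n", ptr_err_or_zero(good), ptr_err_or_zero(bad));
        /* prints: 0 -517 */
        return 0;
}
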
index c35e7da1ed7a185fd95d0f0ae7f7b2961d235a0f..e979ec78b69522edefb2a49b6497c2bbb83e1e62 100644 (file)
@@ -48,11 +48,11 @@ static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy,
                                          bool active)
 {
        do {
-               policy = list_next_entry(policy, policy_list);
-
                /* No more policies in the list */
-               if (&policy->policy_list == &cpufreq_policy_list)
+               if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
                        return NULL;
+
+               policy = list_next_entry(policy, policy_list);
        } while (!suitable_policy(policy, active));
 
        return policy;
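
The traversal fix above tests for the end of the list before computing the next entry, instead of fabricating an entry from the head sentinel and comparing afterwards. A toy version with plain pointers standing in for the list_head machinery (names invented):

#include <stddef.h>
#include <stdio.h>

struct policy {
        int cpu;
        struct policy *next;    /* NULL plays the role of list_is_last() */
};

static struct policy *next_policy(struct policy *p)
{
        /* No more policies in the list */
        if (p->next == NULL)
                return NULL;
        return p->next;
}

int main(void)
{
        struct policy c = { 2, NULL }, b = { 1, &c }, a = { 0, &b };

        for (struct policy *p = &a; p; p = next_policy(p))
                printf("policy for cpu %d\n", p->cpu);
        return 0;
}
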
index bab3a514ec128254d8cff0ccdeaf02f427625d06..e0d111024d4840e8078fc0553c9ef7dabed71447 100644 (file)
@@ -387,16 +387,18 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
        if (!have_governor_per_policy())
                cdata->gdbs_data = dbs_data;
 
+       policy->governor_data = dbs_data;
+
        ret = sysfs_create_group(get_governor_parent_kobj(policy),
                                 get_sysfs_attr(dbs_data));
        if (ret)
                goto reset_gdbs_data;
 
-       policy->governor_data = dbs_data;
-
        return 0;
 
 reset_gdbs_data:
+       policy->governor_data = NULL;
+
        if (!have_governor_per_policy())
                cdata->gdbs_data = NULL;
        cdata->exit(dbs_data, !policy->governor->initialized);
@@ -417,16 +419,19 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy,
        if (!cdbs->shared || cdbs->shared->policy)
                return -EBUSY;
 
-       policy->governor_data = NULL;
        if (!--dbs_data->usage_count) {
                sysfs_remove_group(get_governor_parent_kobj(policy),
                                   get_sysfs_attr(dbs_data));
 
+               policy->governor_data = NULL;
+
                if (!have_governor_per_policy())
                        cdata->gdbs_data = NULL;
 
                cdata->exit(dbs_data, policy->governor->initialized == 1);
                kfree(dbs_data);
+       } else {
+               policy->governor_data = NULL;
        }
 
        free_common_dbs_info(policy, cdata);
index 1d99c97defa9204f52ad8605fd5f25c1d2ad0540..096377232747652f99d8a89e0681da7134b716fe 100644 (file)
@@ -202,7 +202,7 @@ static void __init pxa_cpufreq_init_voltages(void)
        }
 }
 #else
-static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq)
+static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq)
 {
        return 0;
 }
index 344058f8501a2c2ee888189950b79f615e815a02..d5657d50ac4044d5da886050e822517fe1134b6f 100644 (file)
@@ -119,7 +119,6 @@ struct cpuidle_coupled {
 
 #define CPUIDLE_COUPLED_NOT_IDLE       (-1)
 
-static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*
index 046423b0c5ca22aad3455101d3cafae7fc4dda87..f996efc56605a3d4739ae0bcac83099fd65b0751 100644 (file)
@@ -153,7 +153,7 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         * be frozen safely.
         */
        index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
-       if (index >= 0)
+       if (index > 0)
                enter_freeze_proper(drv, dev, index);
 
        return index;
index 3dd69df9c970fa0e86417d6f4e0a147fe8f65885..07d494276aad04195c9e7247089649b302d3082c 100644 (file)
@@ -381,6 +381,7 @@ config CRYPTO_DEV_BFIN_CRC
 
 config CRYPTO_DEV_ATMEL_AES
        tristate "Support for Atmel AES hw accelerator"
+       depends on HAS_DMA
        depends on AT_XDMAC || AT_HDMAC || COMPILE_TEST
        select CRYPTO_AES
        select CRYPTO_AEAD
index 5621612ee92169da0e7f934a6c45a4ee29690f97..3eb3f1279fb7e93306f903c24ce7719ee0672b70 100644 (file)
@@ -280,6 +280,7 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz)
        case AES_GCMHR(2):
        case AES_GCMHR(3):
                snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2);
+               break;
 
        default:
                snprintf(tmp, sz, "0x%02x", offset);
@@ -399,7 +400,7 @@ static int atmel_aes_hw_init(struct atmel_aes_dev *dd)
 {
        int err;
 
-       err = clk_prepare_enable(dd->iclk);
+       err = clk_enable(dd->iclk);
        if (err)
                return err;
 
@@ -429,7 +430,7 @@ static int atmel_aes_hw_version_init(struct atmel_aes_dev *dd)
 
        dev_info(dd->dev, "version: 0x%x\n", dd->hw_version);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
        return 0;
 }
 
@@ -447,7 +448,7 @@ static inline bool atmel_aes_is_encrypt(const struct atmel_aes_dev *dd)
 
 static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
 {
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
        dd->flags &= ~AES_FLAGS_BUSY;
 
        if (dd->is_async)
@@ -2090,10 +2091,14 @@ static int atmel_aes_probe(struct platform_device *pdev)
                goto res_err;
        }
 
-       err = atmel_aes_hw_version_init(aes_dd);
+       err = clk_prepare(aes_dd->iclk);
        if (err)
                goto res_err;
 
+       err = atmel_aes_hw_version_init(aes_dd);
+       if (err)
+               goto iclk_unprepare;
+
        atmel_aes_get_cap(aes_dd);
 
        err = atmel_aes_buff_init(aes_dd);
@@ -2126,6 +2131,8 @@ err_algs:
 err_aes_dma:
        atmel_aes_buff_cleanup(aes_dd);
 err_aes_buff:
+iclk_unprepare:
+       clk_unprepare(aes_dd->iclk);
 res_err:
        tasklet_kill(&aes_dd->done_task);
        tasklet_kill(&aes_dd->queue_task);
@@ -2154,6 +2161,8 @@ static int atmel_aes_remove(struct platform_device *pdev)
        atmel_aes_dma_cleanup(aes_dd);
        atmel_aes_buff_cleanup(aes_dd);
 
+       clk_unprepare(aes_dd->iclk);
+
        return 0;
 }
 
index 20de861aa0ea6c275edb746e14fcb372344fcd48..8bf9914d4d150b439e087688b51c884fdd1d022c 100644 (file)
@@ -782,7 +782,7 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err)
        dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
                        SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
 
        if (req->base.complete)
                req->base.complete(&req->base, err);
@@ -795,7 +795,7 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
 {
        int err;
 
-       err = clk_prepare_enable(dd->iclk);
+       err = clk_enable(dd->iclk);
        if (err)
                return err;
 
@@ -822,7 +822,7 @@ static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd)
        dev_info(dd->dev,
                        "version: 0x%x\n", dd->hw_version);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
 }
 
 static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
@@ -1410,6 +1410,10 @@ static int atmel_sha_probe(struct platform_device *pdev)
                goto res_err;
        }
 
+       err = clk_prepare(sha_dd->iclk);
+       if (err)
+               goto res_err;
+
        atmel_sha_hw_version_init(sha_dd);
 
        atmel_sha_get_cap(sha_dd);
@@ -1421,12 +1425,12 @@ static int atmel_sha_probe(struct platform_device *pdev)
                        if (IS_ERR(pdata)) {
                                dev_err(&pdev->dev, "platform data not available\n");
                                err = PTR_ERR(pdata);
-                               goto res_err;
+                               goto iclk_unprepare;
                        }
                }
                if (!pdata->dma_slave) {
                        err = -ENXIO;
-                       goto res_err;
+                       goto iclk_unprepare;
                }
                err = atmel_sha_dma_init(sha_dd, pdata);
                if (err)
@@ -1457,6 +1461,8 @@ err_algs:
        if (sha_dd->caps.has_dma)
                atmel_sha_dma_cleanup(sha_dd);
 err_sha_dma:
+iclk_unprepare:
+       clk_unprepare(sha_dd->iclk);
 res_err:
        tasklet_kill(&sha_dd->done_task);
 sha_dd_err:
@@ -1483,12 +1489,7 @@ static int atmel_sha_remove(struct platform_device *pdev)
        if (sha_dd->caps.has_dma)
                atmel_sha_dma_cleanup(sha_dd);
 
-       iounmap(sha_dd->io_base);
-
-       clk_put(sha_dd->iclk);
-
-       if (sha_dd->irq >= 0)
-               free_irq(sha_dd->irq, sha_dd);
+       clk_unprepare(sha_dd->iclk);
 
        return 0;
 }
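
The AES and SHA hunks above split the clock handling the same way: the sleepable clk_prepare()/clk_unprepare() run once at probe/remove, while the per-request paths only toggle the atomic-safe clk_enable()/clk_disable(). A stubbed sketch of that lifecycle; the *_stub functions are placeholders, not the common clock framework API:

#include <stdio.h>

static int clk_prepare_stub(void)    { puts("prepare (may sleep, once)"); return 0; }
static void clk_unprepare_stub(void) { puts("unprepare (once)"); }
static int clk_enable_stub(void)     { puts("enable (atomic-safe)"); return 0; }
static void clk_disable_stub(void)   { puts("disable (atomic-safe)"); }

int main(void)
{
        if (clk_prepare_stub())         /* probe time */
                return 1;

        for (int req = 0; req < 2; req++) {     /* per-request hot path */
                clk_enable_stub();
                /* ... program the crypto block ... */
                clk_disable_stub();
        }

        clk_unprepare_stub();           /* remove time */
        return 0;
}
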
index 8abb4bc548cc06a7a8ae97779701d308255ff050..69d4a1326feefa6ff4c404d7e093a658728f6f4a 100644 (file)
@@ -534,8 +534,8 @@ static int caam_probe(struct platform_device *pdev)
         * long pointers in master configuration register
         */
        clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH |
-                     MCFGR_WDENABLE | (sizeof(dma_addr_t) == sizeof(u64) ?
-                                       MCFGR_LONG_PTR : 0));
+                     MCFGR_AWCACHE_BUFF | MCFGR_WDENABLE |
+                     (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
 
        /*
         *  Read the Compile Time parameters and SCFGR to determine
index 0643e3366e3309de88a03e687a2d5353f5715a22..c0656e7f37b5993672002a8a192dc1c9dcf0c4bd 100644 (file)
@@ -306,7 +306,7 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
                return -ENOMEM;
 
        dma->padding_pool = dmam_pool_create("cesa_padding", dev, 72, 1, 0);
-       if (!dma->cache_pool)
+       if (!dma->padding_pool)
                return -ENOMEM;
 
        cesa->dma = dma;
index 0ac0ba86761110db09b9ec68c6c213d6529784eb..1e480f140663530a699d8bf51b402bce394a935e 100644 (file)
@@ -389,7 +389,7 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
 {
        unsigned int base_cnt, cur_cnt;
        unsigned char ae;
-       unsigned int times = MAX_RETRY_TIMES;
+       int times = MAX_RETRY_TIMES;
 
        for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
                qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
@@ -402,7 +402,7 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
                        cur_cnt &= 0xffff;
                } while (times-- && (cur_cnt == base_cnt));
 
-               if (!times) {
+               if (times < 0) {
                        pr_err("QAT: AE%d is inactive!!\n", ae);
                        return -EFAULT;
                }
@@ -453,7 +453,11 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
        void __iomem *csr_addr =
                        (void __iomem *)((uintptr_t)handle->hal_ep_csr_addr_v +
                        ESRAM_AUTO_INIT_CSR_OFFSET);
-       unsigned int csr_val, times = 30;
+       unsigned int csr_val;
+       int times = 30;
+
+       if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID)
+               return 0;
 
        csr_val = ADF_CSR_RD(csr_addr, 0);
        if ((csr_val & ESRAM_AUTO_TINIT) && (csr_val & ESRAM_AUTO_TINIT_DONE))
@@ -467,7 +471,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
                qat_hal_wait_cycles(handle, 0, ESRAM_AUTO_INIT_USED_CYCLES, 0);
                csr_val = ADF_CSR_RD(csr_addr, 0);
        } while (!(csr_val & ESRAM_AUTO_TINIT_DONE) && times--);
-       if ((!times)) {
+       if ((times < 0)) {
                pr_err("QAT: Fail to init eSram!\n");
                return -EFAULT;
        }
@@ -658,7 +662,7 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle)
                        ret = qat_hal_wait_cycles(handle, ae, 20, 1);
                } while (ret && times--);
 
-               if (!times) {
+               if (times < 0) {
                        pr_err("QAT: clear GPR of AE %d failed", ae);
                        return -EINVAL;
                }
@@ -693,14 +697,12 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
        struct adf_bar *misc_bar =
                        &pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)];
-       struct adf_bar *sram_bar =
-                       &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+       struct adf_bar *sram_bar;
 
        handle = kzalloc(sizeof(*handle), GFP_KERNEL);
        if (!handle)
                return -ENOMEM;
 
-       handle->hal_sram_addr_v = sram_bar->virt_addr;
        handle->hal_cap_g_ctl_csr_addr_v =
                (void __iomem *)((uintptr_t)misc_bar->virt_addr +
                                 ICP_QAT_CAP_OFFSET);
@@ -714,6 +716,11 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
                (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v +
                                 LOCAL_TO_XFER_REG_OFFSET);
        handle->pci_dev = pci_info->pci_dev;
+       if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) {
+               sram_bar =
+                       &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+               handle->hal_sram_addr_v = sram_bar->virt_addr;
+       }
        handle->fw_auth = (handle->pci_dev->device ==
                           ADF_DH895XCC_PCI_DEVICE_ID) ? false : true;
        handle->hal_handle = kzalloc(sizeof(*handle->hal_handle), GFP_KERNEL);
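
All of the retry-counter hunks above fix the same off-by-one: with a post-decrement in the loop condition, an exhausted counter ends at -1 (or wraps if unsigned), so "!times" never fires. A standalone reproduction of the fixed pattern; the poll body is a stand-in for the driver's register reads:

#include <stdio.h>

int main(void)
{
        int times = 30;         /* was: unsigned int times = 30; */
        int ready = 0;          /* pretend the hardware never signals */

        do {
                /* poll a status register here */
        } while (times-- && !ready);

        if (times < 0)          /* was: if (!times) -- never true on timeout */
                puts("timeout: device did not become ready");
        return 0;
}
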
index 848b93ee930fd8ec204a332755c57b18457bfede..fe9dce0245bf0c7d32b2c8eb3590e7f73651ae6d 100644 (file)
@@ -500,6 +500,8 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq,
        clk_set_min_rate(tegra->emc_clock, rate);
        clk_set_rate(tegra->emc_clock, 0);
 
+       *freq = rate;
+
        return 0;
 }
 
index e893318560db9f79300579ff01e73b07b5baae38..5ad0ec1f0e29f750eee5c70889df5427d7413556 100644 (file)
@@ -156,7 +156,6 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 
        /* Enable interrupts */
        channel_set_bit(dw, MASK.XFER, dwc->mask);
-       channel_set_bit(dw, MASK.BLOCK, dwc->mask);
        channel_set_bit(dw, MASK.ERROR, dwc->mask);
 
        dwc->initialized = true;
@@ -588,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
 
                spin_unlock_irqrestore(&dwc->lock, flags);
        }
+
+       /* Re-enable interrupts */
+       channel_set_bit(dw, MASK.BLOCK, dwc->mask);
 }
 
 /* ------------------------------------------------------------------------- */
@@ -618,11 +620,8 @@ static void dw_dma_tasklet(unsigned long data)
                        dwc_scan_descriptors(dw, dwc);
        }
 
-       /*
-        * Re-enable interrupts.
-        */
+       /* Re-enable interrupts */
        channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
-       channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
        channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
 }
 
@@ -1261,6 +1260,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 int dw_dma_cyclic_start(struct dma_chan *chan)
 {
        struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
+       struct dw_dma           *dw = to_dw_dma(chan->device);
        unsigned long           flags;
 
        if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) {
@@ -1269,7 +1269,12 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
        }
 
        spin_lock_irqsave(&dwc->lock, flags);
+
+       /* Enable interrupts to perform cyclic transfer */
+       channel_set_bit(dw, MASK.BLOCK, dwc->mask);
+
        dwc_dostart(dwc, dwc->cdesc->desc[0]);
+
        spin_unlock_irqrestore(&dwc->lock, flags);
 
        return 0;
index 4c30fdd092b3b1e5b7e6050b7e7d1afee6c11e57..358f9689a3f5ace77d3dfd601f1873b880704060 100644 (file)
@@ -108,6 +108,10 @@ static const struct pci_device_id dw_pci_id_table[] = {
 
        /* Haswell */
        { PCI_VDEVICE(INTEL, 0x9c60) },
+
+       /* Broadwell */
+       { PCI_VDEVICE(INTEL, 0x9ce0) },
+
        { }
 };
 MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
index d92d655494068992f5a689ff83604158115bee4d..e3d7fcb69b4c2e4ffc4221c8ea8ec4e360648fd7 100644 (file)
 #define GET_NUM_REGN(x)                ((x & 0x300000) >> 20) /* bits 20-21 */
 #define CHMAP_EXIST            BIT(24)
 
+/* CCSTAT register */
+#define EDMA_CCSTAT_ACTV       BIT(4)
+
 /*
  * Max of 20 segments per channel to conserve PaRAM slots
 * Also note that MAX_NR_SG should be at least the number of periods
@@ -1680,9 +1683,20 @@ static void edma_issue_pending(struct dma_chan *chan)
        spin_unlock_irqrestore(&echan->vchan.lock, flags);
 }
 
+/*
+ * This limit exists to avoid a possible infinite loop when waiting for proof
+ * that a particular transfer is completed. This limit can be hit if there
+ * are large bursts to/from slow devices or the CPU is never able to catch
+ * the DMA hardware idle. On an AM335x transfering 48 bytes from the UART
+ * RX-FIFO, as many as 55 loops have been seen.
+ */
+#define EDMA_MAX_TR_WAIT_LOOPS 1000
+
 static u32 edma_residue(struct edma_desc *edesc)
 {
        bool dst = edesc->direction == DMA_DEV_TO_MEM;
+       int loop_count = EDMA_MAX_TR_WAIT_LOOPS;
+       struct edma_chan *echan = edesc->echan;
        struct edma_pset *pset = edesc->pset;
        dma_addr_t done, pos;
        int i;
@@ -1691,7 +1705,32 @@ static u32 edma_residue(struct edma_desc *edesc)
         * We always read the dst/src position from the first PaRAM
         * pset. That's the one which is active now.
         */
-       pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst);
+       pos = edma_get_position(echan->ecc, echan->slot[0], dst);
+
+       /*
+        * "pos" may represent a transfer request that is still being
+        * processed by the EDMACC or EDMATC. We will busy wait until
+        * any one of these situations occurs:
+        *   1. the DMA hardware is idle
+        *   2. a new transfer request is setup
+        *   3. we hit the loop limit
+        */
+       while (edma_read(echan->ecc, EDMA_CCSTAT) & EDMA_CCSTAT_ACTV) {
+               /* check if a new transfer request is setup */
+               if (edma_get_position(echan->ecc,
+                                     echan->slot[0], dst) != pos) {
+                       break;
+               }
+
+               if (!--loop_count) {
+                       dev_dbg_ratelimited(echan->vchan.chan.device->dev,
+                               "%s: timeout waiting for PaRAM update\n",
+                               __func__);
+                       break;
+               }
+
+               cpu_relax();
+       }
 
        /*
         * Cyclic is simple. Just subtract pset[0].addr from pos.
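
The busy-wait added above exits on any of three conditions: the hardware goes idle, a new transfer request shows up, or the loop cap is hit. A compact user-space model of that loop; the register reads are faked and only EDMA_MAX_TR_WAIT_LOOPS is taken from the hunk:

#include <stdio.h>

#define EDMA_MAX_TR_WAIT_LOOPS 1000

static int ccstat_active(int tick)  { return tick < 5; }  /* fake ACTV bit */
static unsigned int position(int tick) { (void)tick; return 0x100; } /* fake PaRAM pos */

int main(void)
{
        int loop_count = EDMA_MAX_TR_WAIT_LOOPS;
        int tick = 0;
        unsigned int pos = position(tick);

        while (ccstat_active(tick)) {
                if (position(tick) != pos)      /* a new transfer was set up */
                        break;
                if (!--loop_count) {            /* bail instead of spinning */
                        puts("timeout waiting for PaRAM update");
                        break;
                }
                tick++;                         /* cpu_relax() stand-in */
        }
        printf("residue computed from pos 0x%x\n", pos);
        return 0;
}
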
index 1d5df2ef148b16d3c379a11e14a7da5283f9d5b8..21539d5c54c3d5c2d2dc3244650691bf414cf879 100644 (file)
@@ -861,32 +861,42 @@ void ioat_timer_event(unsigned long data)
                        return;
        }
 
+       spin_lock_bh(&ioat_chan->cleanup_lock);
+
+       /* handle the no-actives case */
+       if (!ioat_ring_active(ioat_chan)) {
+               spin_lock_bh(&ioat_chan->prep_lock);
+               check_active(ioat_chan);
+               spin_unlock_bh(&ioat_chan->prep_lock);
+               spin_unlock_bh(&ioat_chan->cleanup_lock);
+               return;
+       }
+
        /* if we haven't made progress and we have already
         * acknowledged a pending completion once, then be more
         * forceful with a restart
         */
-       spin_lock_bh(&ioat_chan->cleanup_lock);
        if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
                __cleanup(ioat_chan, phys_complete);
        else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) {
+               u32 chanerr;
+
+               chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+               dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
+               dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
+                        status, chanerr);
+               dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n",
+                        ioat_ring_active(ioat_chan));
+
                spin_lock_bh(&ioat_chan->prep_lock);
                ioat_restart_channel(ioat_chan);
                spin_unlock_bh(&ioat_chan->prep_lock);
                spin_unlock_bh(&ioat_chan->cleanup_lock);
                return;
-       } else {
+       } else
                set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
-               mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
-       }
-
 
-       if (ioat_ring_active(ioat_chan))
-               mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
-       else {
-               spin_lock_bh(&ioat_chan->prep_lock);
-               check_active(ioat_chan);
-               spin_unlock_bh(&ioat_chan->prep_lock);
-       }
+       mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
        spin_unlock_bh(&ioat_chan->cleanup_lock);
 }
 
index 756eca8c4cf8f291025a3ad44f7cbb9981aeb5fd..10e6774ab2a2248d0a04d935b773c15a38519f46 100644 (file)
@@ -221,7 +221,7 @@ sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor,
        }
 
        if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
-           efivar_validate(name, data, size) == false) {
+           efivar_validate(vendor, name, data, size) == false) {
                printk(KERN_ERR "efivars: Malformed variable content\n");
                return -EINVAL;
        }
@@ -447,7 +447,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
        }
 
        if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
-           efivar_validate(name, data, size) == false) {
+           efivar_validate(new_var->VendorGuid, name, data,
+                           size) == false) {
                printk(KERN_ERR "efivars: Malformed variable content\n");
                return -EINVAL;
        }
@@ -540,38 +541,30 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
 static int
 efivar_create_sysfs_entry(struct efivar_entry *new_var)
 {
-       int i, short_name_size;
+       int short_name_size;
        char *short_name;
-       unsigned long variable_name_size;
-       efi_char16_t *variable_name;
+       unsigned long utf8_name_size;
+       efi_char16_t *variable_name = new_var->var.VariableName;
        int ret;
 
-       variable_name = new_var->var.VariableName;
-       variable_name_size = ucs2_strlen(variable_name) * sizeof(efi_char16_t);
-
        /*
-        * Length of the variable bytes in ASCII, plus the '-' separator,
+        * Length of the variable bytes in UTF8, plus the '-' separator,
         * plus the GUID, plus trailing NUL
         */
-       short_name_size = variable_name_size / sizeof(efi_char16_t)
-                               + 1 + EFI_VARIABLE_GUID_LEN + 1;
-
-       short_name = kzalloc(short_name_size, GFP_KERNEL);
+       utf8_name_size = ucs2_utf8size(variable_name);
+       short_name_size = utf8_name_size + 1 + EFI_VARIABLE_GUID_LEN + 1;
 
+       short_name = kmalloc(short_name_size, GFP_KERNEL);
        if (!short_name)
                return -ENOMEM;
 
-       /* Convert Unicode to normal chars (assume top bits are 0),
-          ala UTF-8 */
-       for (i=0; i < (int)(variable_name_size / sizeof(efi_char16_t)); i++) {
-               short_name[i] = variable_name[i] & 0xFF;
-       }
+       ucs2_as_utf8(short_name, variable_name, short_name_size);
+
        /* This is ugly, but necessary to separate one vendor's
           private variables from another's.         */
-
-       *(short_name + strlen(short_name)) = '-';
+       short_name[utf8_name_size] = '-';
        efi_guid_to_str(&new_var->var.VendorGuid,
-                        short_name + strlen(short_name));
+                        short_name + utf8_name_size + 1);
 
        new_var->kobj.kset = efivars_kset;
 
index 70a0fb10517f94ea5b28bada280d9935f0693cc7..7f2ea21c730dd76a86f3ab5a1e253878878960a7 100644 (file)
@@ -165,67 +165,133 @@ validate_ascii_string(efi_char16_t *var_name, int match, u8 *buffer,
 }
 
 struct variable_validate {
+       efi_guid_t vendor;
        char *name;
        bool (*validate)(efi_char16_t *var_name, int match, u8 *data,
                         unsigned long len);
 };
 
+/*
+ * This is the list of variables we need to validate, as well as the
+ * whitelist for what we think is safe not to default to immutable.
+ *
+ * If it has a validate() method that's not NULL, it'll go into the
+ * validation routine.  If not, it is assumed valid, but still used for
+ * whitelisting.
+ *
+ * Note that it's sorted by {vendor,name}, but globbed names must come after
+ * any other name with the same prefix.
+ */
 static const struct variable_validate variable_validate[] = {
-       { "BootNext", validate_uint16 },
-       { "BootOrder", validate_boot_order },
-       { "DriverOrder", validate_boot_order },
-       { "Boot*", validate_load_option },
-       { "Driver*", validate_load_option },
-       { "ConIn", validate_device_path },
-       { "ConInDev", validate_device_path },
-       { "ConOut", validate_device_path },
-       { "ConOutDev", validate_device_path },
-       { "ErrOut", validate_device_path },
-       { "ErrOutDev", validate_device_path },
-       { "Timeout", validate_uint16 },
-       { "Lang", validate_ascii_string },
-       { "PlatformLang", validate_ascii_string },
-       { "", NULL },
+       { EFI_GLOBAL_VARIABLE_GUID, "BootNext", validate_uint16 },
+       { EFI_GLOBAL_VARIABLE_GUID, "BootOrder", validate_boot_order },
+       { EFI_GLOBAL_VARIABLE_GUID, "Boot*", validate_load_option },
+       { EFI_GLOBAL_VARIABLE_GUID, "DriverOrder", validate_boot_order },
+       { EFI_GLOBAL_VARIABLE_GUID, "Driver*", validate_load_option },
+       { EFI_GLOBAL_VARIABLE_GUID, "ConIn", validate_device_path },
+       { EFI_GLOBAL_VARIABLE_GUID, "ConInDev", validate_device_path },
+       { EFI_GLOBAL_VARIABLE_GUID, "ConOut", validate_device_path },
+       { EFI_GLOBAL_VARIABLE_GUID, "ConOutDev", validate_device_path },
+       { EFI_GLOBAL_VARIABLE_GUID, "ErrOut", validate_device_path },
+       { EFI_GLOBAL_VARIABLE_GUID, "ErrOutDev", validate_device_path },
+       { EFI_GLOBAL_VARIABLE_GUID, "Lang", validate_ascii_string },
+       { EFI_GLOBAL_VARIABLE_GUID, "OsIndications", NULL },
+       { EFI_GLOBAL_VARIABLE_GUID, "PlatformLang", validate_ascii_string },
+       { EFI_GLOBAL_VARIABLE_GUID, "Timeout", validate_uint16 },
+       { LINUX_EFI_CRASH_GUID, "*", NULL },
+       { NULL_GUID, "", NULL },
 };
 
+static bool
+variable_matches(const char *var_name, size_t len, const char *match_name,
+                int *match)
+{
+       for (*match = 0; ; (*match)++) {
+               char c = match_name[*match];
+               char u = var_name[*match];
+
+               /* Wildcard in the matching name means we've matched */
+               if (c == '*')
+                       return true;
+
+               /* Case sensitive match */
+               if (!c && *match == len)
+                       return true;
+
+               if (c != u)
+                       return false;
+
+               if (!c)
+                       return true;
+       }
+       return true;
+}
+
 bool
-efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len)
+efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data,
+               unsigned long data_size)
 {
        int i;
-       u16 *unicode_name = var_name;
+       unsigned long utf8_size;
+       u8 *utf8_name;
 
-       for (i = 0; variable_validate[i].validate != NULL; i++) {
-               const char *name = variable_validate[i].name;
-               int match;
+       utf8_size = ucs2_utf8size(var_name);
+       utf8_name = kmalloc(utf8_size + 1, GFP_KERNEL);
+       if (!utf8_name)
+               return false;
 
-               for (match = 0; ; match++) {
-                       char c = name[match];
-                       u16 u = unicode_name[match];
+       ucs2_as_utf8(utf8_name, var_name, utf8_size);
+       utf8_name[utf8_size] = '\0';
 
-                       /* All special variables are plain ascii */
-                       if (u > 127)
-                               return true;
+       for (i = 0; variable_validate[i].name[0] != '\0'; i++) {
+               const char *name = variable_validate[i].name;
+               int match = 0;
 
-                       /* Wildcard in the matching name means we've matched */
-                       if (c == '*')
-                               return variable_validate[i].validate(var_name,
-                                                            match, data, len);
+               if (efi_guidcmp(vendor, variable_validate[i].vendor))
+                       continue;
 
-                       /* Case sensitive match */
-                       if (c != u)
+               if (variable_matches(utf8_name, utf8_size+1, name, &match)) {
+                       if (variable_validate[i].validate == NULL)
                                break;
-
-                       /* Reached the end of the string while matching */
-                       if (!c)
-                               return variable_validate[i].validate(var_name,
-                                                            match, data, len);
+                       kfree(utf8_name);
+                       return variable_validate[i].validate(var_name, match,
+                                                            data, data_size);
                }
        }
-
+       kfree(utf8_name);
        return true;
 }
 EXPORT_SYMBOL_GPL(efivar_validate);
 
+bool
+efivar_variable_is_removable(efi_guid_t vendor, const char *var_name,
+                            size_t len)
+{
+       int i;
+       bool found = false;
+       int match = 0;
+
+       /*
+        * Check if our variable is in the validated variables list
+        */
+       for (i = 0; variable_validate[i].name[0] != '\0'; i++) {
+               if (efi_guidcmp(variable_validate[i].vendor, vendor))
+                       continue;
+
+               if (variable_matches(var_name, len,
+                                    variable_validate[i].name, &match)) {
+                       found = true;
+                       break;
+               }
+       }
+
+       /*
+        * If it's in our list, it is removable.
+        */
+       return found;
+}
+EXPORT_SYMBOL_GPL(efivar_variable_is_removable);
+
 static efi_status_t
 check_var_size(u32 attributes, unsigned long size)
 {
@@ -852,7 +918,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes,
 
        *set = false;
 
-       if (efivar_validate(name, data, *size) == false)
+       if (efivar_validate(*vendor, name, data, *size) == false)
                return -EINVAL;
 
        /*
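
The new variable_matches() above treats '*' in a table entry as a trailing wildcard and otherwise compares exactly and case-sensitively; it also drops the old early return on non-ASCII names. The same logic, lifted into a standalone program for experimenting with match behaviour:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static bool variable_matches(const char *var_name, size_t len,
                             const char *match_name, int *match)
{
        for (*match = 0; ; (*match)++) {
                char c = match_name[*match];
                char u = var_name[*match];

                if (c == '*')                     /* wildcard: matched */
                        return true;
                if (!c && (size_t)*match == len)  /* both ended together */
                        return true;
                if (c != u)                       /* case sensitive compare */
                        return false;
                if (!c)
                        return true;
        }
}

int main(void)
{
        const char *names[] = { "Boot0001", "BootOrder", "Lang" };
        int match;

        for (int i = 0; i < 3; i++)
                printf("%-9s vs \"Boot*\": %s\n", names[i],
                       variable_matches(names[i], strlen(names[i]) + 1,
                                        "Boot*", &match) ? "match" : "no match");
        return 0;
}
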
index 2aeaebd1c6e7af574fca9058cd0cf758e635943b..3f87a03abc222bc31cbc68eef98993c9e3303ad4 100644 (file)
@@ -312,8 +312,8 @@ static int altera_gpio_probe(struct platform_device *pdev)
                handle_simple_irq, IRQ_TYPE_NONE);
 
        if (ret) {
-               dev_info(&pdev->dev, "could not add irqchip\n");
-               return ret;
+               dev_err(&pdev->dev, "could not add irqchip\n");
+               goto teardown;
        }
 
        gpiochip_set_chained_irqchip(&altera_gc->mmchip.gc,
@@ -326,6 +326,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
 skip_irq:
        return 0;
 teardown:
+       of_mm_gpiochip_remove(&altera_gc->mmchip);
        pr_err("%s: registration failed with status %d\n",
                node->full_name, ret);
 
index ec58f4288649b4ee1f0c98f7761439a525ac093e..cd007a67b3021e6e384889bf06ff7c9b292a7906 100644 (file)
@@ -195,7 +195,7 @@ static int davinci_gpio_of_xlate(struct gpio_chip *gc,
 static int davinci_gpio_probe(struct platform_device *pdev)
 {
        int i, base;
-       unsigned ngpio;
+       unsigned ngpio, nbank;
        struct davinci_gpio_controller *chips;
        struct davinci_gpio_platform_data *pdata;
        struct davinci_gpio_regs __iomem *regs;
@@ -224,8 +224,9 @@ static int davinci_gpio_probe(struct platform_device *pdev)
        if (WARN_ON(ARCH_NR_GPIOS < ngpio))
                ngpio = ARCH_NR_GPIOS;
 
+       nbank = DIV_ROUND_UP(ngpio, 32);
        chips = devm_kzalloc(dev,
-                            ngpio * sizeof(struct davinci_gpio_controller),
+                            nbank * sizeof(struct davinci_gpio_controller),
                             GFP_KERNEL);
        if (!chips)
                return -ENOMEM;
@@ -511,7 +512,7 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev)
                        return irq;
                }
 
-               irq_domain = irq_domain_add_legacy(NULL, ngpio, irq, 0,
+               irq_domain = irq_domain_add_legacy(dev->of_node, ngpio, irq, 0,
                                                        &davinci_gpio_irq_ops,
                                                        chips);
                if (!irq_domain) {
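
The allocation fix above sizes the controller array by 32-line banks rather than by GPIO count. The rounding step, shown with the kernel's DIV_ROUND_UP definition and an illustrative GPIO count:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned ngpio = 144;   /* e.g. a DA850-class SoC */
        unsigned nbank = DIV_ROUND_UP(ngpio, 32);

        printf("%u gpios -> %u banks to allocate\n", ngpio, nbank);
        /* prints: 144 gpios -> 5 banks to allocate */
        return 0;
}
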
index 66f729eaf00bb99f09ef5960daf8c7630ceccb96..20c9539abc36e887dea47d00982e308be412d93e 100644 (file)
@@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
        amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
 
 # add asic specific block
-amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o gmc_v7_0.o cik_ih.o kv_smc.o kv_dpm.o \
+amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
        ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
        amdgpu_amdkfd_gfx_v7.o
 
@@ -34,6 +34,7 @@ amdgpu-y += \
 
 # add GMC block
 amdgpu-y += \
+       gmc_v7_0.o \
        gmc_v8_0.o
 
 # add IH block
index 313b0cc8d676d90d1f4073c28579f096afea1a74..5e7770f9a415be24140df77708e9d9d39dee7faa 100644 (file)
@@ -87,6 +87,8 @@ extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_enable_semaphores;
 extern int amdgpu_powerplay;
+extern unsigned amdgpu_pcie_gen_cap;
+extern unsigned amdgpu_pcie_lane_cap;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS         3000
 #define AMDGPU_MAX_USEC_TIMEOUT                        100000  /* 100 ms */
@@ -132,47 +134,6 @@ extern int amdgpu_powerplay;
 #define AMDGPU_RESET_VCE                       (1 << 13)
 #define AMDGPU_RESET_VCE1                      (1 << 14)
 
-/* CG block flags */
-#define AMDGPU_CG_BLOCK_GFX                    (1 << 0)
-#define AMDGPU_CG_BLOCK_MC                     (1 << 1)
-#define AMDGPU_CG_BLOCK_SDMA                   (1 << 2)
-#define AMDGPU_CG_BLOCK_UVD                    (1 << 3)
-#define AMDGPU_CG_BLOCK_VCE                    (1 << 4)
-#define AMDGPU_CG_BLOCK_HDP                    (1 << 5)
-#define AMDGPU_CG_BLOCK_BIF                    (1 << 6)
-
-/* CG flags */
-#define AMDGPU_CG_SUPPORT_GFX_MGCG             (1 << 0)
-#define AMDGPU_CG_SUPPORT_GFX_MGLS             (1 << 1)
-#define AMDGPU_CG_SUPPORT_GFX_CGCG             (1 << 2)
-#define AMDGPU_CG_SUPPORT_GFX_CGLS             (1 << 3)
-#define AMDGPU_CG_SUPPORT_GFX_CGTS             (1 << 4)
-#define AMDGPU_CG_SUPPORT_GFX_CGTS_LS          (1 << 5)
-#define AMDGPU_CG_SUPPORT_GFX_CP_LS            (1 << 6)
-#define AMDGPU_CG_SUPPORT_GFX_RLC_LS           (1 << 7)
-#define AMDGPU_CG_SUPPORT_MC_LS                        (1 << 8)
-#define AMDGPU_CG_SUPPORT_MC_MGCG              (1 << 9)
-#define AMDGPU_CG_SUPPORT_SDMA_LS              (1 << 10)
-#define AMDGPU_CG_SUPPORT_SDMA_MGCG            (1 << 11)
-#define AMDGPU_CG_SUPPORT_BIF_LS               (1 << 12)
-#define AMDGPU_CG_SUPPORT_UVD_MGCG             (1 << 13)
-#define AMDGPU_CG_SUPPORT_VCE_MGCG             (1 << 14)
-#define AMDGPU_CG_SUPPORT_HDP_LS               (1 << 15)
-#define AMDGPU_CG_SUPPORT_HDP_MGCG             (1 << 16)
-
-/* PG flags */
-#define AMDGPU_PG_SUPPORT_GFX_PG               (1 << 0)
-#define AMDGPU_PG_SUPPORT_GFX_SMG              (1 << 1)
-#define AMDGPU_PG_SUPPORT_GFX_DMG              (1 << 2)
-#define AMDGPU_PG_SUPPORT_UVD                  (1 << 3)
-#define AMDGPU_PG_SUPPORT_VCE                  (1 << 4)
-#define AMDGPU_PG_SUPPORT_CP                   (1 << 5)
-#define AMDGPU_PG_SUPPORT_GDS                  (1 << 6)
-#define AMDGPU_PG_SUPPORT_RLC_SMU_HS           (1 << 7)
-#define AMDGPU_PG_SUPPORT_SDMA                 (1 << 8)
-#define AMDGPU_PG_SUPPORT_ACP                  (1 << 9)
-#define AMDGPU_PG_SUPPORT_SAMU                 (1 << 10)
-
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE                 0x00000000L
 #define AMDGPU_GFX_SAFE_MODE                   0x00000001L
@@ -606,8 +567,6 @@ struct amdgpu_sa_manager {
        uint32_t                align;
 };
 
-struct amdgpu_sa_bo;
-
 /* sub-allocation buffer */
 struct amdgpu_sa_bo {
        struct list_head                olist;
@@ -2278,60 +2237,60 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
 
 #define amdgpu_dpm_get_temperature(adev) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
-             (adev)->pm.funcs->get_temperature((adev))
+             (adev)->pm.funcs->get_temperature((adev)))
 
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
-             (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+             (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
 
 #define amdgpu_dpm_get_fan_control_mode(adev) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
-             (adev)->pm.funcs->get_fan_control_mode((adev))
+             (adev)->pm.funcs->get_fan_control_mode((adev)))
 
 #define amdgpu_dpm_set_fan_speed_percent(adev, s) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-             (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+             (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_fan_speed_percent(adev, s) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-             (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+             (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_sclk(adev, l) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
-               (adev)->pm.funcs->get_sclk((adev), (l))
+               (adev)->pm.funcs->get_sclk((adev), (l)))
 
 #define amdgpu_dpm_get_mclk(adev, l)  \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
-             (adev)->pm.funcs->get_mclk((adev), (l))
+             (adev)->pm.funcs->get_mclk((adev), (l)))
 
 
 #define amdgpu_dpm_force_performance_level(adev, l) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
-             (adev)->pm.funcs->force_performance_level((adev), (l))
+             (adev)->pm.funcs->force_performance_level((adev), (l)))
 
 #define amdgpu_dpm_powergate_uvd(adev, g) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
-             (adev)->pm.funcs->powergate_uvd((adev), (g))
+             (adev)->pm.funcs->powergate_uvd((adev), (g)))
 
 #define amdgpu_dpm_powergate_vce(adev, g) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
-             (adev)->pm.funcs->powergate_vce((adev), (g))
+             (adev)->pm.funcs->powergate_vce((adev), (g)))
 
 #define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
-             (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+             (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 
 #define amdgpu_dpm_get_current_power_state(adev) \
        (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
@@ -2360,6 +2319,8 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
                                     uint32_t flags);
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+                                 unsigned long end);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
                                 struct ttm_mem_reg *mem);
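
The dpm macro churn above is one fix repeated: a conditional expression used as a macro body needs outer parentheses, or it binds wrongly against operators at the call site. A two-line demonstration:

#include <stdio.h>

#define PICK_BAD(cond, a, b)    (cond) ? (a) : (b)
#define PICK_GOOD(cond, a, b)   ((cond) ? (a) : (b))

int main(void)
{
        /* 10 + PICK_BAD(...) parses as (10 + 1) ? 1 : 2 */
        printf("bad:  %d\n", 10 + PICK_BAD(1, 1, 2));   /* prints 1  */
        printf("good: %d\n", 10 + PICK_GOOD(1, 1, 2));  /* prints 11 */
        return 0;
}
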
index 0e1376317683e4de30803a987450dba3e01b9621..362bedc9e50791ee6a9b1e60f4885a87ba158677 100644 (file)
@@ -154,7 +154,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_fw_version = get_fw_version
 };
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions()
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
 {
        return (struct kfd2kgd_calls *)&kfd2kgd;
 }
index 79fa5c7de856eab635ea9010ab0b8bfc446c37c2..04b744d64b57a6355f38ff071f48836148880257 100644 (file)
@@ -115,7 +115,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_fw_version = get_fw_version
 };
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions()
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
 {
        return (struct kfd2kgd_calls *)&kfd2kgd;
 }
index a081dda9fa2f6e66e82ee135fcdcd8446bd1674a..7a4b101e10c63564aa3b0b8fffc638c171c1d199 100644 (file)
@@ -795,6 +795,12 @@ static int amdgpu_cgs_query_system_info(void *cgs_device,
        case CGS_SYSTEM_INFO_PCIE_MLW:
                sys_info->value = adev->pm.pcie_mlw_mask;
                break;
+       case CGS_SYSTEM_INFO_CG_FLAGS:
+               sys_info->value = adev->cg_flags;
+               break;
+       case CGS_SYSTEM_INFO_PG_FLAGS:
+               sys_info->value = adev->pg_flags;
+               break;
        default:
                return -ENODEV;
        }
index 6f89f8e034d0eb05b96378581f8dfa7dee0fb319..b882e8175615fba482ac9c8b8f29561e7549dc18 100644 (file)
@@ -478,9 +478,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        unsigned i;
 
-       amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
        if (!error) {
+               amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
index 65531463f88e5b9eb736fdd5e9df357f2f191680..51bfc114584ed5e2fcd086efed40b57f344d2c4f 100644 (file)
@@ -1795,15 +1795,20 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
        }
 
        /* post card */
-       amdgpu_atom_asic_init(adev->mode_info.atom_context);
+       if (!amdgpu_card_posted(adev))
+               amdgpu_atom_asic_init(adev->mode_info.atom_context);
 
        r = amdgpu_resume(adev);
+       if (r)
+               DRM_ERROR("amdgpu_resume failed (%d).\n", r);
 
        amdgpu_fence_driver_resume(adev);
 
-       r = amdgpu_ib_ring_tests(adev);
-       if (r)
-               DRM_ERROR("ib ring test failed (%d).\n", r);
+       if (resume) {
+               r = amdgpu_ib_ring_tests(adev);
+               if (r)
+                       DRM_ERROR("ib ring test failed (%d).\n", r);
+       }
 
        r = amdgpu_late_init(adev);
        if (r)
@@ -1933,80 +1938,97 @@ retry:
        return r;
 }
 
+#define AMDGPU_DEFAULT_PCIE_GEN_MASK 0x30007  /* gen: chipset 1/2, asic 1/2/3 */
+#define AMDGPU_DEFAULT_PCIE_MLW_MASK 0x2f0000 /* 1/2/4/8/16 lanes */
+
 void amdgpu_get_pcie_info(struct amdgpu_device *adev)
 {
        u32 mask;
        int ret;
 
-       if (pci_is_root_bus(adev->pdev->bus))
-               return;
+       if (amdgpu_pcie_gen_cap)
+               adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
 
-       if (amdgpu_pcie_gen2 == 0)
-               return;
+       if (amdgpu_pcie_lane_cap)
+               adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
 
-       if (adev->flags & AMD_IS_APU)
+       /* covers APUs as well */
+       if (pci_is_root_bus(adev->pdev->bus)) {
+               if (adev->pm.pcie_gen_mask == 0)
+                       adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+               if (adev->pm.pcie_mlw_mask == 0)
+                       adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
                return;
+       }
 
-       ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
-       if (!ret) {
-               adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
-                                         CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
-                                         CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
-
-               if (mask & DRM_PCIE_SPEED_25)
-                       adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
-               if (mask & DRM_PCIE_SPEED_50)
-                       adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
-               if (mask & DRM_PCIE_SPEED_80)
-                       adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
-       }
-       ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
-       if (!ret) {
-               switch (mask) {
-               case 32:
-                       adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-                       break;
-               case 16:
-                       adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-                       break;
-               case 12:
-                       adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-                       break;
-               case 8:
-                       adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-                       break;
-               case 4:
-                       adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-                       break;
-               case 2:
-                       adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                                 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-                       break;
-               case 1:
-                       adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
-                       break;
-               default:
-                       break;
+       if (adev->pm.pcie_gen_mask == 0) {
+               ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
+               if (!ret) {
+                       adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+                                                 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+                                                 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+
+                       if (mask & DRM_PCIE_SPEED_25)
+                               adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
+                       if (mask & DRM_PCIE_SPEED_50)
+                               adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
+                       if (mask & DRM_PCIE_SPEED_80)
+                               adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
+               } else {
+                       adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+               }
+       }
+       if (adev->pm.pcie_mlw_mask == 0) {
+               ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
+               if (!ret) {
+                       switch (mask) {
+                       case 32:
+                               adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                               break;
+                       case 16:
+                               adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                               break;
+                       case 12:
+                               adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                               break;
+                       case 8:
+                               adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                               break;
+                       case 4:
+                               adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                               break;
+                       case 2:
+                               adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                                         CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                               break;
+                       case 1:
+                               adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+                               break;
+                       default:
+                               break;
+                       }
+               } else {
+                       adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
                }
        }
 }
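
The rework gives amdgpu_get_pcie_info() a clear precedence: an explicit pcie_gen_cap/pcie_lane_cap module override wins, a device on the root bus (which covers APUs) gets conservative defaults, and everything else queries the DRM helpers with those same defaults as the error fallback. A worked decode of the two default masks, assuming the CAIL_* bit definitions from amd_pcie.h:

    /* AMDGPU_DEFAULT_PCIE_GEN_MASK = 0x30007
     *   0x00000007 = CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1|GEN2|GEN3
     *   0x00030000 = CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1|GEN2 (chipset side)
     *
     * AMDGPU_DEFAULT_PCIE_MLW_MASK = 0x2f0000
     *   0x010000 | 0x020000 | 0x040000 | 0x080000 | 0x200000
     *   = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1|X2|X4|X8|X16
     *   (x12, bit 0x100000, is not part of the default)
     */
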
index acd066d0a8051d0ad17f7311d8db1acd37348060..8297bc319369d6e4e4dd1579195ca1b6161a57d1 100644 (file)
@@ -72,8 +72,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 
        struct drm_crtc *crtc = &amdgpuCrtc->base;
        unsigned long flags;
-       unsigned i;
-       int vpos, hpos, stat, min_udelay;
+       unsigned i, repcnt = 4;
+       int vpos, hpos, stat, min_udelay = 0;
        struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
        amdgpu_flip_wait_fence(adev, &work->excl);
@@ -96,7 +96,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
         * In practice this won't execute very often unless on very fast
         * machines because the time window for this to happen is very small.
         */
-       for (;;) {
+       while (amdgpuCrtc->enabled && repcnt--) {
                /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
                 * start in hpos, and to the "fudged earlier" vblank start in
                 * vpos.
@@ -114,10 +114,22 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
                /* Sleep at least until estimated real start of hw vblank */
                spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
                min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+               if (min_udelay > vblank->framedur_ns / 2000) {
+                       /* Don't wait ridiculously long - something is wrong */
+                       repcnt = 0;
+                       break;
+               }
                usleep_range(min_udelay, 2 * min_udelay);
                spin_lock_irqsave(&crtc->dev->event_lock, flags);
        }
 
+       if (!repcnt)
+               DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+                                "framedur %d, linedur %d, stat %d, vpos %d, "
+                                "hpos %d\n", work->crtc_id, min_udelay,
+                                vblank->framedur_ns / 1000,
+                                vblank->linedur_ns / 1000, stat, vpos, hpos);
+
        /* do the flip (mmio) */
        adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
        /* set the flip status */
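
The vblank line-up loop is now bounded three ways: it only runs while the crtc is enabled, it retries at most four times (repcnt), and it bails out when the computed sleep would exceed half a frame, logging the odd timing instead of stalling the worker. Illustrative numbers, assuming a 1080p@60 mode (linedur_ns ≈ 14800, framedur_ns ≈ 16666000):

    /* hpos = -5 (five scanlines before the fudged vblank start):
     *   min_udelay = (-(-5) + 1) * max(14800 / 1000, 5) = 6 * 14 = 84 us
     *
     * sanity cap: framedur_ns / 2000 = 16666000 / 2000 = 8333 us;
     * a min_udelay above that implies a bogus scanout position, so the
     * loop stops (repcnt = 0) and the DRM_DEBUG_DRIVER line fires.
     */
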
index b5dbbb57349190ce4b28409547781c515283994f..9ef1db87cf260c2a6a8abd423dde4be313f355ae 100644 (file)
@@ -83,6 +83,8 @@ int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
 int amdgpu_enable_semaphores = 0;
 int amdgpu_powerplay = -1;
+unsigned amdgpu_pcie_gen_cap = 0;
+unsigned amdgpu_pcie_lane_cap = 0;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -170,6 +172,12 @@ MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 =
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);
 #endif
 
+MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
+module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
+
+MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
+module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+
 static struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_CIK
        /* Kaveri */
@@ -256,11 +264,11 @@ static struct pci_device_id pciidlist[] = {
        {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
 #endif
        /* topaz */
-       {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
        /* tonga */
        {0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
        {0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
index cfb6caad2a73a4264ab9386f1c6616cf75ef0c77..919146780a15a91434f7383e666eedabdf402ca0 100644 (file)
@@ -333,6 +333,10 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
        if (!adev->mode_info.mode_config_initialized)
                return 0;
 
+       /* don't init fbdev if there are no connectors */
+       if (list_empty(&adev->ddev->mode_config.connector_list))
+               return 0;
+
        /* select 8 bpp console on low vram cards */
        if (adev->mc.real_vram_size <= (32*1024*1024))
                bpp_sel = 8;
index 7380f782cd14a2ba1b3bb182832971368545e404..d20c2a8929cbdaf39164e6c709df67d36b352f35 100644 (file)
@@ -596,7 +596,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                break;
        }
        ttm_eu_backoff_reservation(&ticket, &list);
-       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
+       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
+           !amdgpu_vm_debug)
                amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
 
        drm_gem_object_unreference_unlocked(gobj);
index b1969f2b2038db79af8447fc9826ba8257207cf4..d4e2780c079663a643bddeaa7e2da5e15a2ff47d 100644 (file)
@@ -142,7 +142,8 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 
                list_for_each_entry(bo, &node->bos, mn_list) {
 
-                       if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
+                       if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
+                                                         end))
                                continue;
 
                        r = amdgpu_bo_reserve(bo, true);
index c3ce103b6a33b2ef3a0a6f7f14877e18150ca405..b8fbbd7699e4586e13e57905b0cef818f6e43e6b 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -261,6 +262,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
                                       AMDGPU_GEM_DOMAIN_OA);
 
        bo->flags = flags;
+
+       /* For architectures that don't support WC memory,
+        * mask out the WC flag from the BO
+        */
+       if (!drm_arch_can_wc_memory())
+               bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
        amdgpu_fill_placement_to_bo(bo, placement);
        /* Kernel allocation are uninterruptible */
        r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
@@ -399,7 +407,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
                }
                if (fpfn > bo->placements[i].fpfn)
                        bo->placements[i].fpfn = fpfn;
-               if (lpfn && lpfn < bo->placements[i].lpfn)
+               if (!bo->placements[i].lpfn ||
+                   (lpfn && lpfn < bo->placements[i].lpfn))
                        bo->placements[i].lpfn = lpfn;
                bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
        }
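
In TTM placements an lpfn of 0 means "no upper limit". The old test (lpfn && lpfn < bo->placements[i].lpfn) never tightened an unlimited placement, so pinning with an explicit upper bound could silently ignore it; the new test also applies the caller's bound when the placement had no limit yet:

    /* placements[i].lpfn == 0 (no limit), caller passes lpfn = 0x40000:
     *   old test: 0x40000 < 0 is false      -> caller's bound dropped
     *   new test: !bo->placements[i].lpfn   -> bound applied (0x40000)
     */
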
index 7d8d84eaea4a5ab258eb5dac4a383f6aff81850d..66855b62a6037b89f5adfe759f583ddc5a1919f6 100644 (file)
@@ -113,6 +113,10 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = ddev->dev_private;
 
+       if ((adev->flags & AMD_IS_PX) &&
+            (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+               return snprintf(buf, PAGE_SIZE, "off\n");
+
        if (adev->pp_enabled) {
                enum amd_dpm_forced_level level;
 
@@ -140,6 +144,11 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
        enum amdgpu_dpm_forced_level level;
        int ret = 0;
 
+       /* Can't force performance level when the card is off */
+       if ((adev->flags & AMD_IS_PX) &&
+            (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+               return -EINVAL;
+
        if (strncmp("low", buf, strlen("low")) == 0) {
                level = AMDGPU_DPM_FORCED_LEVEL_LOW;
        } else if (strncmp("high", buf, strlen("high")) == 0) {
@@ -157,6 +166,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
                mutex_lock(&adev->pm.mutex);
                if (adev->pm.dpm.thermal_active) {
                        count = -EINVAL;
+                       mutex_unlock(&adev->pm.mutex);
                        goto fail;
                }
                ret = amdgpu_dpm_force_performance_level(adev, level);
@@ -167,8 +177,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
                mutex_unlock(&adev->pm.mutex);
        }
 fail:
-       mutex_unlock(&adev->pm.mutex);
-
        return count;
 }
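
The unlock under the fail: label used to run on every exit path, which only matched reality on the thermal_active path. A path walk of amdgpu_set_dpm_forced_performance_level() before and after the change:

    /* before:
     *   unknown level string -> goto fail -> mutex_unlock()  (never locked)
     *   thermal_active       -> goto fail -> mutex_unlock()  (correct)
     *   normal               -> mutex_unlock(), fall through to fail:
     *                           -> mutex_unlock() again (double unlock)
     * after: the thermal_active branch unlocks before its goto and the
     * label-side unlock is gone, so every path unlocks exactly once.
     */
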
 
@@ -182,8 +190,14 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
                                      char *buf)
 {
        struct amdgpu_device *adev = dev_get_drvdata(dev);
+       struct drm_device *ddev = adev->ddev;
        int temp;
 
+       /* Can't get temperature when the card is off */
+       if ((adev->flags & AMD_IS_PX) &&
+            (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+               return -EINVAL;
+
        if (!adev->pp_enabled && !adev->pm.funcs->get_temperature)
                temp = 0;
        else
@@ -634,8 +648,6 @@ force:
 
        /* update display watermarks based on new power state */
        amdgpu_display_bandwidth_update(adev);
-       /* update displays */
-       amdgpu_dpm_display_configuration_changed(adev);
 
        adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
        adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
@@ -655,6 +667,9 @@ force:
 
        amdgpu_dpm_post_set_power_state(adev);
 
+       /* update displays */
+       amdgpu_dpm_display_configuration_changed(adev);
+
        if (adev->pm.funcs->force_performance_level) {
                if (adev->pm.dpm.thermal_active) {
                        enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level;
@@ -847,12 +862,16 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct amdgpu_device *adev = dev->dev_private;
+       struct drm_device *ddev = adev->ddev;
 
        if (!adev->pm.dpm_enabled) {
                seq_printf(m, "dpm not enabled\n");
                return 0;
        }
-       if (adev->pp_enabled) {
+       if ((adev->flags & AMD_IS_PX) &&
+            (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
+               seq_printf(m, "PX asic powered off\n");
+       } else if (adev->pp_enabled) {
                amdgpu_dpm_debugfs_print_current_performance_level(adev, m);
        } else {
                mutex_lock(&adev->pm.mutex);
index 5ee9a069027885989f8aad55e9340e1a9d3424ea..b9d0d55f6b47df33ab7ebf7024fe7ca6e821892a 100644 (file)
@@ -99,13 +99,24 @@ static int amdgpu_pp_early_init(void *handle)
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
        switch (adev->asic_type) {
-               case CHIP_TONGA:
-               case CHIP_FIJI:
-                       adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-                       break;
-               default:
-                       adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-                       break;
+       case CHIP_TONGA:
+       case CHIP_FIJI:
+               adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+               break;
+       case CHIP_CARRIZO:
+       case CHIP_STONEY:
+               adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+               break;
+       /* These chips don't have powerplay implementations */
+       case CHIP_BONAIRE:
+       case CHIP_HAWAII:
+       case CHIP_KABINI:
+       case CHIP_MULLINS:
+       case CHIP_KAVERI:
+       case CHIP_TOPAZ:
+       default:
+               adev->pp_enabled = false;
+               break;
        }
 #else
        adev->pp_enabled = false;
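
With amdgpu_powerplay defaulting to -1 (auto, per the MODULE_PARM_DESC above), the reshuffled switch yields the enable matrix below: only Tonga and Fiji get powerplay by default, Carrizo and Stoney remain opt-in, and the CIK parts plus Topaz have no powerplay implementation to enable:

    /* pp_enabled as a function of the module parameter:
     *                      -1 (auto)   0 (off)   1 (on)
     *  TONGA / FIJI          true       false     true
     *  CARRIZO / STONEY      false      false     true
     *  CIK parts, TOPAZ      false      false     false
     */
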
index 78e9b0f14661a0d0f404f67df3789b052dcd9133..d1f234dd21261e06764b9e5b623d11ff08f20fe4 100644 (file)
@@ -487,7 +487,7 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
        seq_printf(m, "rptr: 0x%08x [%5d]\n",
                   rptr, rptr);
 
-       rptr_next = ~0;
+       rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
        seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
                   ring->wptr, ring->wptr);
index 8b88edb0434bfb43ed4b7aae7e6f3e7dc63d0893..ca72a2e487b9b9d846b65479e9b865dd1ef71281 100644 (file)
@@ -354,12 +354,15 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 
                for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
                        if (fences[i])
-                               fences[count++] = fences[i];
+                               fences[count++] = fence_get(fences[i]);
 
                if (count) {
                        spin_unlock(&sa_manager->wq.lock);
                        t = fence_wait_any_timeout(fences, count, false,
                                                   MAX_SCHEDULE_TIMEOUT);
+                       for (i = 0; i < count; ++i)
+                               fence_put(fences[i]);
+
                        r = (t > 0) ? 0 : t;
                        spin_lock(&sa_manager->wq.lock);
                } else {
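
fence_wait_any_timeout() runs after wq.lock is dropped, so another thread could free a fence the local array still points at; taking a reference under the lock and releasing it after the wait closes that race. The general shape of the pattern, as a sketch with assumed declarations rather than the driver code itself:

    spin_lock(&lock);
    f = fence_get(shared->fence);   /* take a ref while protected */
    spin_unlock(&lock);

    fence_wait(f, false);           /* safe to block: we own a ref */
    fence_put(f);                   /* balances the fence_get() */
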
index 8a1752ff3d8e55a1d8b8ab3061f5c9c425058d0f..1cbb16e153079e7463a0ac070d1a19461deb588e 100644 (file)
@@ -712,7 +712,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
                                                       0, PAGE_SIZE,
                                                       PCI_DMA_BIDIRECTIONAL);
                if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) {
-                       while (--i) {
+                       while (i--) {
                                pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
                                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
                                gtt->ttm.dma_address[i] = 0;
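
The unwind loop change is the classic pre- versus post-decrement fix. After a mapping failure at index i, entries 0..i-1 are mapped and must be unmapped:

    /* while (--i): decrements first, so index 0 is never unmapped, and
     *   a failure at i == 0 wraps the unsigned counter and walks far
     *   off the end of dma_address[];
     * while (i--): tests first, then decrements, visiting i-1 down to 0
     *   and doing nothing at all when i == 0.  Correct on both counts.
     */
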
@@ -783,6 +783,25 @@ bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm)
        return !!gtt->userptr;
 }
 
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+                                 unsigned long end)
+{
+       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       unsigned long size;
+
+       if (gtt == NULL)
+               return false;
+
+       if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr)
+               return false;
+
+       size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
+       if (gtt->userptr > end || gtt->userptr + size <= start)
+               return false;
+
+       return true;
+}
+
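
amdgpu_ttm_tt_affect_userptr() is an interval-overlap predicate over the bound userptr range, and the amdgpu_mn change earlier in this series uses it so only BOs actually covered by an invalidation get revalidated. A worked check, treating end as inclusive the way this helper does:

    /* userptr = 0x1000, num_pages * PAGE_SIZE = 0x2000
     *   -> the BO covers [0x1000, 0x3000)
     * invalidation start = 0x3000, end = 0x3fff:
     *   userptr + size <= start -> disjoint, BO untouched
     * invalidation start = 0x2000, end = 0x2fff:
     *   neither disjoint test fires -> overlap, BO is affected
     */
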
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -808,7 +827,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
                        flags |= AMDGPU_PTE_SNOOPED;
        }
 
-       if (adev->asic_type >= CHIP_TOPAZ)
+       if (adev->asic_type >= CHIP_TONGA)
                flags |= AMDGPU_PTE_EXECUTABLE;
 
        flags |= AMDGPU_PTE_READABLE;
index aefc668e6b5d79e65873e973275d5f0beb7b10d3..9599f7559b3dc86308d2c375739d0cd20ee60941 100644 (file)
@@ -1282,7 +1282,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
        const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
                AMDGPU_VM_PTE_COUNT * 8);
-       unsigned pd_size, pd_entries, pts_size;
+       unsigned pd_size, pd_entries;
        int i, r;
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        pd_entries = amdgpu_vm_num_pdes(adev);
 
        /* allocate page table array */
-       pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
-       vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+       vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
        if (vm->page_tables == NULL) {
                DRM_ERROR("Cannot allocate memory for page table array\n");
                return -ENOMEM;
@@ -1361,7 +1360,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
        for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
                amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
-       kfree(vm->page_tables);
+       drm_free_large(vm->page_tables);
 
        amdgpu_bo_unref(&vm->page_directory);
        fence_put(vm->page_directory_fence);
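
vm->page_tables weighs in at pd_entries * sizeof(struct amdgpu_vm_pt), which on a large VM can grow past what kzalloc() will reliably satisfy with contiguous pages; drm_calloc_large() switches to vmalloc for big arrays. Roughly what the helper pair does, per drm_mem_util.h of this era (a paraphrase, not a verbatim copy):

    static inline void *drm_calloc_large(size_t nmemb, size_t size)
    {
            if (size != 0 && nmemb > SIZE_MAX / size)
                    return NULL;                    /* multiplication overflow */
            if (size * nmemb <= PAGE_SIZE)
                    return kcalloc(nmemb, size, GFP_KERNEL);
            return vzalloc(size * nmemb);           /* virtually contiguous */
    }
    /* drm_free_large() correspondingly dispatches to kfree() or vfree(). */
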
index 8b4731d4e10eabbd78f17f799c5fcd1156ba9862..474ca02b094935283c3548534a2502cd01412665 100644 (file)
@@ -31,6 +31,7 @@
 #include "ci_dpm.h"
 #include "gfx_v7_0.h"
 #include "atom.h"
+#include "amd_pcie.h"
 #include <linux/seq_file.h>
 
 #include "smu/smu_7_0_1_d.h"
@@ -5835,18 +5836,16 @@ static int ci_dpm_init(struct amdgpu_device *adev)
        u8 frev, crev;
        struct ci_power_info *pi;
        int ret;
-       u32 mask;
 
        pi = kzalloc(sizeof(struct ci_power_info), GFP_KERNEL);
        if (pi == NULL)
                return -ENOMEM;
        adev->pm.dpm.priv = pi;
 
-       ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
-       if (ret)
-               pi->sys_pcie_mask = 0;
-       else
-               pi->sys_pcie_mask = mask;
+       pi->sys_pcie_mask =
+               (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
+               CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
+
        pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
 
        pi->pcie_gen_performance.max = AMDGPU_PCIE_GEN1;
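
ci_dpm_init() now derives the platform PCIe speed bits from the mask amdgpu_get_pcie_info() already cached instead of issuing a second drm_pcie_get_speed_cap_mask() call. Worked through with the default mask, assuming the amd_pcie.h constants (SUPPORT_MASK = 0xffff0000, SHIFT = 16):

    /* pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK = 0x30007:
     *   (0x30007 & 0xffff0000) >> 16 = 0x3 -> gen1|gen2
     * the same encoding the old drm_pcie_get_speed_cap_mask() path
     * stored, but now consistent with any module-parameter override. */
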
index fd9c9588ef46335d7a154e664bce8275d62f3ae9..155965ed14a3bee05d745b5e25fccf03546dc4ae 100644 (file)
@@ -1762,6 +1762,9 @@ static void cik_program_aspm(struct amdgpu_device *adev)
        if (amdgpu_aspm == 0)
                return;
 
+       if (pci_is_root_bus(adev->pdev->bus))
+               return;
+
        /* XXX double check APUs */
        if (adev->flags & AMD_IS_APU)
                return;
@@ -2332,72 +2335,72 @@ static int cik_common_early_init(void *handle)
        switch (adev->asic_type) {
        case CHIP_BONAIRE:
                adev->cg_flags =
-                       AMDGPU_CG_SUPPORT_GFX_MGCG |
-                       AMDGPU_CG_SUPPORT_GFX_MGLS |
-                       /*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
-                       AMDGPU_CG_SUPPORT_GFX_CGLS |
-                       AMDGPU_CG_SUPPORT_GFX_CGTS |
-                       AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
-                       AMDGPU_CG_SUPPORT_GFX_CP_LS |
-                       AMDGPU_CG_SUPPORT_MC_LS |
-                       AMDGPU_CG_SUPPORT_MC_MGCG |
-                       AMDGPU_CG_SUPPORT_SDMA_MGCG |
-                       AMDGPU_CG_SUPPORT_SDMA_LS |
-                       AMDGPU_CG_SUPPORT_BIF_LS |
-                       AMDGPU_CG_SUPPORT_VCE_MGCG |
-                       AMDGPU_CG_SUPPORT_UVD_MGCG |
-                       AMDGPU_CG_SUPPORT_HDP_LS |
-                       AMDGPU_CG_SUPPORT_HDP_MGCG;
+                       AMD_CG_SUPPORT_GFX_MGCG |
+                       AMD_CG_SUPPORT_GFX_MGLS |
+                       /*AMD_CG_SUPPORT_GFX_CGCG |*/
+                       AMD_CG_SUPPORT_GFX_CGLS |
+                       AMD_CG_SUPPORT_GFX_CGTS |
+                       AMD_CG_SUPPORT_GFX_CGTS_LS |
+                       AMD_CG_SUPPORT_GFX_CP_LS |
+                       AMD_CG_SUPPORT_MC_LS |
+                       AMD_CG_SUPPORT_MC_MGCG |
+                       AMD_CG_SUPPORT_SDMA_MGCG |
+                       AMD_CG_SUPPORT_SDMA_LS |
+                       AMD_CG_SUPPORT_BIF_LS |
+                       AMD_CG_SUPPORT_VCE_MGCG |
+                       AMD_CG_SUPPORT_UVD_MGCG |
+                       AMD_CG_SUPPORT_HDP_LS |
+                       AMD_CG_SUPPORT_HDP_MGCG;
                adev->pg_flags = 0;
                adev->external_rev_id = adev->rev_id + 0x14;
                break;
        case CHIP_HAWAII:
                adev->cg_flags =
-                       AMDGPU_CG_SUPPORT_GFX_MGCG |
-                       AMDGPU_CG_SUPPORT_GFX_MGLS |
-                       /*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
-                       AMDGPU_CG_SUPPORT_GFX_CGLS |
-                       AMDGPU_CG_SUPPORT_GFX_CGTS |
-                       AMDGPU_CG_SUPPORT_GFX_CP_LS |
-                       AMDGPU_CG_SUPPORT_MC_LS |
-                       AMDGPU_CG_SUPPORT_MC_MGCG |
-                       AMDGPU_CG_SUPPORT_SDMA_MGCG |
-                       AMDGPU_CG_SUPPORT_SDMA_LS |
-                       AMDGPU_CG_SUPPORT_BIF_LS |
-                       AMDGPU_CG_SUPPORT_VCE_MGCG |
-                       AMDGPU_CG_SUPPORT_UVD_MGCG |
-                       AMDGPU_CG_SUPPORT_HDP_LS |
-                       AMDGPU_CG_SUPPORT_HDP_MGCG;
+                       AMD_CG_SUPPORT_GFX_MGCG |
+                       AMD_CG_SUPPORT_GFX_MGLS |
+                       /*AMD_CG_SUPPORT_GFX_CGCG |*/
+                       AMD_CG_SUPPORT_GFX_CGLS |
+                       AMD_CG_SUPPORT_GFX_CGTS |
+                       AMD_CG_SUPPORT_GFX_CP_LS |
+                       AMD_CG_SUPPORT_MC_LS |
+                       AMD_CG_SUPPORT_MC_MGCG |
+                       AMD_CG_SUPPORT_SDMA_MGCG |
+                       AMD_CG_SUPPORT_SDMA_LS |
+                       AMD_CG_SUPPORT_BIF_LS |
+                       AMD_CG_SUPPORT_VCE_MGCG |
+                       AMD_CG_SUPPORT_UVD_MGCG |
+                       AMD_CG_SUPPORT_HDP_LS |
+                       AMD_CG_SUPPORT_HDP_MGCG;
                adev->pg_flags = 0;
                adev->external_rev_id = 0x28;
                break;
        case CHIP_KAVERI:
                adev->cg_flags =
-                       AMDGPU_CG_SUPPORT_GFX_MGCG |
-                       AMDGPU_CG_SUPPORT_GFX_MGLS |
-                       /*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
-                       AMDGPU_CG_SUPPORT_GFX_CGLS |
-                       AMDGPU_CG_SUPPORT_GFX_CGTS |
-                       AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
-                       AMDGPU_CG_SUPPORT_GFX_CP_LS |
-                       AMDGPU_CG_SUPPORT_SDMA_MGCG |
-                       AMDGPU_CG_SUPPORT_SDMA_LS |
-                       AMDGPU_CG_SUPPORT_BIF_LS |
-                       AMDGPU_CG_SUPPORT_VCE_MGCG |
-                       AMDGPU_CG_SUPPORT_UVD_MGCG |
-                       AMDGPU_CG_SUPPORT_HDP_LS |
-                       AMDGPU_CG_SUPPORT_HDP_MGCG;
+                       AMD_CG_SUPPORT_GFX_MGCG |
+                       AMD_CG_SUPPORT_GFX_MGLS |
+                       /*AMD_CG_SUPPORT_GFX_CGCG |*/
+                       AMD_CG_SUPPORT_GFX_CGLS |
+                       AMD_CG_SUPPORT_GFX_CGTS |
+                       AMD_CG_SUPPORT_GFX_CGTS_LS |
+                       AMD_CG_SUPPORT_GFX_CP_LS |
+                       AMD_CG_SUPPORT_SDMA_MGCG |
+                       AMD_CG_SUPPORT_SDMA_LS |
+                       AMD_CG_SUPPORT_BIF_LS |
+                       AMD_CG_SUPPORT_VCE_MGCG |
+                       AMD_CG_SUPPORT_UVD_MGCG |
+                       AMD_CG_SUPPORT_HDP_LS |
+                       AMD_CG_SUPPORT_HDP_MGCG;
                adev->pg_flags =
-                       /*AMDGPU_PG_SUPPORT_GFX_PG |
-                         AMDGPU_PG_SUPPORT_GFX_SMG |
-                         AMDGPU_PG_SUPPORT_GFX_DMG |*/
-                       AMDGPU_PG_SUPPORT_UVD |
-                       /*AMDGPU_PG_SUPPORT_VCE |
-                         AMDGPU_PG_SUPPORT_CP |
-                         AMDGPU_PG_SUPPORT_GDS |
-                         AMDGPU_PG_SUPPORT_RLC_SMU_HS |
-                         AMDGPU_PG_SUPPORT_ACP |
-                         AMDGPU_PG_SUPPORT_SAMU |*/
+                       /*AMD_PG_SUPPORT_GFX_PG |
+                         AMD_PG_SUPPORT_GFX_SMG |
+                         AMD_PG_SUPPORT_GFX_DMG |*/
+                       AMD_PG_SUPPORT_UVD |
+                       /*AMD_PG_SUPPORT_VCE |
+                         AMD_PG_SUPPORT_CP |
+                         AMD_PG_SUPPORT_GDS |
+                         AMD_PG_SUPPORT_RLC_SMU_HS |
+                         AMD_PG_SUPPORT_ACP |
+                         AMD_PG_SUPPORT_SAMU |*/
                        0;
                if (adev->pdev->device == 0x1312 ||
                        adev->pdev->device == 0x1316 ||
@@ -2409,29 +2412,29 @@ static int cik_common_early_init(void *handle)
        case CHIP_KABINI:
        case CHIP_MULLINS:
                adev->cg_flags =
-                       AMDGPU_CG_SUPPORT_GFX_MGCG |
-                       AMDGPU_CG_SUPPORT_GFX_MGLS |
-                       /*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
-                       AMDGPU_CG_SUPPORT_GFX_CGLS |
-                       AMDGPU_CG_SUPPORT_GFX_CGTS |
-                       AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
-                       AMDGPU_CG_SUPPORT_GFX_CP_LS |
-                       AMDGPU_CG_SUPPORT_SDMA_MGCG |
-                       AMDGPU_CG_SUPPORT_SDMA_LS |
-                       AMDGPU_CG_SUPPORT_BIF_LS |
-                       AMDGPU_CG_SUPPORT_VCE_MGCG |
-                       AMDGPU_CG_SUPPORT_UVD_MGCG |
-                       AMDGPU_CG_SUPPORT_HDP_LS |
-                       AMDGPU_CG_SUPPORT_HDP_MGCG;
+                       AMD_CG_SUPPORT_GFX_MGCG |
+                       AMD_CG_SUPPORT_GFX_MGLS |
+                       /*AMD_CG_SUPPORT_GFX_CGCG |*/
+                       AMD_CG_SUPPORT_GFX_CGLS |
+                       AMD_CG_SUPPORT_GFX_CGTS |
+                       AMD_CG_SUPPORT_GFX_CGTS_LS |
+                       AMD_CG_SUPPORT_GFX_CP_LS |
+                       AMD_CG_SUPPORT_SDMA_MGCG |
+                       AMD_CG_SUPPORT_SDMA_LS |
+                       AMD_CG_SUPPORT_BIF_LS |
+                       AMD_CG_SUPPORT_VCE_MGCG |
+                       AMD_CG_SUPPORT_UVD_MGCG |
+                       AMD_CG_SUPPORT_HDP_LS |
+                       AMD_CG_SUPPORT_HDP_MGCG;
                adev->pg_flags =
-                       /*AMDGPU_PG_SUPPORT_GFX_PG |
-                         AMDGPU_PG_SUPPORT_GFX_SMG | */
-                       AMDGPU_PG_SUPPORT_UVD |
-                       /*AMDGPU_PG_SUPPORT_VCE |
-                         AMDGPU_PG_SUPPORT_CP |
-                         AMDGPU_PG_SUPPORT_GDS |
-                         AMDGPU_PG_SUPPORT_RLC_SMU_HS |
-                         AMDGPU_PG_SUPPORT_SAMU |*/
+                       /*AMD_PG_SUPPORT_GFX_PG |
+                         AMD_PG_SUPPORT_GFX_SMG | */
+                       AMD_PG_SUPPORT_UVD |
+                       /*AMD_PG_SUPPORT_VCE |
+                         AMD_PG_SUPPORT_CP |
+                         AMD_PG_SUPPORT_GDS |
+                         AMD_PG_SUPPORT_RLC_SMU_HS |
+                         AMD_PG_SUPPORT_SAMU |*/
                        0;
                if (adev->asic_type == CHIP_KABINI) {
                        if (adev->rev_id == 0)
index 5f712ceddf08e4765fb544aff0cad26e6bbcd890..c55ecf0ea8454822a07711c3b85479a9e51e214d 100644 (file)
@@ -885,7 +885,7 @@ static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
 {
        u32 orig, data;
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_SDMA_MGCG)) {
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
                WREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
                WREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
        } else {
@@ -906,7 +906,7 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
 {
        u32 orig, data;
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_SDMA_LS)) {
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
                orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
                data |= 0x100;
                if (orig != data)
index 4dd17f2dd9059125eace5ffd5fc2a8cf2eb46802..9056355309d18cbc97beaf015b7e34be6c8b6b8f 100644 (file)
@@ -445,13 +445,13 @@ static int cz_dpm_init(struct amdgpu_device *adev)
        pi->gfx_pg_threshold = 500;
        pi->caps_fps = true;
        /* uvd */
-       pi->caps_uvd_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_UVD) ? true : false;
+       pi->caps_uvd_pg = (adev->pg_flags & AMD_PG_SUPPORT_UVD) ? true : false;
        pi->caps_uvd_dpm = true;
        /* vce */
-       pi->caps_vce_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_VCE) ? true : false;
+       pi->caps_vce_pg = (adev->pg_flags & AMD_PG_SUPPORT_VCE) ? true : false;
        pi->caps_vce_dpm = true;
        /* acp */
-       pi->caps_acp_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_ACP) ? true : false;
+       pi->caps_acp_pg = (adev->pg_flags & AMD_PG_SUPPORT_ACP) ? true : false;
        pi->caps_acp_dpm = true;
 
        pi->caps_stable_power_state = false;
index 72793f93e2fcc9989e5f936ec1d43ba0593ac69e..7732059ae30f51414ab9173cc1d2fdaa7c07159c 100644 (file)
@@ -4109,7 +4109,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 
        orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGCG)) {
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                gfx_v7_0_enable_gui_idle_interrupt(adev, true);
 
                tmp = gfx_v7_0_halt_rlc(adev);
@@ -4147,9 +4147,9 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 {
        u32 data, orig, tmp = 0;
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGCG)) {
-               if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) {
-                       if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CP_LS) {
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+               if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+                       if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
                                orig = data = RREG32(mmCP_MEM_SLP_CNTL);
                                data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                                if (orig != data)
@@ -4176,14 +4176,14 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 
                gfx_v7_0_update_rlc(adev, tmp);
 
-               if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS) {
+               if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        orig = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
-                       if ((adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) &&
-                           (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS_LS))
+                       if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
+                           (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
@@ -4249,7 +4249,7 @@ static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
        u32 data, orig;
 
        orig = data = RREG32(mmRLC_PG_CNTL);
-       if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
+       if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
                data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
        else
                data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
@@ -4263,7 +4263,7 @@ static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
        u32 data, orig;
 
        orig = data = RREG32(mmRLC_PG_CNTL);
-       if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
+       if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
                data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
        else
                data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
@@ -4276,7 +4276,7 @@ static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
        u32 data, orig;
 
        orig = data = RREG32(mmRLC_PG_CNTL);
-       if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_CP))
+       if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
                data &= ~0x8000;
        else
                data |= 0x8000;
@@ -4289,7 +4289,7 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
        u32 data, orig;
 
        orig = data = RREG32(mmRLC_PG_CNTL);
-       if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GDS))
+       if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
                data &= ~0x2000;
        else
                data |= 0x2000;
@@ -4370,7 +4370,7 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
 {
        u32 data, orig;
 
-       if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG)) {
+       if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(mmRLC_PG_CNTL);
                data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
                if (orig != data)
@@ -4442,7 +4442,7 @@ static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
        u32 data, orig;
 
        orig = data = RREG32(mmRLC_PG_CNTL);
-       if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG))
+       if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
                data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
        else
                data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
@@ -4456,7 +4456,7 @@ static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
        u32 data, orig;
 
        orig = data = RREG32(mmRLC_PG_CNTL);
-       if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_DMG))
+       if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
                data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
        else
                data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
@@ -4623,15 +4623,15 @@ static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
 
 static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
 {
-       if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
-                             AMDGPU_PG_SUPPORT_GFX_SMG |
-                             AMDGPU_PG_SUPPORT_GFX_DMG |
-                             AMDGPU_PG_SUPPORT_CP |
-                             AMDGPU_PG_SUPPORT_GDS |
-                             AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+       if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+                             AMD_PG_SUPPORT_GFX_SMG |
+                             AMD_PG_SUPPORT_GFX_DMG |
+                             AMD_PG_SUPPORT_CP |
+                             AMD_PG_SUPPORT_GDS |
+                             AMD_PG_SUPPORT_RLC_SMU_HS)) {
                gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
                gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
-               if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+               if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
                        gfx_v7_0_init_gfx_cgpg(adev);
                        gfx_v7_0_enable_cp_pg(adev, true);
                        gfx_v7_0_enable_gds_pg(adev, true);
@@ -4643,14 +4643,14 @@ static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
 
 static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
 {
-       if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
-                             AMDGPU_PG_SUPPORT_GFX_SMG |
-                             AMDGPU_PG_SUPPORT_GFX_DMG |
-                             AMDGPU_PG_SUPPORT_CP |
-                             AMDGPU_PG_SUPPORT_GDS |
-                             AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+       if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+                             AMD_PG_SUPPORT_GFX_SMG |
+                             AMD_PG_SUPPORT_GFX_DMG |
+                             AMD_PG_SUPPORT_CP |
+                             AMD_PG_SUPPORT_GDS |
+                             AMD_PG_SUPPORT_RLC_SMU_HS)) {
                gfx_v7_0_update_gfx_pg(adev, false);
-               if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+               if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
                        gfx_v7_0_enable_cp_pg(adev, false);
                        gfx_v7_0_enable_gds_pg(adev, false);
                }
@@ -4738,6 +4738,22 @@ static int gfx_v7_0_early_init(void *handle)
        return 0;
 }
 
+static int gfx_v7_0_late_init(void *handle)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+       if (r)
+               return r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+       if (r)
+               return r;
+
+       return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
        struct amdgpu_ring *ring;
@@ -4890,6 +4906,8 @@ static int gfx_v7_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        gfx_v7_0_cp_enable(adev, false);
        gfx_v7_0_rlc_stop(adev);
        gfx_v7_0_fini_pg(adev);
@@ -5509,14 +5527,14 @@ static int gfx_v7_0_set_powergating_state(void *handle,
        if (state == AMD_PG_STATE_GATE)
                gate = true;
 
-       if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
-                             AMDGPU_PG_SUPPORT_GFX_SMG |
-                             AMDGPU_PG_SUPPORT_GFX_DMG |
-                             AMDGPU_PG_SUPPORT_CP |
-                             AMDGPU_PG_SUPPORT_GDS |
-                             AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+       if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+                             AMD_PG_SUPPORT_GFX_SMG |
+                             AMD_PG_SUPPORT_GFX_DMG |
+                             AMD_PG_SUPPORT_CP |
+                             AMD_PG_SUPPORT_GDS |
+                             AMD_PG_SUPPORT_RLC_SMU_HS)) {
                gfx_v7_0_update_gfx_pg(adev, gate);
-               if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+               if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
                        gfx_v7_0_enable_cp_pg(adev, gate);
                        gfx_v7_0_enable_gds_pg(adev, gate);
                }
@@ -5527,7 +5545,7 @@ static int gfx_v7_0_set_powergating_state(void *handle,
 
 const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
        .early_init = gfx_v7_0_early_init,
-       .late_init = NULL,
+       .late_init = gfx_v7_0_late_init,
        .sw_init = gfx_v7_0_sw_init,
        .sw_fini = gfx_v7_0_sw_fini,
        .hw_init = gfx_v7_0_hw_init,
index 13235d84e5a67d45ca52bf4de9536d1c22a26dc6..1c40bd90afbb30097980f0acfa698928f84e1aef 100644 (file)
@@ -111,7 +111,6 @@ MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
-MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 
 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
@@ -828,7 +827,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       if (adev->asic_type != CHIP_STONEY) {
+       if ((adev->asic_type != CHIP_STONEY) &&
+           (adev->asic_type != CHIP_TOPAZ)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                if (!err) {
@@ -3851,10 +3851,16 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
                        if (r)
                                return -EINVAL;
 
-                       r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-                                                       AMDGPU_UCODE_ID_CP_MEC1);
-                       if (r)
-                               return -EINVAL;
+                       if (adev->asic_type == CHIP_TOPAZ) {
+                               r = gfx_v8_0_cp_compute_load_microcode(adev);
+                               if (r)
+                                       return r;
+                       } else {
+                               r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
+                                                                                AMDGPU_UCODE_ID_CP_MEC1);
+                               if (r)
+                                       return -EINVAL;
+                       }
                }
        }
 
@@ -3901,6 +3907,8 @@ static int gfx_v8_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);
        gfx_v8_0_cp_compute_fini(adev);
@@ -4186,7 +4194,18 @@ static int gfx_v8_0_soft_reset(void *handle)
                gfx_v8_0_cp_gfx_enable(adev, false);
 
                /* Disable MEC parsing/prefetching */
-               /* XXX todo */
+               gfx_v8_0_cp_compute_enable(adev, false);
+
+               if (grbm_soft_reset || srbm_soft_reset) {
+                       tmp = RREG32(mmGMCON_DEBUG);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_STALL, 1);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_CLEAR, 1);
+                       WREG32(mmGMCON_DEBUG, tmp);
+
+                       udelay(50);
+               }
 
                if (grbm_soft_reset) {
                        tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4234,16 @@ static int gfx_v8_0_soft_reset(void *handle)
                        WREG32(mmSRBM_SOFT_RESET, tmp);
                        tmp = RREG32(mmSRBM_SOFT_RESET);
                }
+
+               if (grbm_soft_reset || srbm_soft_reset) {
+                       tmp = RREG32(mmGMCON_DEBUG);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_STALL, 0);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_CLEAR, 0);
+                       WREG32(mmGMCON_DEBUG, tmp);
+               }
+
                /* Wait a little for things to settle down */
                udelay(50);
                gfx_v8_0_print_status((void *)adev);
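
Around the GRBM/SRBM soft resets the sequence now brackets the reset with the memory-controller interface register GMCON_DEBUG, with udelay(50) settle time at each step. The field semantics here are an assumption (GFX_STALL plausibly gates GFX traffic into the MC, GFX_CLEAR plausibly flushes queued state); the bracket shape itself is straight from the hunk:

    /* GMCON_DEBUG.GFX_STALL = 1, GFX_CLEAR = 1   -> quiesce (assumed)
     * udelay(50)
     * toggle GRBM_SOFT_RESET / SRBM_SOFT_RESET bits
     * GMCON_DEBUG.GFX_STALL = 0, GFX_CLEAR = 0   -> release
     * udelay(50)
     */
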
@@ -4308,6 +4337,14 @@ static int gfx_v8_0_late_init(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;
 
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+       if (r)
+               return r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+       if (r)
+               return r;
+
        /* requires IBs so do in late init after IB pool is initialized */
        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
        if (r)
@@ -4958,7 +4995,7 @@ static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
        case AMDGPU_IRQ_STATE_ENABLE:
                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-                                           PRIV_REG_INT_ENABLE, 0);
+                                           PRIV_REG_INT_ENABLE, 1);
                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
                break;
        default:
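
The one-bit change above is the actual fix: the AMDGPU_IRQ_STATE_ENABLE case wrote 0 into PRIV_REG_INT_ENABLE, so "enabling" the privileged-register fault interrupt cleared its enable bit. For reference, REG_SET_FIELD() expands roughly to a mask-and-or on the named field:

    cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
                                PRIV_REG_INT_ENABLE, 1);
    /* roughly:
     * cp_int_cntl =
     *   (cp_int_cntl & ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK) |
     *   (1 << CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE__SHIFT);
     */
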
index 3f956065d069125df0200f78527437134bf7f6d1..b8060795b27b96409a37ce213a3f6076127c7d71 100644 (file)
@@ -42,9 +42,39 @@ static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
+MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
+
+static const u32 golden_settings_iceland_a11[] =
+{
+       mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
+};
+
+static const u32 iceland_mgcg_cgcg_init[] =
+{
+       mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
+{
+       switch (adev->asic_type) {
+       case CHIP_TOPAZ:
+               amdgpu_program_register_sequence(adev,
+                                                iceland_mgcg_cgcg_init,
+                                                (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
+               amdgpu_program_register_sequence(adev,
+                                                golden_settings_iceland_a11,
+                                                (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
+               break;
+       default:
+               break;
+       }
+}
 
 /**
- * gmc8_mc_wait_for_idle - wait for MC idle callback.
+ * gmc7_mc_wait_for_idle - wait for MC idle callback.
  *
  * @adev: amdgpu_device pointer
  *
@@ -132,13 +162,20 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
        case CHIP_HAWAII:
                chip_name = "hawaii";
                break;
+       case CHIP_TOPAZ:
+               chip_name = "topaz";
+               break;
        case CHIP_KAVERI:
        case CHIP_KABINI:
                return 0;
        default: BUG();
        }
 
-       snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+       if (adev->asic_type == CHIP_TOPAZ)
+               snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
+       else
+               snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+
        err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -755,7 +792,7 @@ static void gmc_v7_0_enable_mc_ls(struct amdgpu_device *adev,
 
        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
                orig = data = RREG32(mc_cg_registers[i]);
-               if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
+               if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
                        data |= mc_cg_ls_en[i];
                else
                        data &= ~mc_cg_ls_en[i];
@@ -772,7 +809,7 @@ static void gmc_v7_0_enable_mc_mgcg(struct amdgpu_device *adev,
 
        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
                orig = data = RREG32(mc_cg_registers[i]);
-               if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
+               if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
                        data |= mc_cg_en[i];
                else
                        data &= ~mc_cg_en[i];
@@ -788,7 +825,7 @@ static void gmc_v7_0_enable_bif_mgls(struct amdgpu_device *adev,
 
        orig = data = RREG32_PCIE(ixPCIE_CNTL2);
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
                data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
                data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
                data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
@@ -811,7 +848,7 @@ static void gmc_v7_0_enable_hdp_mgcg(struct amdgpu_device *adev,
 
        orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
                data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
        else
                data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
@@ -827,7 +864,7 @@ static void gmc_v7_0_enable_hdp_ls(struct amdgpu_device *adev,
 
        orig = data = RREG32(mmHDP_MEM_POWER_LS);
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
                data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
        else
                data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
@@ -984,6 +1021,8 @@ static int gmc_v7_0_hw_init(void *handle)
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       gmc_v7_0_init_golden_registers(adev);
+
        gmc_v7_0_mc_program(adev);
 
        if (!(adev->flags & AMD_IS_APU)) {
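
A note for readers following the Topaz changes in this file: the golden-settings tables programmed by gmc_v7_0_init_golden_registers() are flat u32 arrays of (register, and-mask, or-value) triples, so ARRAY_SIZE counts u32s, not entries. Below is a minimal standalone sketch of that layout and a plausible way such a table is walked; the helper and its exact masking semantics are assumptions for illustration, not the driver's verbatim implementation.

#include <stdio.h>

/* flat (reg, and_mask, or_value) triples, as in golden_settings_iceland_a11 */
static const unsigned int golden_example[] = {
	0x1234, 0x0fffffff, 0x0fffffff,
};

/* mock register file so the sketch runs standalone */
static unsigned int regs[0x10000];
static unsigned int rd32(unsigned int reg) { return regs[reg]; }
static void wr32(unsigned int reg, unsigned int val) { regs[reg] = val; }

static void program_register_sequence(const unsigned int *t, unsigned int n)
{
	unsigned int i, tmp;

	for (i = 0; i < n; i += 3) {	/* one triple per register */
		tmp = rd32(t[i]);
		tmp &= ~t[i + 1];	/* clear the masked bits  */
		tmp |= t[i + 2];	/* apply the golden value */
		wr32(t[i], tmp);
	}
}

int main(void)
{
	program_register_sequence(golden_example,
				  sizeof(golden_example) / sizeof(*golden_example));
	printf("0x%08x\n", rd32(0x1234));	/* prints 0x0fffffff */
	return 0;
}
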
index c0c9a0101eb453c3139b9796ac8449350f0fb4bd..3efd45546241afc65e5d7db1fbc541fda946857b 100644 (file)
@@ -42,9 +42,7 @@
 static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 
-MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
 MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
-MODULE_FIRMWARE("amdgpu/fiji_mc.bin");
 
 static const u32 golden_settings_tonga_a11[] =
 {
@@ -75,19 +73,6 @@ static const u32 fiji_mgcg_cgcg_init[] =
        mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
 };
 
-static const u32 golden_settings_iceland_a11[] =
-{
-       mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
-};
-
-static const u32 iceland_mgcg_cgcg_init[] =
-{
-       mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
-};
-
 static const u32 cz_mgcg_cgcg_init[] =
 {
        mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
@@ -102,14 +87,6 @@ static const u32 stoney_mgcg_cgcg_init[] =
 static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
        switch (adev->asic_type) {
-       case CHIP_TOPAZ:
-               amdgpu_program_register_sequence(adev,
-                                                iceland_mgcg_cgcg_init,
-                                                (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
-               amdgpu_program_register_sequence(adev,
-                                                golden_settings_iceland_a11,
-                                                (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
-               break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
@@ -229,15 +206,10 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
        DRM_DEBUG("\n");
 
        switch (adev->asic_type) {
-       case CHIP_TOPAZ:
-               chip_name = "topaz";
-               break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_FIJI:
-               chip_name = "fiji";
-               break;
        case CHIP_CARRIZO:
        case CHIP_STONEY:
                return 0;
@@ -1007,7 +979,7 @@ static int gmc_v8_0_hw_init(void *handle)
 
        gmc_v8_0_mc_program(adev);
 
-       if (!(adev->flags & AMD_IS_APU)) {
+       if (adev->asic_type == CHIP_TONGA) {
                r = gmc_v8_0_mc_load_microcode(adev);
                if (r) {
                        DRM_ERROR("Failed to load MC firmware!\n");
index 966d4b2ed9dad0527a2d0246b23731f1d802d9e2..090486c182497ba8de2aaa7840242be9cf4750aa 100644 (file)
@@ -432,7 +432,7 @@ static uint32_t iceland_smu_get_mask_for_fw_type(uint32_t fw_type)
                case AMDGPU_UCODE_ID_CP_ME:
                        return UCODE_ID_CP_ME_MASK;
                case AMDGPU_UCODE_ID_CP_MEC1:
-                       return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK | UCODE_ID_CP_MEC_JT2_MASK;
+                       return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK;
                case AMDGPU_UCODE_ID_CP_MEC2:
                        return UCODE_ID_CP_MEC_MASK;
                case AMDGPU_UCODE_ID_RLC_G:
@@ -522,12 +522,6 @@ static int iceland_smu_request_load_fw(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-       if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_CP_MEC_JT2,
-                       &toc->entry[toc->num_entries++])) {
-               DRM_ERROR("Failed to get firmware entry for MEC_JT2\n");
-               return -EINVAL;
-       }
-
        if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_SDMA0,
                        &toc->entry[toc->num_entries++])) {
                DRM_ERROR("Failed to get firmware entry for SDMA0\n");
@@ -550,8 +544,8 @@ static int iceland_smu_request_load_fw(struct amdgpu_device *adev)
                        UCODE_ID_CP_ME_MASK |
                        UCODE_ID_CP_PFP_MASK |
                        UCODE_ID_CP_MEC_MASK |
-                       UCODE_ID_CP_MEC_JT1_MASK |
-                       UCODE_ID_CP_MEC_JT2_MASK;
+                       UCODE_ID_CP_MEC_JT1_MASK;
+
 
        if (iceland_send_msg_to_smc_with_parameter_without_waiting(adev, PPSMC_MSG_LoadUcodes, fw_to_load)) {
                DRM_ERROR("Fail to request SMU load ucode\n");
index 7e9154c7f1dbbb7f9d3eee6791c0e4fcc835ea8d..654d76723bc39d8004b461c7c51d401384d2ee20 100644 (file)
@@ -2859,11 +2859,11 @@ static int kv_dpm_init(struct amdgpu_device *adev)
        pi->voltage_drop_t = 0;
        pi->caps_sclk_throttle_low_notification = false;
        pi->caps_fps = false; /* true? */
-       pi->caps_uvd_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_UVD) ? true : false;
+       pi->caps_uvd_pg = (adev->pg_flags & AMD_PG_SUPPORT_UVD) ? true : false;
        pi->caps_uvd_dpm = true;
-       pi->caps_vce_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_VCE) ? true : false;
-       pi->caps_samu_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_SAMU) ? true : false;
-       pi->caps_acp_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_ACP) ? true : false;
+       pi->caps_vce_pg = (adev->pg_flags & AMD_PG_SUPPORT_VCE) ? true : false;
+       pi->caps_samu_pg = (adev->pg_flags & AMD_PG_SUPPORT_SAMU) ? true : false;
+       pi->caps_acp_pg = (adev->pg_flags & AMD_PG_SUPPORT_ACP) ? true : false;
        pi->caps_stable_p_state = false;
 
        ret = kv_parse_sys_info_table(adev);
index f4a1346525febf9c9a463e32525c635f70873a93..0497784b36523086a029bb5a08cd977c9a9f6372 100644 (file)
@@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle)
 
 static int tonga_dpm_suspend(void *handle)
 {
-       return 0;
+       return tonga_dpm_hw_fini(handle);
 }
 
 static int tonga_dpm_resume(void *handle)
 {
-       int ret;
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       mutex_lock(&adev->pm.mutex);
-
-       ret = tonga_smu_start(adev);
-       if (ret) {
-               DRM_ERROR("SMU start failed\n");
-               goto fail;
-       }
-
-fail:
-       mutex_unlock(&adev->pm.mutex);
-       return ret;
+       return tonga_dpm_hw_init(handle);
 }
 
 static int tonga_dpm_set_clockgating_state(void *handle,
index 5e9f73af83a8431b25d6d153132df3a22ce31e31..fbd3767671bb9266d68ac6229a41194aa5a184ac 100644 (file)
@@ -611,7 +611,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
 {
        u32 orig, data;
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG)) {
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
                data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
                data = 0xfff;
                WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
@@ -830,6 +830,9 @@ static int uvd_v4_2_set_clockgating_state(void *handle,
        bool gate = false;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
+               return 0;
+
        if (state == AMD_CG_STATE_GATE)
                gate = true;
 
@@ -848,7 +851,10 @@ static int uvd_v4_2_set_powergating_state(void *handle,
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
-        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+       if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+               return 0;
 
        if (state == AMD_PG_STATE_GATE) {
                uvd_v4_2_stop(adev);
index 38864f5629814c7782682b4b8436b67e30aede3a..57f1c5bf3bf19a107c2c35d1b9fb653e2d37ac8d 100644 (file)
@@ -774,6 +774,11 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev,
 static int uvd_v5_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
 {
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+       if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
+               return 0;
+
        return 0;
 }
 
@@ -789,6 +794,9 @@ static int uvd_v5_0_set_powergating_state(void *handle,
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+               return 0;
+
        if (state == AMD_PG_STATE_GATE) {
                uvd_v5_0_stop(adev);
                return 0;
index 3d5913926436b6b759169575b9e235bc38f0d0df..0b365b7651ffbac65324701a13da702306e27467 100644 (file)
@@ -532,7 +532,7 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
        uvd_v6_0_mc_resume(adev);
 
        /* Set dynamic clock gating in S/W control mode */
-       if (adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG) {
+       if (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG) {
                if (adev->flags & AMD_IS_APU)
                        cz_set_uvd_clock_gating_branches(adev, false);
                else
@@ -1000,7 +1000,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 
-       if (!(adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG))
+       if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
                return 0;
 
        if (enable) {
@@ -1030,6 +1030,9 @@ static int uvd_v6_0_set_powergating_state(void *handle,
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+               return 0;
+
        if (state == AMD_PG_STATE_GATE) {
                uvd_v6_0_stop(adev);
                return 0;
index 52ac7a8f1e58b91f3ad0c441b6a175062bddad29..a822edacfa95c979a60cb49eb6e4bb51239e05f0 100644 (file)
@@ -373,7 +373,7 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 {
        bool sw_cg = false;
 
-       if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG)) {
+       if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
                if (sw_cg)
                        vce_v2_0_set_sw_cg(adev, true);
                else
@@ -608,6 +608,9 @@ static int vce_v2_0_set_powergating_state(void *handle,
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
+               return 0;
+
        if (state == AMD_PG_STATE_GATE)
                /* XXX do we need a vce_v2_0_stop()? */
                return 0;
index e99af81e4aec3543278c681418138b12feae4c94..d662fa9f9091a33c51eefa8b0ae5d753229e195b 100644 (file)
@@ -277,7 +277,7 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
                WREG32_P(mmVCE_STATUS, 0, ~1);
 
                /* Set Clock-Gating off */
-               if (adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG)
+               if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
                        vce_v3_0_set_vce_sw_clock_gating(adev, false);
 
                if (r) {
@@ -676,7 +676,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
        bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
        int i;
 
-       if (!(adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG))
+       if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;
 
        mutex_lock(&adev->grbm_idx_mutex);
@@ -728,6 +728,9 @@ static int vce_v3_0_set_powergating_state(void *handle,
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
+               return 0;
+
        if (state == AMD_PG_STATE_GATE)
                /* XXX do we need a vce_v3_0_stop()? */
                return 0;
index 652e76644c31cab37fee1964797d890749fbf0a7..0d14d108a6c4adf7a152edcff56cafa4b219cf1a 100644 (file)
@@ -61,6 +61,7 @@
 #include "vi.h"
 #include "vi_dpm.h"
 #include "gmc_v8_0.h"
+#include "gmc_v7_0.h"
 #include "gfx_v8_0.h"
 #include "sdma_v2_4.h"
 #include "sdma_v3_0.h"
@@ -1109,10 +1110,10 @@ static const struct amdgpu_ip_block_version topaz_ip_blocks[] =
        },
        {
                .type = AMD_IP_BLOCK_TYPE_GMC,
-               .major = 8,
-               .minor = 0,
+               .major = 7,
+               .minor = 4,
                .rev = 0,
-               .funcs = &gmc_v8_0_ip_funcs,
+               .funcs = &gmc_v7_0_ip_funcs,
        },
        {
                .type = AMD_IP_BLOCK_TYPE_IH,
@@ -1442,8 +1443,7 @@ static int vi_common_early_init(void *handle)
                break;
        case CHIP_FIJI:
                adev->has_uvd = true;
-               adev->cg_flags = AMDGPU_CG_SUPPORT_UVD_MGCG |
-                               AMDGPU_CG_SUPPORT_VCE_MGCG;
+               adev->cg_flags = 0;
                adev->pg_flags = 0;
                adev->external_rev_id = adev->rev_id + 0x3c;
                break;
@@ -1457,8 +1457,7 @@ static int vi_common_early_init(void *handle)
        case CHIP_STONEY:
                adev->has_uvd = true;
                adev->cg_flags = 0;
-               /* Disable UVD pg */
-               adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
+               adev->pg_flags = 0;
                adev->external_rev_id = adev->rev_id + 0x1;
                break;
        default:
index 9be007081b72a8b5c75b4baf1fe01e92bfe792bc..a902ae037398389cf6e19c1ab7379ca64e6a46ed 100644 (file)
@@ -194,7 +194,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 
        kfree(p);
 
-       kfree((void *)work);
+       kfree(work);
 }
 
 static void kfd_process_destroy_delayed(struct rcu_head *rcu)
index 1195d06f55bc491930f434684324a6618f22d087..dbf7e6413cab4207940659b81ceb02095b4ae515 100644 (file)
@@ -85,6 +85,38 @@ enum amd_powergating_state {
        AMD_PG_STATE_UNGATE,
 };
 
+/* CG flags */
+#define AMD_CG_SUPPORT_GFX_MGCG                        (1 << 0)
+#define AMD_CG_SUPPORT_GFX_MGLS                        (1 << 1)
+#define AMD_CG_SUPPORT_GFX_CGCG                        (1 << 2)
+#define AMD_CG_SUPPORT_GFX_CGLS                        (1 << 3)
+#define AMD_CG_SUPPORT_GFX_CGTS                        (1 << 4)
+#define AMD_CG_SUPPORT_GFX_CGTS_LS             (1 << 5)
+#define AMD_CG_SUPPORT_GFX_CP_LS               (1 << 6)
+#define AMD_CG_SUPPORT_GFX_RLC_LS              (1 << 7)
+#define AMD_CG_SUPPORT_MC_LS                   (1 << 8)
+#define AMD_CG_SUPPORT_MC_MGCG                 (1 << 9)
+#define AMD_CG_SUPPORT_SDMA_LS                 (1 << 10)
+#define AMD_CG_SUPPORT_SDMA_MGCG               (1 << 11)
+#define AMD_CG_SUPPORT_BIF_LS                  (1 << 12)
+#define AMD_CG_SUPPORT_UVD_MGCG                        (1 << 13)
+#define AMD_CG_SUPPORT_VCE_MGCG                        (1 << 14)
+#define AMD_CG_SUPPORT_HDP_LS                  (1 << 15)
+#define AMD_CG_SUPPORT_HDP_MGCG                        (1 << 16)
+
+/* PG flags */
+#define AMD_PG_SUPPORT_GFX_PG                  (1 << 0)
+#define AMD_PG_SUPPORT_GFX_SMG                 (1 << 1)
+#define AMD_PG_SUPPORT_GFX_DMG                 (1 << 2)
+#define AMD_PG_SUPPORT_UVD                     (1 << 3)
+#define AMD_PG_SUPPORT_VCE                     (1 << 4)
+#define AMD_PG_SUPPORT_CP                      (1 << 5)
+#define AMD_PG_SUPPORT_GDS                     (1 << 6)
+#define AMD_PG_SUPPORT_RLC_SMU_HS              (1 << 7)
+#define AMD_PG_SUPPORT_SDMA                    (1 << 8)
+#define AMD_PG_SUPPORT_ACP                     (1 << 9)
+#define AMD_PG_SUPPORT_SAMU                    (1 << 10)
+
 enum amd_pm_state_type {
        /* not used for dpm */
        POWER_STATE_TYPE_DEFAULT,
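
These shared definitions are what every AMDGPU_CG_*/AMDGPU_PG_* to AMD_CG_*/AMD_PG_* rename in this commit resolves to. A minimal sketch of how the bits are combined and tested, mirroring the early-return guards added in the UVD/VCE hunks; the device struct here is a stand-in, not the real amdgpu_device.

#include <stdio.h>

#define AMD_CG_SUPPORT_UVD_MGCG	(1 << 13)
#define AMD_PG_SUPPORT_UVD	(1 << 3)
#define AMD_PG_SUPPORT_VCE	(1 << 4)

struct fake_adev {
	unsigned int cg_flags;
	unsigned int pg_flags;
};

int main(void)
{
	struct fake_adev adev = {
		.cg_flags = AMD_CG_SUPPORT_UVD_MGCG,
		.pg_flags = AMD_PG_SUPPORT_UVD | AMD_PG_SUPPORT_VCE, /* OR bits to combine */
	};

	/* the guards added in the set_powergating_state hunks test one bit */
	if (!(adev.pg_flags & AMD_PG_SUPPORT_UVD))
		puts("UVD power gating unsupported, skip gating");
	else
		puts("UVD power gating supported");
	return 0;
}
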
index 713aec95469271e7a678b73ab917f9a49258455e..aec38fc3834f9d867b4477c2f1d1d1166285f1fe 100644 (file)
@@ -109,6 +109,8 @@ enum cgs_system_info_id {
        CGS_SYSTEM_INFO_ADAPTER_BDF_ID = 1,
        CGS_SYSTEM_INFO_PCIE_GEN_INFO,
        CGS_SYSTEM_INFO_PCIE_MLW,
+       CGS_SYSTEM_INFO_CG_FLAGS,
+       CGS_SYSTEM_INFO_PG_FLAGS,
        CGS_SYSTEM_INFO_ID_MAXIMUM,
 };
 
index 8f5d5edcf193b67a67f6990824f8ef8260e1afa5..aa67244a77ae38cf69761bc964c5f1d4400874c5 100644 (file)
@@ -64,6 +64,11 @@ static int pp_sw_init(void *handle)
        if (ret == 0)
                ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 
+       if (ret)
+               printk("amdgpu: powerplay initialization failed\n");
+       else
+               printk("amdgpu: powerplay initialized\n");
+
        return ret;
 }
 
index 52a3efc97f05651bfb0daeaf475d5a4bfadbd43f..46410e3c73493acce741f955f60790fc9745256e 100644 (file)
@@ -31,7 +31,7 @@
 static int pem_init(struct pp_eventmgr *eventmgr)
 {
        int result = 0;
-       struct pem_event_data event_data;
+       struct pem_event_data event_data = { {0} };
 
        /* Initialize PowerPlay feature info */
        pem_init_feature_info(eventmgr);
@@ -52,7 +52,7 @@ static int pem_init(struct pp_eventmgr *eventmgr)
 
 static void pem_fini(struct pp_eventmgr *eventmgr)
 {
-       struct pem_event_data event_data;
+       struct pem_event_data event_data = { {0} };
 
        pem_uninit_featureInfo(eventmgr);
        pem_unregister_interrupts(eventmgr);
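
The `= { {0} }` initializers added in both functions rely on a C guarantee worth spelling out: explicitly initializing any member of an aggregate zero-initializes all the rest. A tiny demonstration with a made-up stand-in for pem_event_data:

#include <stdio.h>

struct inner { int requested_state; };
struct event_data { struct inner data; int valid_fields; };

int main(void)
{
	struct event_data ev = { {0} };	/* everything, not just .data, is zeroed */

	printf("%d %d\n", ev.data.requested_state, ev.valid_fields);	/* 0 0 */
	return 0;
}
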
index 0874ab42ee95e7e83d4a027639b97a1a45a35890..cf01177ca3b5e36eb845fe187f13b7135cde03b6 100644 (file)
@@ -174,6 +174,8 @@ static int cz_initialize_dpm_defaults(struct pp_hwmgr *hwmgr)
 {
        struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
        uint32_t i;
+       struct cgs_system_info sys_info = {0};
+       int result;
 
        cz_hwmgr->gfx_ramp_step = 256*25/100;
 
@@ -247,6 +249,22 @@ static int cz_initialize_dpm_defaults(struct pp_hwmgr *hwmgr)
        phm_cap_set(hwmgr->platform_descriptor.platformCaps,
                                   PHM_PlatformCaps_DisableVoltageIsland);
 
+       phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+                     PHM_PlatformCaps_UVDPowerGating);
+       phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+                     PHM_PlatformCaps_VCEPowerGating);
+       sys_info.size = sizeof(struct cgs_system_info);
+       sys_info.info_id = CGS_SYSTEM_INFO_PG_FLAGS;
+       result = cgs_query_system_info(hwmgr->device, &sys_info);
+       if (!result) {
+               if (sys_info.value & AMD_PG_SUPPORT_UVD)
+                       phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+                                     PHM_PlatformCaps_UVDPowerGating);
+               if (sys_info.value & AMD_PG_SUPPORT_VCE)
+                       phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+                                     PHM_PlatformCaps_VCEPowerGating);
+       }
+
        return 0;
 }
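
Both this hunk and the tonga_hwmgr one below use the same CGS query idiom: zero-init the request, fill in .size and .info_id, call the query, and trust .value only on success. A standalone mock of that calling pattern; the struct layout and the query function are stubbed here so the sketch compiles on its own, and only the idiom mirrors the driver.

#include <stdio.h>
#include <string.h>

#define AMD_PG_SUPPORT_UVD	(1 << 3)
#define AMD_PG_SUPPORT_VCE	(1 << 4)

enum { CGS_SYSTEM_INFO_PG_FLAGS = 5 };

struct cgs_system_info {
	size_t size;		/* must be set: lets the ABI grow safely */
	unsigned int info_id;	/* which datum is being requested        */
	unsigned long value;	/* the answer, valid only on success     */
};

/* mock: pretend the device supports UVD power gating only */
static int cgs_query_system_info(void *device, struct cgs_system_info *info)
{
	(void)device;
	if (info->size != sizeof(*info))
		return -1;
	if (info->info_id == CGS_SYSTEM_INFO_PG_FLAGS)
		info->value = AMD_PG_SUPPORT_UVD;
	return 0;
}

int main(void)
{
	struct cgs_system_info sys_info = {0};

	sys_info.size = sizeof(struct cgs_system_info);
	sys_info.info_id = CGS_SYSTEM_INFO_PG_FLAGS;
	if (!cgs_query_system_info(NULL, &sys_info)) {
		if (sys_info.value & AMD_PG_SUPPORT_UVD)
			puts("UVD power gating supported");
		if (sys_info.value & AMD_PG_SUPPORT_VCE)
			puts("VCE power gating supported");
	}
	return 0;
}
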
 
index 44a925006479176617c54a5fa3c33a1eb6b92bad..980d3bf8ea768e161ffbba4cde1b960ea46e7819 100644 (file)
@@ -4451,6 +4451,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
        pp_atomctrl_gpio_pin_assignment gpio_pin_assignment;
        struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable);
        phw_tonga_ulv_parm *ulv;
+       struct cgs_system_info sys_info = {0};
 
        PP_ASSERT_WITH_CODE((NULL != hwmgr),
                "Invalid Parameter!", return -1;);
@@ -4615,9 +4616,23 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
        data->vddc_phase_shed_control = 0;
 
-       if (0 == result) {
-               struct cgs_system_info sys_info = {0};
+       phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+                     PHM_PlatformCaps_UVDPowerGating);
+       phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+                     PHM_PlatformCaps_VCEPowerGating);
+       sys_info.size = sizeof(struct cgs_system_info);
+       sys_info.info_id = CGS_SYSTEM_INFO_PG_FLAGS;
+       result = cgs_query_system_info(hwmgr->device, &sys_info);
+       if (!result) {
+               if (sys_info.value & AMD_PG_SUPPORT_UVD)
+                       phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+                                     PHM_PlatformCaps_UVDPowerGating);
+               if (sys_info.value & AMD_PG_SUPPORT_VCE)
+                       phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+                                     PHM_PlatformCaps_VCEPowerGating);
+       }
 
+       if (0 == result) {
                data->is_tlu_enabled = 0;
                hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =
                        TONGA_MAX_HARDWARE_POWERLEVELS;
index 873a8d264d5c6aa7af31322fd5818dc1f46c7317..ec222c665602df7113cb4ec75b3fc2633d86a8a4 100644 (file)
@@ -272,6 +272,9 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
                                UCODE_ID_CP_MEC_JT1_MASK |
                                UCODE_ID_CP_MEC_JT2_MASK;
 
+       if (smumgr->chip_id == CHIP_STONEY)
+               fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
        cz_request_smu_load_fw(smumgr);
        cz_check_fw_load_finish(smumgr, fw_to_check);
 
@@ -282,7 +285,7 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
        return ret;
 }
 
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
                        enum cz_scratch_entry firmware_enum)
 {
        uint8_t ret = 0;
@@ -292,7 +295,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
                ret = UCODE_ID_SDMA0;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
-               ret = UCODE_ID_SDMA1;
+               if (smumgr->chip_id == CHIP_STONEY)
+                       ret = UCODE_ID_SDMA0;
+               else
+                       ret = UCODE_ID_SDMA1;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
                ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
                ret = UCODE_ID_CP_MEC_JT1;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
-               ret = UCODE_ID_CP_MEC_JT2;
+               if (smumgr->chip_id == CHIP_STONEY)
+                       ret = UCODE_ID_CP_MEC_JT1;
+               else
+                       ret = UCODE_ID_CP_MEC_JT2;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
                ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@ static int cz_smu_populate_single_scratch_task(
        struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
        task->type = type;
-       task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+       task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
        task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
        for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@ static int cz_smu_populate_single_ucode_load_task(
        struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
        task->type = TASK_TYPE_UCODE_LOAD;
-       task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+       task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
        task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
        for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@ static int cz_smu_construct_toc_for_vddgfx_exit(struct pp_smumgr *smumgr)
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
 
@@ -551,7 +566,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
 
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@ static int cz_smu_populate_firmware_entries(struct pp_smumgr *smumgr)
 
        for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
 
-               firmware_type = cz_translate_firmware_enum_to_arg(
+               firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
                                        firmware_list[i]);
 
                ucode_id = cz_convert_fw_type_to_cgs(firmware_type);
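
Every CHIP_STONEY branch added in this file encodes the same rule: Stoney has one SDMA engine and one MEC jump table, so requests for the "second" firmware are aliased back to the first. The mapping condensed into one function; the enum values are invented for this sketch.

#include <stdio.h>

enum chip { CHIP_CARRIZO, CHIP_STONEY };
enum ucode { UCODE_ID_SDMA0, UCODE_ID_SDMA1,
	     UCODE_ID_CP_MEC_JT1, UCODE_ID_CP_MEC_JT2 };

static enum ucode remap_for_chip(enum chip id, enum ucode fw)
{
	if (id != CHIP_STONEY)
		return fw;
	if (fw == UCODE_ID_SDMA1)
		return UCODE_ID_SDMA0;		/* only one SDMA engine    */
	if (fw == UCODE_ID_CP_MEC_JT2)
		return UCODE_ID_CP_MEC_JT1;	/* only one MEC jump table */
	return fw;
}

int main(void)
{
	printf("%d\n", remap_for_chip(CHIP_STONEY, UCODE_ID_SDMA1) == UCODE_ID_SDMA0); /* 1 */
	return 0;
}
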
index 3f74193885f1ff1df95db09fa76e01174f6353b4..9a7b44616b552920b6ef8f62553bd791d36af935 100644 (file)
@@ -65,8 +65,6 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
         */
        state->allow_modeset = true;
 
-       state->num_connector = ACCESS_ONCE(dev->mode_config.num_connector);
-
        state->crtcs = kcalloc(dev->mode_config.num_crtc,
                               sizeof(*state->crtcs), GFP_KERNEL);
        if (!state->crtcs)
@@ -83,16 +81,6 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
                                      sizeof(*state->plane_states), GFP_KERNEL);
        if (!state->plane_states)
                goto fail;
-       state->connectors = kcalloc(state->num_connector,
-                                   sizeof(*state->connectors),
-                                   GFP_KERNEL);
-       if (!state->connectors)
-               goto fail;
-       state->connector_states = kcalloc(state->num_connector,
-                                         sizeof(*state->connector_states),
-                                         GFP_KERNEL);
-       if (!state->connector_states)
-               goto fail;
 
        state->dev = dev;
 
@@ -823,19 +811,27 @@ drm_atomic_get_connector_state(struct drm_atomic_state *state,
 
        index = drm_connector_index(connector);
 
-       /*
-        * Construction of atomic state updates can race with a connector
-        * hot-add which might overflow. In this case flip the table and just
-        * restart the entire ioctl - no one is fast enough to livelock a cpu
-        * with physical hotplug events anyway.
-        *
-        * Note that we only grab the indexes once we have the right lock to
-        * prevent hotplug/unplugging of connectors. So removal is no problem,
-        * at most the array is a bit too large.
-        */
        if (index >= state->num_connector) {
-               DRM_DEBUG_ATOMIC("Hot-added connector would overflow state array, restarting\n");
-               return ERR_PTR(-EAGAIN);
+               struct drm_connector **c;
+               struct drm_connector_state **cs;
+               int alloc = max(index + 1, config->num_connector);
+
+               c = krealloc(state->connectors, alloc * sizeof(*state->connectors), GFP_KERNEL);
+               if (!c)
+                       return ERR_PTR(-ENOMEM);
+
+               state->connectors = c;
+               memset(&state->connectors[state->num_connector], 0,
+                      sizeof(*state->connectors) * (alloc - state->num_connector));
+
+               cs = krealloc(state->connector_states, alloc * sizeof(*state->connector_states), GFP_KERNEL);
+               if (!cs)
+                       return ERR_PTR(-ENOMEM);
+
+               state->connector_states = cs;
+               memset(&state->connector_states[state->num_connector], 0,
+                      sizeof(*state->connector_states) * (alloc - state->num_connector));
+               state->num_connector = alloc;
        }
 
        if (state->connector_states[index])
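
The krealloc block replacing the -EAGAIN restart above is a grow-on-demand array: krealloc preserves existing entries, so only the newly added tail needs zeroing before num_connector is bumped. The same pattern in standalone form, with userspace realloc standing in for krealloc:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int grow_ptr_array(void ***arr, size_t *len, size_t index, size_t want)
{
	size_t alloc = index + 1 > want ? index + 1 : want;	/* max(index + 1, want) */
	void **p;

	if (index < *len)
		return 0;				/* already large enough */

	p = realloc(*arr, alloc * sizeof(*p));
	if (!p)
		return -1;				/* old array is still valid */

	memset(&p[*len], 0, (alloc - *len) * sizeof(*p));	/* zero only the new tail */
	*arr = p;
	*len = alloc;
	return 0;
}

int main(void)
{
	void **connectors = NULL;
	size_t n = 0;

	if (grow_ptr_array(&connectors, &n, 4, 2) == 0)
		printf("array grown to %zu slots\n", n);	/* 5 */
	free(connectors);
	return 0;
}
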
index 57cccd68ca522548c6c098ee72798a5b3369def0..4f2d3e161593057bfff2b046c1b1f30a3cc90d41 100644 (file)
@@ -946,9 +946,23 @@ static void wait_for_fences(struct drm_device *dev,
        }
 }
 
-static bool framebuffer_changed(struct drm_device *dev,
-                               struct drm_atomic_state *old_state,
-                               struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update.  This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+                                          struct drm_atomic_state *old_state,
+                                          struct drm_crtc *crtc)
 {
        struct drm_plane *plane;
        struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@ static bool framebuffer_changed(struct drm_device *dev,
 
        return false;
 }
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
 
 /**
  * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
                if (old_state->legacy_cursor_update)
                        continue;
 
-               if (!framebuffer_changed(dev, old_state, crtc))
+               if (!drm_atomic_helper_framebuffer_changed(dev,
+                               old_state, crtc))
                        continue;
 
                ret = drm_crtc_vblank_get(crtc);
@@ -1477,7 +1493,7 @@ void drm_atomic_helper_swap_state(struct drm_device *dev,
 {
        int i;
 
-       for (i = 0; i < dev->mode_config.num_connector; i++) {
+       for (i = 0; i < state->num_connector; i++) {
                struct drm_connector *connector = state->connectors[i];
 
                if (!connector)
index d40bab29747edb0ea989afd81b0e5b5102dfb555..f6191215b2cbc97c11fbfda5eeadb9f61b2387e8 100644 (file)
@@ -918,12 +918,19 @@ int drm_connector_init(struct drm_device *dev,
        connector->base.properties = &connector->properties;
        connector->dev = dev;
        connector->funcs = funcs;
+
+       connector->connector_id = ida_simple_get(&config->connector_ida, 0, 0, GFP_KERNEL);
+       if (connector->connector_id < 0) {
+               ret = connector->connector_id;
+               goto out_put;
+       }
+
        connector->connector_type = connector_type;
        connector->connector_type_id =
                ida_simple_get(connector_ida, 1, 0, GFP_KERNEL);
        if (connector->connector_type_id < 0) {
                ret = connector->connector_type_id;
-               goto out_put;
+               goto out_put_id;
        }
        connector->name =
                kasprintf(GFP_KERNEL, "%s-%d",
@@ -931,7 +938,7 @@ int drm_connector_init(struct drm_device *dev,
                          connector->connector_type_id);
        if (!connector->name) {
                ret = -ENOMEM;
-               goto out_put;
+               goto out_put_type_id;
        }
 
        INIT_LIST_HEAD(&connector->probed_modes);
@@ -959,7 +966,12 @@ int drm_connector_init(struct drm_device *dev,
        }
 
        connector->debugfs_entry = NULL;
-
+out_put_type_id:
+       if (ret)
+               ida_remove(connector_ida, connector->connector_type_id);
+out_put_id:
+       if (ret)
+               ida_remove(&config->connector_ida, connector->connector_id);
 out_put:
        if (ret)
                drm_mode_object_put(dev, &connector->base);
@@ -996,6 +1008,9 @@ void drm_connector_cleanup(struct drm_connector *connector)
        ida_remove(&drm_connector_enum_list[connector->connector_type].ida,
                   connector->connector_type_id);
 
+       ida_remove(&dev->mode_config.connector_ida,
+                  connector->connector_id);
+
        kfree(connector->display_info.bus_formats);
        drm_mode_object_put(dev, &connector->base);
        kfree(connector->name);
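
The new out_put_type_id/out_put_id labels read unusually because the success path also falls through them: every label body is guarded by `if (ret)`, so an error at step N unwinds all earlier acquisitions while success walks through as no-ops. A skeleton of the pattern with placeholder resources:

#include <stdio.h>

static int get_a(void)	{ return 0; }
static int get_b(void)	{ return 0; }
static int get_c(void)	{ return -1; }	/* pretend the last step fails */
static void put_a(void)	{ puts("released a"); }
static void put_b(void)	{ puts("released b"); }

static int init_object(void)
{
	int ret;

	ret = get_a();
	if (ret)
		goto out;
	ret = get_b();
	if (ret)
		goto out_put_a;
	ret = get_c();
	if (ret)
		goto out_put_b;

	/* ... more initialization here in the real function ... */

out_put_b:
	if (ret)
		put_b();
out_put_a:
	if (ret)
		put_a();
out:
	return ret;
}

int main(void)
{
	printf("init_object() = %d\n", init_object());
	return 0;
}
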
@@ -1012,32 +1027,6 @@ void drm_connector_cleanup(struct drm_connector *connector)
 }
 EXPORT_SYMBOL(drm_connector_cleanup);
 
-/**
- * drm_connector_index - find the index of a registered connector
- * @connector: connector to find index for
- *
- * Given a registered connector, return the index of that connector within a DRM
- * device's list of connectors.
- */
-unsigned int drm_connector_index(struct drm_connector *connector)
-{
-       unsigned int index = 0;
-       struct drm_connector *tmp;
-       struct drm_mode_config *config = &connector->dev->mode_config;
-
-       WARN_ON(!drm_modeset_is_locked(&config->connection_mutex));
-
-       drm_for_each_connector(tmp, connector->dev) {
-               if (tmp == connector)
-                       return index;
-
-               index++;
-       }
-
-       BUG();
-}
-EXPORT_SYMBOL(drm_connector_index);
-
 /**
  * drm_connector_register - register a connector
  * @connector: the connector to register
@@ -5789,6 +5778,7 @@ void drm_mode_config_init(struct drm_device *dev)
        INIT_LIST_HEAD(&dev->mode_config.plane_list);
        idr_init(&dev->mode_config.crtc_idr);
        idr_init(&dev->mode_config.tile_idr);
+       ida_init(&dev->mode_config.connector_ida);
 
        drm_modeset_lock_all(dev);
        drm_mode_create_standard_properties(dev);
@@ -5869,6 +5859,7 @@ void drm_mode_config_cleanup(struct drm_device *dev)
                crtc->funcs->destroy(crtc);
        }
 
+       ida_destroy(&dev->mode_config.connector_ida);
        idr_destroy(&dev->mode_config.tile_idr);
        idr_destroy(&dev->mode_config.crtc_idr);
        drm_modeset_lock_fini(&dev->mode_config.connection_mutex);
index 6ed90a2437e50438c713fc028f28705e7954b806..27fbd79d0daf0003be2014cee00c9dcdb49fdb43 100644 (file)
@@ -803,12 +803,33 @@ static struct drm_dp_mst_branch *drm_dp_add_mst_branch_device(u8 lct, u8 *rad)
        return mstb;
 }
 
+static void drm_dp_free_mst_port(struct kref *kref);
+
+static void drm_dp_free_mst_branch_device(struct kref *kref)
+{
+       struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref);
+       if (mstb->port_parent) {
+               if (list_empty(&mstb->port_parent->next))
+                       kref_put(&mstb->port_parent->kref, drm_dp_free_mst_port);
+       }
+       kfree(mstb);
+}
+
 static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 {
        struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref);
        struct drm_dp_mst_port *port, *tmp;
        bool wake_tx = false;
 
+       /*
+        * init kref again so that the ports can use it to drop the mst
+        * branch once it is no longer needed
+        */
+       kref_init(kref);
+
+       if (mstb->port_parent && list_empty(&mstb->port_parent->next))
+               kref_get(&mstb->port_parent->kref);
+
        /*
         * destroy all ports - don't need lock
         * as there are no more references to the mst branch
@@ -835,7 +856,8 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 
        if (wake_tx)
                wake_up(&mstb->mgr->tx_waitq);
-       kfree(mstb);
+
+       kref_put(kref, drm_dp_free_mst_branch_device);
 }
 
 static void drm_dp_put_mst_branch_device(struct drm_dp_mst_branch *mstb)
@@ -883,6 +905,7 @@ static void drm_dp_destroy_port(struct kref *kref)
                         * from an EDID retrieval */
 
                        mutex_lock(&mgr->destroy_connector_lock);
+                       kref_get(&port->parent->kref);
                        list_add(&port->next, &mgr->destroy_connector_list);
                        mutex_unlock(&mgr->destroy_connector_lock);
                        schedule_work(&mgr->destroy_connector_work);
@@ -1018,18 +1041,27 @@ static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port)
        return send_link;
 }
 
-static void drm_dp_check_port_guid(struct drm_dp_mst_branch *mstb,
-                                  struct drm_dp_mst_port *port)
+static void drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, u8 *guid)
 {
        int ret;
-       if (port->dpcd_rev >= 0x12) {
-               port->guid_valid = drm_dp_validate_guid(mstb->mgr, port->guid);
-               if (!port->guid_valid) {
-                       ret = drm_dp_send_dpcd_write(mstb->mgr,
-                                                    port,
-                                                    DP_GUID,
-                                                    16, port->guid);
-                       port->guid_valid = true;
+
+       memcpy(mstb->guid, guid, 16);
+
+       if (!drm_dp_validate_guid(mstb->mgr, mstb->guid)) {
+               if (mstb->port_parent) {
+                       ret = drm_dp_send_dpcd_write(
+                                       mstb->mgr,
+                                       mstb->port_parent,
+                                       DP_GUID,
+                                       16,
+                                       mstb->guid);
+               } else {
+
+                       ret = drm_dp_dpcd_write(
+                                       mstb->mgr->aux,
+                                       DP_GUID,
+                                       mstb->guid,
+                                       16);
                }
        }
 }
@@ -1086,7 +1118,6 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
        port->dpcd_rev = port_msg->dpcd_revision;
        port->num_sdp_streams = port_msg->num_sdp_streams;
        port->num_sdp_stream_sinks = port_msg->num_sdp_stream_sinks;
-       memcpy(port->guid, port_msg->peer_guid, 16);
 
        /* manage mstb port lists with mgr lock - take a reference
           for this list */
@@ -1099,11 +1130,9 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
 
        if (old_ddps != port->ddps) {
                if (port->ddps) {
-                       drm_dp_check_port_guid(mstb, port);
                        if (!port->input)
                                drm_dp_send_enum_path_resources(mstb->mgr, mstb, port);
                } else {
-                       port->guid_valid = false;
                        port->available_pbn = 0;
                        }
        }
@@ -1162,10 +1191,8 @@ static void drm_dp_update_port(struct drm_dp_mst_branch *mstb,
 
        if (old_ddps != port->ddps) {
                if (port->ddps) {
-                       drm_dp_check_port_guid(mstb, port);
                        dowork = true;
                } else {
-                       port->guid_valid = false;
                        port->available_pbn = 0;
                }
        }
@@ -1222,13 +1249,14 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper(
        struct drm_dp_mst_branch *found_mstb;
        struct drm_dp_mst_port *port;
 
+       if (memcmp(mstb->guid, guid, 16) == 0)
+               return mstb;
+
+
        list_for_each_entry(port, &mstb->ports, next) {
                if (!port->mstb)
                        continue;
 
-               if (port->guid_valid && memcmp(port->guid, guid, 16) == 0)
-                       return port->mstb;
-
                found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid);
 
                if (found_mstb)
@@ -1247,10 +1275,7 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device_by_guid(
        /* find the port by iterating down */
        mutex_lock(&mgr->lock);
 
-       if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0)
-               mstb = mgr->mst_primary;
-       else
-               mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
+       mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
 
        if (mstb)
                kref_get(&mstb->kref);
@@ -1555,6 +1580,9 @@ static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
                                       txmsg->reply.u.link_addr.ports[i].num_sdp_streams,
                                       txmsg->reply.u.link_addr.ports[i].num_sdp_stream_sinks);
                        }
+
+                       drm_dp_check_mstb_guid(mstb, txmsg->reply.u.link_addr.guid);
+
                        for (i = 0; i < txmsg->reply.u.link_addr.nports; i++) {
                                drm_dp_add_port(mstb, mgr->dev, &txmsg->reply.u.link_addr.ports[i]);
                        }
@@ -1602,6 +1630,37 @@ static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
        return 0;
 }
 
+static struct drm_dp_mst_port *drm_dp_get_last_connected_port_to_mstb(struct drm_dp_mst_branch *mstb)
+{
+       if (!mstb->port_parent)
+               return NULL;
+
+       if (mstb->port_parent->mstb != mstb)
+               return mstb->port_parent;
+
+       return drm_dp_get_last_connected_port_to_mstb(mstb->port_parent->parent);
+}
+
+static struct drm_dp_mst_branch *drm_dp_get_last_connected_port_and_mstb(struct drm_dp_mst_topology_mgr *mgr,
+                                                                        struct drm_dp_mst_branch *mstb,
+                                                                        int *port_num)
+{
+       struct drm_dp_mst_branch *rmstb = NULL;
+       struct drm_dp_mst_port *found_port;
+       mutex_lock(&mgr->lock);
+       if (mgr->mst_primary) {
+               found_port = drm_dp_get_last_connected_port_to_mstb(mstb);
+
+               if (found_port) {
+                       rmstb = found_port->parent;
+                       kref_get(&rmstb->kref);
+                       *port_num = found_port->port_num;
+               }
+       }
+       mutex_unlock(&mgr->lock);
+       return rmstb;
+}
+
 static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
                                   struct drm_dp_mst_port *port,
                                   int id,
@@ -1609,13 +1668,18 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
 {
        struct drm_dp_sideband_msg_tx *txmsg;
        struct drm_dp_mst_branch *mstb;
-       int len, ret;
+       int len, ret, port_num;
        u8 sinks[DRM_DP_MAX_SDP_STREAMS];
        int i;
 
+       port_num = port->port_num;
        mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent);
-       if (!mstb)
-               return -EINVAL;
+       if (!mstb) {
+               mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num);
+
+               if (!mstb)
+                       return -EINVAL;
+       }
 
        txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
        if (!txmsg) {
@@ -1627,7 +1691,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
                sinks[i] = i;
 
        txmsg->dst = mstb;
-       len = build_allocate_payload(txmsg, port->port_num,
+       len = build_allocate_payload(txmsg, port_num,
                                     id,
                                     pbn, port->num_sdp_streams, sinks);
 
@@ -1983,31 +2047,17 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
                mgr->mst_primary = mstb;
                kref_get(&mgr->mst_primary->kref);
 
-               {
-                       struct drm_dp_payload reset_pay;
-                       reset_pay.start_slot = 0;
-                       reset_pay.num_slots = 0x3f;
-                       drm_dp_dpcd_write_payload(mgr, 0, &reset_pay);
-               }
-
                ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
-                                        DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
+                                                        DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
                if (ret < 0) {
                        goto out_unlock;
                }
 
-
-               /* sort out guid */
-               ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, mgr->guid, 16);
-               if (ret != 16) {
-                       DRM_DEBUG_KMS("failed to read DP GUID %d\n", ret);
-                       goto out_unlock;
-               }
-
-               mgr->guid_valid = drm_dp_validate_guid(mgr, mgr->guid);
-               if (!mgr->guid_valid) {
-                       ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, mgr->guid, 16);
-                       mgr->guid_valid = true;
+               {
+                       struct drm_dp_payload reset_pay;
+                       reset_pay.start_slot = 0;
+                       reset_pay.num_slots = 0x3f;
+                       drm_dp_dpcd_write_payload(mgr, 0, &reset_pay);
                }
 
                queue_work(system_long_wq, &mgr->work);
@@ -2231,6 +2281,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
                        }
 
                        drm_dp_update_port(mstb, &msg.u.conn_stat);
+
                        DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type);
                        (*mgr->cbs->hotplug)(mgr);
 
@@ -2446,6 +2497,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp
                DRM_DEBUG_KMS("payload: vcpi %d already allocated for pbn %d - requested pbn %d\n", port->vcpi.vcpi, port->vcpi.pbn, pbn);
                if (pbn == port->vcpi.pbn) {
                        *slots = port->vcpi.num_slots;
+                       drm_dp_put_port(port);
                        return true;
                }
        }
@@ -2605,32 +2657,31 @@ EXPORT_SYMBOL(drm_dp_check_act_status);
  */
 int drm_dp_calc_pbn_mode(int clock, int bpp)
 {
-       fixed20_12 pix_bw;
-       fixed20_12 fbpp;
-       fixed20_12 result;
-       fixed20_12 margin, tmp;
-       u32 res;
-
-       pix_bw.full = dfixed_const(clock);
-       fbpp.full = dfixed_const(bpp);
-       tmp.full = dfixed_const(8);
-       fbpp.full = dfixed_div(fbpp, tmp);
-
-       result.full = dfixed_mul(pix_bw, fbpp);
-       margin.full = dfixed_const(54);
-       tmp.full = dfixed_const(64);
-       margin.full = dfixed_div(margin, tmp);
-       result.full = dfixed_div(result, margin);
-
-       margin.full = dfixed_const(1006);
-       tmp.full = dfixed_const(1000);
-       margin.full = dfixed_div(margin, tmp);
-       result.full = dfixed_mul(result, margin);
-
-       result.full = dfixed_div(result, tmp);
-       result.full = dfixed_ceil(result);
-       res = dfixed_trunc(result);
-       return res;
+       u64 kbps;
+       s64 peak_kbps;
+       u32 numerator;
+       u32 denominator;
+
+       kbps = clock * bpp;
+
+       /*
+        * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
+        * The unit of 54/64Mbytes/sec is an arbitrary unit chosen as a
+        * common multiplier so that PBN comes out as an integer for all
+        * link rate/lane count combinations
+        * calculate
+        * peak_kbps *= (1006/1000)
+        * peak_kbps *= (64/54)
+        * peak_kbps /= 8    convert bits to bytes
+        */
+
+       numerator = 64 * 1006;
+       denominator = 54 * 8 * 1000 * 1000;
+
+       kbps *= numerator;
+       peak_kbps = drm_fixp_from_fraction(kbps, denominator);
+
+       return drm_fixp2int_ceil(peak_kbps);
 }
 EXPORT_SYMBOL(drm_dp_calc_pbn_mode);
 
@@ -2638,11 +2689,23 @@ static int test_calc_pbn_mode(void)
 {
        int ret;
        ret = drm_dp_calc_pbn_mode(154000, 30);
-       if (ret != 689)
+       if (ret != 689) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               154000, 30, 689, ret);
                return -EINVAL;
+       }
        ret = drm_dp_calc_pbn_mode(234000, 30);
-       if (ret != 1047)
+       if (ret != 1047) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               234000, 30, 1047, ret);
+               return -EINVAL;
+       }
+       ret = drm_dp_calc_pbn_mode(297000, 24);
+       if (ret != 1063) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               297000, 24, 1063, ret);
                return -EINVAL;
+       }
        return 0;
 }
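
The rewritten formula can be sanity-checked with plain 64-bit integer arithmetic, since PBN = ceil(clock_kHz * bpp * 64 * 1006 / (54 * 8 * 1000 * 1000)). A throwaway check of the three self-test vectors above; this is deliberately not the kernel's drm_fixp fixed-point path.

#include <stdio.h>
#include <stdint.h>

static int pbn(uint64_t clock_khz, uint64_t bpp)
{
	uint64_t num = clock_khz * bpp * 64 * 1006;	/* kbps scaled by the margins */
	uint64_t den = 54ULL * 8 * 1000 * 1000;

	return (int)((num + den - 1) / den);		/* round up, like drm_fixp2int_ceil */
}

int main(void)
{
	printf("%d %d %d\n",
	       pbn(154000, 30),		/* 689  */
	       pbn(234000, 30),		/* 1047 */
	       pbn(297000, 24));	/* 1063 */
	return 0;
}
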
 
@@ -2783,6 +2846,13 @@ static void drm_dp_tx_work(struct work_struct *work)
        mutex_unlock(&mgr->qlock);
 }
 
+static void drm_dp_free_mst_port(struct kref *kref)
+{
+       struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, kref);
+       kref_put(&port->parent->kref, drm_dp_free_mst_branch_device);
+       kfree(port);
+}
+
 static void drm_dp_destroy_connector_work(struct work_struct *work)
 {
        struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work);
@@ -2803,13 +2873,22 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
                list_del(&port->next);
                mutex_unlock(&mgr->destroy_connector_lock);
 
+               kref_init(&port->kref);
+               INIT_LIST_HEAD(&port->next);
+
                mgr->cbs->destroy_connector(mgr, port->connector);
 
                drm_dp_port_teardown_pdt(port, port->pdt);
 
-               if (!port->input && port->vcpi.vcpi > 0)
-                       drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-               kfree(port);
+               if (!port->input && port->vcpi.vcpi > 0) {
+                       if (mgr->mst_state) {
+                               drm_dp_mst_reset_vcpi_slots(mgr, port);
+                               drm_dp_update_payload_part1(mgr);
+                               drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
+                       }
+               }
+
+               kref_put(&port->kref, drm_dp_free_mst_port);
                send_hotplug = true;
        }
        if (send_hotplug)
@@ -2847,6 +2926,9 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
        mgr->max_dpcd_transaction_bytes = max_dpcd_transaction_bytes;
        mgr->max_payloads = max_payloads;
        mgr->conn_base_id = conn_base_id;
+       if (max_payloads + 1 > sizeof(mgr->payload_mask) * 8 ||
+           max_payloads + 1 > sizeof(mgr->vcpi_mask) * 8)
+               return -EINVAL;
        mgr->payloads = kcalloc(max_payloads, sizeof(struct drm_dp_payload), GFP_KERNEL);
        if (!mgr->payloads)
                return -ENOMEM;
@@ -2854,7 +2936,9 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
        if (!mgr->proposed_vcpis)
                return -ENOMEM;
        set_bit(0, &mgr->payload_mask);
-       test_calc_pbn_mode();
+       if (test_calc_pbn_mode() < 0)
+               DRM_ERROR("MST PBN self-test failed\n");
+
        return 0;
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init);
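
The new -EINVAL guard exists because payload and VCPI ids are handed out with set_bit() on fixed-width masks, and id 0 is pre-reserved just above, so max_payloads + 1 distinct bits must fit. Assuming the masks are unsigned long (an assumption in this sketch), that caps max_payloads at 63 on an LP64 build:

#include <stdio.h>

int main(void)
{
	unsigned long payload_mask = 0;
	int bits = sizeof(payload_mask) * 8;	/* 64 on LP64 targets */

	/* ids 0..bits-1 are addressable and id 0 is reserved for the prime slot */
	printf("max_payloads must be <= %d\n", bits - 1);
	return 0;
}
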
index c3b80fd65d6254e89caf3381529f673764286115..7b30b307674ba6d1b71065a6923758ea251d05b2 100644 (file)
@@ -198,10 +198,7 @@ EXPORT_SYMBOL(drm_ht_remove_item);
 void drm_ht_remove(struct drm_open_hash *ht)
 {
        if (ht->table) {
-               if ((PAGE_SIZE / sizeof(*ht->table)) >> ht->order)
-                       kfree(ht->table);
-               else
-                       vfree(ht->table);
+               kvfree(ht->table);
                ht->table = NULL;
        }
 }
index d12a4efa651b015179f776e1fc056da96dc1faf0..1fe14579e8c941b002c5362f7c69936c85cf855b 100644 (file)
@@ -224,6 +224,64 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe,
                diff = (flags & DRM_CALLED_FROM_VBLIRQ) != 0;
        }
 
+       /*
+        * Within a drm_vblank_pre_modeset - drm_vblank_post_modeset
+        * interval? If so then vblank irqs keep running and it will likely
+        * happen that the hardware vblank counter is not trustworthy as it
+        * might reset at some point in that interval and vblank timestamps
+        * are not trustworthy either in that interval. Iow. this can result
+        * in a bogus diff >> 1 which must be avoided as it would cause
+        * random large forward jumps of the software vblank counter.
+        */
+       if (diff > 1 && (vblank->inmodeset & 0x2)) {
+               DRM_DEBUG_VBL("clamping vblank bump to 1 on crtc %u: diffr=%u"
+                             " due to pre-modeset.\n", pipe, diff);
+               diff = 1;
+       }
+
+       /*
+        * FIXME: Need to replace this hack with proper seqlocks.
+        *
+        * Restrict the bump of the software vblank counter to a safe maximum
+        * value of +1 whenever there is the possibility that concurrent readers
+        * of vblank timestamps could be active at the moment, as the current
+        * implementation of the timestamp caching and updating is not safe
+        * against concurrent readers for calls to store_vblank() with a bump
+        * of anything but +1. A bump != 1 would very likely return corrupted
+        * timestamps to userspace, because the same slot in the cache could
+        * be concurrently written by store_vblank() and read by one of those
+        * readers without the read-retry logic detecting the collision.
+        *
+        * Concurrent readers can exist when we are called from the
+        * drm_vblank_off() or drm_vblank_on() functions and other non-vblank-
+        * irq callers. However, all those calls to us are happening with the
+        * vbl_lock locked to prevent drm_vblank_get(), so the vblank refcount
+        * can't increase while we are executing. Therefore a zero refcount at
+        * this point is safe for arbitrary counter bumps if we are called
+        * outside vblank irq, a non-zero count is not 100% safe. Unfortunately
+        * we must also accept a refcount of 1, as whenever we are called from
+        * drm_vblank_get() -> drm_vblank_enable() the refcount will be 1 and
+        * we must let that one pass through in order to not lose vblank counts
+        * during vblank irq off - which would completely defeat the whole
+        * point of this routine.
+        *
+        * Whenever we are called from vblank irq, we have to assume concurrent
+        * readers exist or can show up any time during our execution, even if
+        * the refcount is currently zero, as vblank irqs are usually only
+        * enabled due to the presence of readers, and because when we are called
+        * from vblank irq we can't hold the vbl_lock to protect us from sudden
+        * bumps in vblank refcount. Therefore also restrict bumps to +1 when
+        * called from vblank irq.
+        */
+       if ((diff > 1) && (atomic_read(&vblank->refcount) > 1 ||
+           (flags & DRM_CALLED_FROM_VBLIRQ))) {
+               DRM_DEBUG_VBL("clamping vblank bump to 1 on crtc %u: diffr=%u "
+                             "refcount %u, vblirq %u\n", pipe, diff,
+                             atomic_read(&vblank->refcount),
+                             (flags & DRM_CALLED_FROM_VBLIRQ) != 0);
+               diff = 1;
+       }
+
        DRM_DEBUG_VBL("updating vblank count on crtc %u:"
                      " current=%u, diff=%u, hw=%u hw_last=%u\n",
                      pipe, vblank->count, diff, cur_vblank, vblank->last);
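
The FIXME above names the eventual fix: publish count and timestamp under a seqlock so readers retry instead of racing, which removes the need to clamp diff to +1. A minimal sketch of that pattern, assuming an illustrative struct rather than the real struct drm_vblank_crtc layout:

#include <linux/seqlock.h>

/* Hypothetical layout, for illustration only. */
struct vbl_state {
	seqlock_t lock;		/* seqlock_init() at setup time */
	u32 count;
	ktime_t timestamp;
};

static void vbl_store(struct vbl_state *s, u32 diff, ktime_t ts)
{
	write_seqlock(&s->lock);	/* readers retry while this is held */
	s->count += diff;
	s->timestamp = ts;
	write_sequnlock(&s->lock);
}

static u32 vbl_read(struct vbl_state *s, ktime_t *ts)
{
	unsigned int seq;
	u32 count;

	do {
		seq = read_seqbegin(&s->lock);
		count = s->count;
		*ts = s->timestamp;
	} while (read_seqretry(&s->lock, seq));	/* collided? try again */

	return count;
}

With that in place an arbitrary bump would be safe even with concurrent readers, which is exactly what the clamping above works around.
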
@@ -1316,7 +1374,13 @@ void drm_vblank_off(struct drm_device *dev, unsigned int pipe)
        spin_lock_irqsave(&dev->event_lock, irqflags);
 
        spin_lock(&dev->vbl_lock);
-       vblank_disable_and_save(dev, pipe);
+       DRM_DEBUG_VBL("crtc %d, vblank enabled %d, inmodeset %d\n",
+                     pipe, vblank->enabled, vblank->inmodeset);
+
+       /* Avoid redundant vblank disables without a previous drm_vblank_on(). */
+       if (drm_core_check_feature(dev, DRIVER_ATOMIC) || !vblank->inmodeset)
+               vblank_disable_and_save(dev, pipe);
+
        wake_up(&vblank->queue);
 
        /*
@@ -1418,6 +1482,9 @@ void drm_vblank_on(struct drm_device *dev, unsigned int pipe)
                return;
 
        spin_lock_irqsave(&dev->vbl_lock, irqflags);
+       DRM_DEBUG_VBL("crtc %d, vblank enabled %d, inmodeset %d\n",
+                     pipe, vblank->enabled, vblank->inmodeset);
+
        /* Drop our private "prevent drm_vblank_get" refcount */
        if (vblank->inmodeset) {
                atomic_dec(&vblank->refcount);
@@ -1430,8 +1497,7 @@ void drm_vblank_on(struct drm_device *dev, unsigned int pipe)
         * re-enable interrupts if there are users left, or the
         * user wishes vblank interrupts to be enabled all the time.
         */
-       if (atomic_read(&vblank->refcount) != 0 ||
-           (!dev->vblank_disable_immediate && drm_vblank_offdelay == 0))
+       if (atomic_read(&vblank->refcount) != 0 || drm_vblank_offdelay == 0)
                WARN_ON(drm_vblank_enable(dev, pipe));
        spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
@@ -1526,6 +1592,7 @@ void drm_vblank_post_modeset(struct drm_device *dev, unsigned int pipe)
        if (vblank->inmodeset) {
                spin_lock_irqsave(&dev->vbl_lock, irqflags);
                dev->vblank_disable_allowed = true;
+               drm_reset_vblank_timestamp(dev, pipe);
                spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 
                if (vblank->inmodeset & 0x2)
index 9e585d51fb7807278aeef68221f186f8f36c4a3c..e881482b5971d16ee2539307f6bf41746fc4dbef 100644 (file)
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18379 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -30,15 +30,19 @@ Copyright (C) 2015
 #define ENDIAN_MODE_NO_SWAP                                    0x00000000
 #define ENDIAN_MODE_SWAP_16                                    0x00000001
 #define ENDIAN_MODE_SWAP_32                                    0x00000002
+#define chipModel_GC200                                                0x00000200
 #define chipModel_GC300                                                0x00000300
 #define chipModel_GC320                                                0x00000320
+#define chipModel_GC328                                                0x00000328
 #define chipModel_GC350                                                0x00000350
 #define chipModel_GC355                                                0x00000355
 #define chipModel_GC400                                                0x00000400
 #define chipModel_GC410                                                0x00000410
 #define chipModel_GC420                                                0x00000420
+#define chipModel_GC428                                                0x00000428
 #define chipModel_GC450                                                0x00000450
 #define chipModel_GC500                                                0x00000500
+#define chipModel_GC520                                                0x00000520
 #define chipModel_GC530                                                0x00000530
 #define chipModel_GC600                                                0x00000600
 #define chipModel_GC700                                                0x00000700
@@ -46,9 +50,16 @@ Copyright (C) 2015
 #define chipModel_GC860                                                0x00000860
 #define chipModel_GC880                                                0x00000880
 #define chipModel_GC1000                                       0x00001000
+#define chipModel_GC1500                                       0x00001500
 #define chipModel_GC2000                                       0x00002000
 #define chipModel_GC2100                                       0x00002100
+#define chipModel_GC2200                                       0x00002200
+#define chipModel_GC2500                                       0x00002500
+#define chipModel_GC3000                                       0x00003000
 #define chipModel_GC4000                                       0x00004000
+#define chipModel_GC5000                                       0x00005000
+#define chipModel_GC5200                                       0x00005200
+#define chipModel_GC6400                                       0x00006400
 #define RGBA_BITS_R                                            0x00000001
 #define RGBA_BITS_G                                            0x00000002
 #define RGBA_BITS_B                                            0x00000004
@@ -160,7 +171,7 @@ Copyright (C) 2015
 #define chipMinorFeatures2_UNK8                                        0x00000100
 #define chipMinorFeatures2_UNK9                                        0x00000200
 #define chipMinorFeatures2_UNK10                               0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16                      0x00000800
+#define chipMinorFeatures2_HALTI1                              0x00000800
 #define chipMinorFeatures2_UNK12                               0x00001000
 #define chipMinorFeatures2_UNK13                               0x00002000
 #define chipMinorFeatures2_UNK14                               0x00004000
@@ -189,7 +200,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK5                                        0x00000020
 #define chipMinorFeatures3_UNK6                                        0x00000040
 #define chipMinorFeatures3_UNK7                                        0x00000080
-#define chipMinorFeatures3_UNK8                                        0x00000100
+#define chipMinorFeatures3_FAST_MSAA                           0x00000100
 #define chipMinorFeatures3_UNK9                                        0x00000200
 #define chipMinorFeatures3_BUG_FIXES10                         0x00000400
 #define chipMinorFeatures3_UNK11                               0x00000800
@@ -199,7 +210,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK15                               0x00008000
 #define chipMinorFeatures3_UNK16                               0x00010000
 #define chipMinorFeatures3_UNK17                               0x00020000
-#define chipMinorFeatures3_UNK18                               0x00040000
+#define chipMinorFeatures3_ACE                                 0x00040000
 #define chipMinorFeatures3_UNK19                               0x00080000
 #define chipMinorFeatures3_UNK20                               0x00100000
 #define chipMinorFeatures3_UNK21                               0x00200000
@@ -207,7 +218,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK23                               0x00800000
 #define chipMinorFeatures3_UNK24                               0x01000000
 #define chipMinorFeatures3_UNK25                               0x02000000
-#define chipMinorFeatures3_UNK26                               0x04000000
+#define chipMinorFeatures3_NEW_HZ                              0x04000000
 #define chipMinorFeatures3_UNK27                               0x08000000
 #define chipMinorFeatures3_UNK28                               0x10000000
 #define chipMinorFeatures3_UNK29                               0x20000000
@@ -229,9 +240,9 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK13                               0x00002000
 #define chipMinorFeatures4_UNK14                               0x00004000
 #define chipMinorFeatures4_UNK15                               0x00008000
-#define chipMinorFeatures4_UNK16                               0x00010000
+#define chipMinorFeatures4_HALTI2                              0x00010000
 #define chipMinorFeatures4_UNK17                               0x00020000
-#define chipMinorFeatures4_UNK18                               0x00040000
+#define chipMinorFeatures4_SMALL_MSAA                          0x00040000
 #define chipMinorFeatures4_UNK19                               0x00080000
 #define chipMinorFeatures4_UNK20                               0x00100000
 #define chipMinorFeatures4_UNK21                               0x00200000
@@ -245,5 +256,37 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK29                               0x20000000
 #define chipMinorFeatures4_UNK30                               0x40000000
 #define chipMinorFeatures4_UNK31                               0x80000000
+#define chipMinorFeatures5_UNK0                                        0x00000001
+#define chipMinorFeatures5_UNK1                                        0x00000002
+#define chipMinorFeatures5_UNK2                                        0x00000004
+#define chipMinorFeatures5_UNK3                                        0x00000008
+#define chipMinorFeatures5_UNK4                                        0x00000010
+#define chipMinorFeatures5_UNK5                                        0x00000020
+#define chipMinorFeatures5_UNK6                                        0x00000040
+#define chipMinorFeatures5_UNK7                                        0x00000080
+#define chipMinorFeatures5_UNK8                                        0x00000100
+#define chipMinorFeatures5_HALTI3                              0x00000200
+#define chipMinorFeatures5_UNK10                               0x00000400
+#define chipMinorFeatures5_UNK11                               0x00000800
+#define chipMinorFeatures5_UNK12                               0x00001000
+#define chipMinorFeatures5_UNK13                               0x00002000
+#define chipMinorFeatures5_UNK14                               0x00004000
+#define chipMinorFeatures5_UNK15                               0x00008000
+#define chipMinorFeatures5_UNK16                               0x00010000
+#define chipMinorFeatures5_UNK17                               0x00020000
+#define chipMinorFeatures5_UNK18                               0x00040000
+#define chipMinorFeatures5_UNK19                               0x00080000
+#define chipMinorFeatures5_UNK20                               0x00100000
+#define chipMinorFeatures5_UNK21                               0x00200000
+#define chipMinorFeatures5_UNK22                               0x00400000
+#define chipMinorFeatures5_UNK23                               0x00800000
+#define chipMinorFeatures5_UNK24                               0x01000000
+#define chipMinorFeatures5_UNK25                               0x02000000
+#define chipMinorFeatures5_UNK26                               0x04000000
+#define chipMinorFeatures5_UNK27                               0x08000000
+#define chipMinorFeatures5_UNK28                               0x10000000
+#define chipMinorFeatures5_UNK29                               0x20000000
+#define chipMinorFeatures5_UNK30                               0x40000000
+#define chipMinorFeatures5_UNK31                               0x80000000
 
 #endif /* COMMON_XML */
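
The chipMinorFeaturesN words above are plain bitmasks that etnaviv_hw_identify() reads from the MINOR_FEATURE registers; a driver path is gated on one with a simple AND. A short sketch reusing identifiers from this header:

/* Sketch: enable a code path only when the HALTI3 feature bit is set. */
if (gpu->identity.minor_features5 & chipMinorFeatures5_HALTI3)
	dev_dbg(gpu->dev, "HALTI3 feature set available\n");

Userspace sees the same words through the ETNAVIV_PARAM_GPU_FEATURES_x parameters handled further down in this series.
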
index 5c89ebb52fd29b1357316fa000d4a61188f4d320..e8858985f01ea0fb5a17e39a3c7b246d6ae562d9 100644 (file)
@@ -668,7 +668,6 @@ static struct platform_driver etnaviv_platform_driver = {
        .probe      = etnaviv_pdev_probe,
        .remove     = etnaviv_pdev_remove,
        .driver     = {
-               .owner  = THIS_MODULE,
                .name   = "etnaviv",
                .of_match_table = dt_match,
        },
index d6bd438bd5bec73eb074aa15626fc8eb3d92d70b..1cd6046e76b11d824a06b24bd77a4cea3d1a1a93 100644 (file)
@@ -85,7 +85,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
        struct dma_buf_attachment *attach, struct sg_table *sg);
 int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
 void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
                struct timespec *timeout);
 int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
index bf8fa859e8bece4f037e1ef7eae1c6caf0a0c85d..4a29eeadbf1e738da23a6c29a2e00e8f68d81576 100644 (file)
@@ -201,7 +201,9 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
                obj = vram->object;
 
+               mutex_lock(&obj->lock);
                pages = etnaviv_gem_get_pages(obj);
+               mutex_unlock(&obj->lock);
                if (pages) {
                        int j;
 
@@ -213,8 +215,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
                iter.hdr->iova = cpu_to_le64(vram->iova);
 
-               vaddr = etnaviv_gem_vaddr(&obj->base);
-               if (vaddr && !IS_ERR(vaddr))
+               vaddr = etnaviv_gem_vmap(&obj->base);
+               if (vaddr)
                        memcpy(iter.data, vaddr, obj->base.size);
 
                etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
index 9f77c3b94cc696c4683d18da3e7848fb040874c6..4b519e4309b28edb1a1a9f56c40fb00153c87036 100644 (file)
@@ -353,25 +353,39 @@ void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
        drm_gem_object_unreference_unlocked(obj);
 }
 
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
 {
        struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
-       mutex_lock(&etnaviv_obj->lock);
-       if (!etnaviv_obj->vaddr) {
-               struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
-               if (IS_ERR(pages))
-                       return ERR_CAST(pages);
+       if (etnaviv_obj->vaddr)
+               return etnaviv_obj->vaddr;
 
-               etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
-                               VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-       }
+       mutex_lock(&etnaviv_obj->lock);
+       /*
+        * Need to check again, as we might have raced with another thread
+        * while waiting for the mutex.
+        */
+       if (!etnaviv_obj->vaddr)
+               etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
        mutex_unlock(&etnaviv_obj->lock);
 
        return etnaviv_obj->vaddr;
 }
 
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+       struct page **pages;
+
+       lockdep_assert_held(&obj->lock);
+
+       pages = etnaviv_gem_get_pages(obj);
+       if (IS_ERR(pages))
+               return NULL;
+
+       return vmap(pages, obj->base.size >> PAGE_SHIFT,
+                       VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
 static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
 {
        if (op & ETNA_PREP_READ)
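
The rewritten etnaviv_gem_vmap() above is the standard lazy-init, double-checked pattern: an unlocked fast-path read, then a re-check under the mutex because another thread may have populated vaddr while we waited for the lock. Stripped of driver detail, the shape is (a sketch with an illustrative my_obj; a fully pedantic version would also wrap the unlocked read in READ_ONCE()):

static void *lazy_vmap(struct my_obj *o)
{
	if (o->vaddr)			/* fast path, no lock taken */
		return o->vaddr;

	mutex_lock(&o->lock);
	if (!o->vaddr)			/* re-check: we may have raced */
		o->vaddr = o->ops->vmap(o);	/* stays NULL on failure */
	mutex_unlock(&o->lock);

	return o->vaddr;
}

This is safe here because vaddr transitions NULL -> valid exactly once and is never torn down while users still hold references.
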
@@ -522,6 +536,7 @@ static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
        .get_pages = etnaviv_gem_shmem_get_pages,
        .release = etnaviv_gem_shmem_release,
+       .vmap = etnaviv_gem_vmap_impl,
 };
 
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -866,6 +881,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
        .get_pages = etnaviv_gem_userptr_get_pages,
        .release = etnaviv_gem_userptr_release,
+       .vmap = etnaviv_gem_vmap_impl,
 };
 
 int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
index a300b4b3d5458654fd1c0f200227976f64529e97..ab5df8147a5f6f1aaccad5c6ce14d99612e0ea27 100644 (file)
@@ -78,6 +78,7 @@ struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj)
 struct etnaviv_gem_ops {
        int (*get_pages)(struct etnaviv_gem_object *);
        void (*release)(struct etnaviv_gem_object *);
+       void *(*vmap)(struct etnaviv_gem_object *);
 };
 
 static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
index e94db4f95770e586837653b0a1b73d251bbe75d8..4e67395f5fa1c1572aa7051a86538aa16389f807 100644 (file)
@@ -31,7 +31,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
 
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
 {
-       return etnaviv_gem_vaddr(obj);
+       return etnaviv_gem_vmap(obj);
 }
 
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
@@ -77,9 +77,17 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj)
        drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
 }
 
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+       lockdep_assert_held(&etnaviv_obj->lock);
+
+       return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
 static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
        /* .get_pages should never be called */
        .release = etnaviv_gem_prime_release,
+       .vmap = etnaviv_gem_prime_vmap_impl,
 };
 
 struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
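
Both ->vmap() implementations open with lockdep_assert_held(&etnaviv_obj->lock), encoding the contract that etnaviv_gem_vmap() is the only caller and always holds the object mutex. With CONFIG_LOCKDEP a violated contract becomes an immediate splat; without it, the assertion compiles away. The idiom in general form:

static void *backend_vmap(struct etnaviv_gem_object *obj)
{
	lockdep_assert_held(&obj->lock);	/* caller must hold obj->lock */

	/* ... safe to touch state protected by obj->lock ... */
	return NULL;	/* placeholder body for the sketch */
}
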
index 056a72e6ed26273146269ee0258784e8a714d36a..a33162cf4f4cc6c4f0aee66433680a15f64cfffc 100644 (file)
@@ -72,6 +72,14 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
                *value = gpu->identity.minor_features3;
                break;
 
+       case ETNAVIV_PARAM_GPU_FEATURES_5:
+               *value = gpu->identity.minor_features4;
+               break;
+
+       case ETNAVIV_PARAM_GPU_FEATURES_6:
+               *value = gpu->identity.minor_features5;
+               break;
+
        case ETNAVIV_PARAM_GPU_STREAM_COUNT:
                *value = gpu->identity.stream_count;
                break;
@@ -112,6 +120,10 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
                *value = gpu->identity.num_constants;
                break;
 
+       case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+               *value = gpu->identity.varyings_count;
+               break;
+
        default:
                DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
                return -EINVAL;
@@ -120,46 +132,56 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
        return 0;
 }
 
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+       ((gpu)->identity.model == chipModel_##mod && \
+        (gpu)->identity.revision == rev)
+#define etnaviv_field(val, field) \
+       (((val) & field##__MASK) >> field##__SHIFT)
+
 static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 {
        if (gpu->identity.minor_features0 &
            chipMinorFeatures0_MORE_MINOR_FEATURES) {
-               u32 specs[2];
+               u32 specs[4];
+               unsigned int streams;
 
                specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
                specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
-
-               gpu->identity.stream_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
-               gpu->identity.register_max =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
-                               >> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
-               gpu->identity.thread_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
-               gpu->identity.vertex_cache_size =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
-               gpu->identity.shader_core_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
-               gpu->identity.pixel_pipes =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
-                               >> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+               specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+               specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
+
+               gpu->identity.stream_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+               gpu->identity.register_max = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+               gpu->identity.thread_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+               gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+               gpu->identity.shader_core_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+               gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
                gpu->identity.vertex_output_buffer_size =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
-
-               gpu->identity.buffer_size =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
-               gpu->identity.instruction_count =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
-               gpu->identity.num_constants =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+                       etnaviv_field(specs[0],
+                               VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
+
+               gpu->identity.buffer_size = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+               gpu->identity.instruction_count = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+               gpu->identity.num_constants = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+               gpu->identity.varyings_count = etnaviv_field(specs[2],
+                                       VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+               /* This overrides the value from the older register if non-zero */
+               streams = etnaviv_field(specs[3],
+                                       VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+               if (streams)
+                       gpu->identity.stream_count = streams;
        }
 
        /* Fill in the stream count if not specified */
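
etnaviv_field() is straight token pasting: etnaviv_field(specs[0], VIVS_HI_CHIP_SPECS_STREAM_COUNT) expands to (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK) >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT, i.e. exactly the open-coded form it replaces. A worked example with an invented field occupying bits 4..8:

#define DEMO_FIELD__MASK	0x000001f0	/* illustrative only */
#define DEMO_FIELD__SHIFT	4

u32 reg = 0x00000130;				/* bits 4..8 hold 0x13 */
u32 val = etnaviv_field(reg, DEMO_FIELD);	/* (0x130 & 0x1f0) >> 4 == 0x13 */
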
@@ -173,7 +195,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        /* Convert the register max value */
        if (gpu->identity.register_max)
                gpu->identity.register_max = 1 << gpu->identity.register_max;
-       else if (gpu->identity.model == 0x0400)
+       else if (gpu->identity.model == chipModel_GC400)
                gpu->identity.register_max = 32;
        else
                gpu->identity.register_max = 64;
@@ -181,10 +203,10 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        /* Convert thread count */
        if (gpu->identity.thread_count)
                gpu->identity.thread_count = 1 << gpu->identity.thread_count;
-       else if (gpu->identity.model == 0x0400)
+       else if (gpu->identity.model == chipModel_GC400)
                gpu->identity.thread_count = 64;
-       else if (gpu->identity.model == 0x0500 ||
-                gpu->identity.model == 0x0530)
+       else if (gpu->identity.model == chipModel_GC500 ||
+                gpu->identity.model == chipModel_GC530)
                gpu->identity.thread_count = 128;
        else
                gpu->identity.thread_count = 256;
@@ -206,7 +228,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        if (gpu->identity.vertex_output_buffer_size) {
                gpu->identity.vertex_output_buffer_size =
                        1 << gpu->identity.vertex_output_buffer_size;
-       } else if (gpu->identity.model == 0x0400) {
+       } else if (gpu->identity.model == chipModel_GC400) {
                if (gpu->identity.revision < 0x4000)
                        gpu->identity.vertex_output_buffer_size = 512;
                else if (gpu->identity.revision < 0x4200)
@@ -219,9 +241,8 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
        switch (gpu->identity.instruction_count) {
        case 0:
-               if ((gpu->identity.model == 0x2000 &&
-                    gpu->identity.revision == 0x5108) ||
-                   gpu->identity.model == 0x880)
+               if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+                   gpu->identity.model == chipModel_GC880)
                        gpu->identity.instruction_count = 512;
                else
                        gpu->identity.instruction_count = 256;
@@ -242,6 +263,30 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
        if (gpu->identity.num_constants == 0)
                gpu->identity.num_constants = 168;
+
+       if (gpu->identity.varyings_count == 0) {
+               if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+                       gpu->identity.varyings_count = 12;
+               else
+                       gpu->identity.varyings_count = 8;
+       }
+
+       /*
+        * For some cores, two varyings are consumed for position, so the
+        * maximum varying count needs to be reduced by one.
+        */
+       if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+           etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+           etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+           etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+           etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+           etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+           etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+           etnaviv_is_model_rev(gpu, GC880, 0x5106))
+               gpu->identity.varyings_count -= 1;
 }
 
 static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -251,12 +296,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
        chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
 
        /* Special case for older graphics cores. */
-       if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
-            >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) ==  0x01) {
-               gpu->identity.model    = 0x500; /* gc500 */
-               gpu->identity.revision =
-                       (chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
-                       >> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+       if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
+               gpu->identity.model    = chipModel_GC500;
+               gpu->identity.revision = etnaviv_field(chipIdentity,
+                                        VIVS_HI_CHIP_IDENTITY_REVISION);
        } else {
 
                gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
@@ -269,13 +312,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                 * same.  Only for GC400 family.
                 */
                if ((gpu->identity.model & 0xff00) == 0x0400 &&
-                   gpu->identity.model != 0x0420) {
+                   gpu->identity.model != chipModel_GC420) {
                        gpu->identity.model = gpu->identity.model & 0x0400;
                }
 
                /* Another special case */
-               if (gpu->identity.model == 0x300 &&
-                   gpu->identity.revision == 0x2201) {
+               if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
                        u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
                        u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
 
@@ -295,11 +337,13 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
        gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
 
        /* Disable fast clear on GC700. */
-       if (gpu->identity.model == 0x700)
+       if (gpu->identity.model == chipModel_GC700)
                gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 
-       if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
-           (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+       if ((gpu->identity.model == chipModel_GC500 &&
+            gpu->identity.revision < 2) ||
+           (gpu->identity.model == chipModel_GC300 &&
+            gpu->identity.revision < 0x2000)) {
 
                /*
                 * GC500 rev 1.x and GC300 rev < 2.0 don't have these
@@ -309,6 +353,8 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                gpu->identity.minor_features1 = 0;
                gpu->identity.minor_features2 = 0;
                gpu->identity.minor_features3 = 0;
+               gpu->identity.minor_features4 = 0;
+               gpu->identity.minor_features5 = 0;
        } else
                gpu->identity.minor_features0 =
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -321,6 +367,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
                gpu->identity.minor_features3 =
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+               gpu->identity.minor_features4 =
+                               gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+               gpu->identity.minor_features5 =
+                               gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
        }
 
        /* GC600 idle register reports zero bits where modules aren't present */
@@ -441,10 +491,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
        u16 prefetch;
 
-       if (gpu->identity.model == chipModel_GC320 &&
-           gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
-           (gpu->identity.revision == 0x5007 ||
-            gpu->identity.revision == 0x5220)) {
+       if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+            etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+           gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
                u32 mc_memory_debug;
 
                mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -466,7 +515,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
                  VIVS_HI_AXI_CONFIG_ARCACHE(2));
 
        /* GC2000 rev 5108 needs a special bus config */
-       if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+       if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
                u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
                bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
                                VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
@@ -511,8 +560,16 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
        if (gpu->identity.model == 0) {
                dev_err(gpu->dev, "Unknown GPU model\n");
-               pm_runtime_put_autosuspend(gpu->dev);
-               return -ENXIO;
+               ret = -ENXIO;
+               goto fail;
+       }
+
+       /* Exclude VG cores with FE2.0 */
+       if (gpu->identity.features & chipFeatures_PIPE_VG &&
+           gpu->identity.features & chipFeatures_FE20) {
+               dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+               ret = -ENXIO;
+               goto fail;
        }
 
        ret = etnaviv_hw_reset(gpu);
@@ -539,10 +596,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
                goto fail;
        }
 
-       /* TODO: we will leak here memory - fix it! */
-
        gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
        if (!gpu->mmu) {
+               iommu_domain_free(iommu);
                ret = -ENOMEM;
                goto fail;
        }
@@ -552,7 +608,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
        if (!gpu->buffer) {
                ret = -ENOMEM;
                dev_err(gpu->dev, "could not create command buffer\n");
-               goto fail;
+               goto destroy_iommu;
        }
        if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
                ret = -EINVAL;
@@ -582,6 +638,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 free_buffer:
        etnaviv_gpu_cmdbuf_free(gpu->buffer);
        gpu->buffer = NULL;
+destroy_iommu:
+       etnaviv_iommu_destroy(gpu->mmu);
+       gpu->mmu = NULL;
 fail:
        pm_runtime_mark_last_busy(gpu->dev);
        pm_runtime_put_autosuspend(gpu->dev);
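
The new destroy_iommu label completes the usual kernel unwind ladder: cleanup labels are ordered inversely to the acquisitions, so each failure site jumps to the label that releases exactly what has succeeded so far. It also closes the old TODO'd leak, where a command-buffer allocation failure left the just-created MMU (and its iommu domain) dangling. The pattern, reduced to a sketch with hypothetical helpers:

static int init_resources(struct ctx *c)
{
	int ret;

	c->mmu = mmu_create(c);			/* first acquisition */
	if (!c->mmu)
		return -ENOMEM;

	c->buffer = cmdbuf_alloc(c);		/* second acquisition */
	if (!c->buffer) {
		ret = -ENOMEM;
		goto destroy_mmu;		/* undo only what succeeded */
	}

	return 0;

destroy_mmu:
	mmu_destroy(c->mmu);
	c->mmu = NULL;
	return ret;
}
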
@@ -642,6 +701,10 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
                   gpu->identity.minor_features2);
        seq_printf(m, "\t minor_features3: 0x%08x\n",
                   gpu->identity.minor_features3);
+       seq_printf(m, "\t minor_features4: 0x%08x\n",
+                  gpu->identity.minor_features4);
+       seq_printf(m, "\t minor_features5: 0x%08x\n",
+                  gpu->identity.minor_features5);
 
        seq_puts(m, "\tspecs\n");
        seq_printf(m, "\t stream_count:  %d\n",
@@ -664,6 +727,8 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
                        gpu->identity.instruction_count);
        seq_printf(m, "\t num_constants: %d\n",
                        gpu->identity.num_constants);
+       seq_printf(m, "\t varyings_count: %d\n",
+                       gpu->identity.varyings_count);
 
        seq_printf(m, "\taxi: 0x%08x\n", axi);
        seq_printf(m, "\tidle: 0x%08x\n", idle);
index c75d50359ab07baa40d7813fe07cd9928009df91..f233ac4c7c1cea456ee4c4c4b7e0d0878aefc093 100644 (file)
@@ -46,6 +46,12 @@ struct etnaviv_chip_identity {
        /* Supported minor feature 3 fields. */
        u32 minor_features3;
 
+       /* Supported minor feature 4 fields. */
+       u32 minor_features4;
+
+       /* Supported minor feature 5 fields. */
+       u32 minor_features5;
+
        /* Number of streams supported. */
        u32 stream_count;
 
@@ -75,6 +81,9 @@ struct etnaviv_chip_identity {
 
        /* Buffer size */
        u32 buffer_size;
+
+       /* Number of varyings */
+       u8 varyings_count;
 };
 
 struct etnaviv_event {
index 0064f2640396111bce54f3ae0caa9f45e94717d0..6a7de5f1454a51532c0809e2e741c53db4fba4e9 100644 (file)
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18437 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -182,8 +182,25 @@ Copyright (C) 2015
 
 #define VIVS_HI_CHIP_MINOR_FEATURE_3                           0x00000088
 
+#define VIVS_HI_CHIP_SPECS_3                                   0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK              0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT             4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x)                 (((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK              0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT             0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x)                 (((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
 #define VIVS_HI_CHIP_MINOR_FEATURE_4                           0x00000094
 
+#define VIVS_HI_CHIP_SPECS_4                                   0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK                        0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT               12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x)                   (((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5                           0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID                                        0x000000a8
+
 #define VIVS_PM                                                        0x00000000
 
 #define VIVS_PM_POWER_CONTROLS                                 0x00000100
@@ -206,6 +223,11 @@ Copyright (C) 2015
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE            0x00000001
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE            0x00000002
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE            0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH            0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA            0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE            0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA            0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX            0x00000080
 
 #define VIVS_PM_PULSE_EATER                                    0x0000010c
 
index 83efca941388a73874a8da10412c77d9bc3801e4..f17d3927959604301e9e53c7614fcb1e4982e80c 100644 (file)
@@ -1,6 +1,6 @@
 config DRM_EXYNOS
        tristate "DRM Support for Samsung SoC EXYNOS Series"
-       depends on OF && DRM && (PLAT_SAMSUNG || ARCH_MULTIPLATFORM)
+       depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
        select DRM_KMS_HELPER
        select DRM_KMS_FB_HELPER
        select FB_CFB_FILLRECT
index 1bf6a21130c7cbde6088dbb70e2a2b650f244b06..162ab93e99cb519e20963066c2285d50b6f08b88 100644 (file)
@@ -93,7 +93,7 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc)
        if (test_bit(BIT_SUSPENDED, &ctx->flags))
                return -EPERM;
 
-       if (test_and_set_bit(BIT_IRQS_ENABLED, &ctx->flags)) {
+       if (!test_and_set_bit(BIT_IRQS_ENABLED, &ctx->flags)) {
                val = VIDINTCON0_INTEN;
                if (ctx->out_type == IFTYPE_I80)
                        val |= VIDINTCON0_FRAMEDONE;
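
The one-character change above is easy to misread: test_and_set_bit() atomically sets the bit and returns its previous value. The old code therefore only programmed the interrupt registers when the bit was already set; with the !, the body runs exactly once, on the 0 -> 1 transition. In general form (hypothetical bit and helper):

if (!test_and_set_bit(MY_IRQS_ENABLED, &ctx->flags)) {
	/* Previous value was 0: we are the first enabler, so do
	 * the one-time hardware interrupt setup here. */
	enable_hw_vblank_irq(ctx);
}
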
@@ -402,8 +402,6 @@ static void decon_enable(struct exynos_drm_crtc *crtc)
                decon_enable_vblank(ctx->crtc);
 
        decon_commit(ctx->crtc);
-
-       set_bit(BIT_SUSPENDED, &ctx->flags);
 }
 
 static void decon_disable(struct exynos_drm_crtc *crtc)
@@ -582,9 +580,9 @@ out:
 static int exynos5433_decon_suspend(struct device *dev)
 {
        struct decon_context *ctx = dev_get_drvdata(dev);
-       int i;
+       int i = ARRAY_SIZE(decon_clks_name);
 
-       for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++)
+       while (--i >= 0)
                clk_disable_unprepare(ctx->clks[i]);
 
        return 0;
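
The suspend path now releases the clocks in the reverse of the order resume enables them, the usual discipline for dependent resources; the while (--i >= 0) form also drops straight into a partial-enable unwind, where i is the number of clocks successfully enabled. A sketch of the paired enable/teardown:

#include <linux/clk.h>

static int enable_all_clks(struct clk **clks, int n)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = clk_prepare_enable(clks[i]);
		if (ret)
			goto unwind;
	}
	return 0;

unwind:
	while (--i >= 0)		/* tear down in reverse order */
		clk_disable_unprepare(clks[i]);
	return ret;
}
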
index b79c316c2ad2ce7b7eb48e152a49c41ab6e93431..673164b331c8806e14aaa10d82e2f787c10bfb09 100644 (file)
@@ -1392,7 +1392,7 @@ static const struct component_ops exynos_dp_ops = {
 static int exynos_dp_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
-       struct device_node *panel_node = NULL, *bridge_node, *endpoint = NULL;
+       struct device_node *np = NULL, *endpoint = NULL;
        struct exynos_dp_device *dp;
        int ret;
 
@@ -1404,41 +1404,36 @@ static int exynos_dp_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, dp);
 
        /* This is for backward compatibility. */
-       panel_node = of_parse_phandle(dev->of_node, "panel", 0);
-       if (panel_node) {
-               dp->panel = of_drm_find_panel(panel_node);
-               of_node_put(panel_node);
+       np = of_parse_phandle(dev->of_node, "panel", 0);
+       if (np) {
+               dp->panel = of_drm_find_panel(np);
+               of_node_put(np);
                if (!dp->panel)
                        return -EPROBE_DEFER;
-       } else {
-               endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
-               if (endpoint) {
-                       panel_node = of_graph_get_remote_port_parent(endpoint);
-                       if (panel_node) {
-                               dp->panel = of_drm_find_panel(panel_node);
-                               of_node_put(panel_node);
-                               if (!dp->panel)
-                                       return -EPROBE_DEFER;
-                       } else {
-                               DRM_ERROR("no port node for panel device.\n");
-                               return -EINVAL;
-                       }
-               }
-       }
-
-       if (endpoint)
                goto out;
+       }
 
        endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
        if (endpoint) {
-               bridge_node = of_graph_get_remote_port_parent(endpoint);
-               if (bridge_node) {
-                       dp->ptn_bridge = of_drm_find_bridge(bridge_node);
-                       of_node_put(bridge_node);
-                       if (!dp->ptn_bridge)
-                               return -EPROBE_DEFER;
-               } else
-                       return -EPROBE_DEFER;
+               np = of_graph_get_remote_port_parent(endpoint);
+               if (np) {
+                       /* The remote port can be either a panel or a bridge */
+                       dp->panel = of_drm_find_panel(np);
+                       if (!dp->panel) {
+                               dp->ptn_bridge = of_drm_find_bridge(np);
+                               if (!dp->ptn_bridge) {
+                                       of_node_put(np);
+                                       return -EPROBE_DEFER;
+                               }
+                       }
+                       of_node_put(np);
+               } else {
+                       DRM_ERROR("no remote endpoint device node found.\n");
+                       return -EINVAL;
+               }
+       } else {
+               DRM_ERROR("no port endpoint subnode found.\n");
+               return -EINVAL;
        }
 
 out:
index d84a498ef099712c56f319c8333c8c8564ef457a..26e81d191f56eaa8b63f9755f9542dac034c9096 100644 (file)
@@ -1782,6 +1782,7 @@ static int exynos_dsi_bind(struct device *dev, struct device *master,
 
        bridge = of_drm_find_bridge(dsi->bridge_node);
        if (bridge) {
+               encoder->bridge = bridge;
                drm_bridge_attach(drm_dev, bridge);
        }
 
@@ -1906,8 +1907,7 @@ static int exynos_dsi_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int exynos_dsi_suspend(struct device *dev)
+static int __maybe_unused exynos_dsi_suspend(struct device *dev)
 {
        struct drm_encoder *encoder = dev_get_drvdata(dev);
        struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1938,7 +1938,7 @@ static int exynos_dsi_suspend(struct device *dev)
        return 0;
 }
 
-static int exynos_dsi_resume(struct device *dev)
+static int __maybe_unused exynos_dsi_resume(struct device *dev)
 {
        struct drm_encoder *encoder = dev_get_drvdata(dev);
        struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1972,7 +1972,6 @@ err_clk:
 
        return ret;
 }
-#endif
 
 static const struct dev_pm_ops exynos_dsi_pm_ops = {
        SET_RUNTIME_PM_OPS(exynos_dsi_suspend, exynos_dsi_resume, NULL)
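
Replacing the #ifdef CONFIG_PM block with __maybe_unused is the preferred modern form: the callbacks are always compiled (so every config catches bitrot), the attribute silences the unused-function warning when SET_RUNTIME_PM_OPS() expands to nothing, and the linker drops the dead code. The shape of the idiom, with placeholder names:

static int __maybe_unused foo_suspend(struct device *dev)
{
	/* ... quiesce hardware ... */
	return 0;
}

static int __maybe_unused foo_resume(struct device *dev)
{
	/* ... re-init hardware ... */
	return 0;
}

static const struct dev_pm_ops foo_pm_ops = {
	SET_RUNTIME_PM_OPS(foo_suspend, foo_resume, NULL)
};
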
index f6118baa8e3efad8732f8297ab6f6529d763171e..8baabd813ff55a1d713b0bdbf11386c135878d9a 100644 (file)
@@ -50,7 +50,7 @@ static int exynos_drm_fb_mmap(struct fb_info *info,
        if (vm_size > exynos_gem->size)
                return -EINVAL;
 
-       ret = dma_mmap_attrs(helper->dev->dev, vma, exynos_gem->pages,
+       ret = dma_mmap_attrs(helper->dev->dev, vma, exynos_gem->cookie,
                             exynos_gem->dma_addr, exynos_gem->size,
                             &exynos_gem->dma_attrs);
        if (ret < 0) {
index c747824f3c98551bf2883a7659249a840359de62..8a4f4a0211d0dd57001f2605a4815dae06686b9f 100644 (file)
@@ -1723,7 +1723,7 @@ static int fimc_probe(struct platform_device *pdev)
                goto err_put_clk;
        }
 
-       DRM_DEBUG_KMS("id[%d]ippdrv[0x%x]\n", ctx->id, (int)ippdrv);
+       DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv);
 
        spin_lock_init(&ctx->lock);
        platform_set_drvdata(pdev, ctx);
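
This and most of the exynos hunks that follow are one fix repeated: casting a pointer to (int) for 0x%x truncates it to 32 bits on 64-bit kernels and provokes pointer-to-int-cast warnings. The portable printk specifiers are %p for plain pointers and %pad for dma_addr_t, which is passed by address because its width varies by config:

DRM_DEBUG_KMS("ippdrv[%p]\n", ippdrv);			/* full-width pointer */
DRM_DEBUG_KMS("base[%pad]\n", &buf_info->base[i]);	/* dma_addr_t, by address */
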
index c17efdb238a6e24f6fcecac58c395b8964b12703..8dfe6e113a883b315419755b75c5b01159c60373 100644 (file)
@@ -1166,7 +1166,7 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data,
                goto err_free_event;
        }
 
-       cmd = (struct drm_exynos_g2d_cmd *)(uint32_t)req->cmd;
+       cmd = (struct drm_exynos_g2d_cmd *)(unsigned long)req->cmd;
 
        if (copy_from_user(cmdlist->data + cmdlist->last,
                                (void __user *)cmd,
@@ -1184,7 +1184,8 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data,
        if (req->cmd_buf_nr) {
                struct drm_exynos_g2d_cmd *cmd_buf;
 
-               cmd_buf = (struct drm_exynos_g2d_cmd *)(uint32_t)req->cmd_buf;
+               cmd_buf = (struct drm_exynos_g2d_cmd *)
+                               (unsigned long)req->cmd_buf;
 
                if (copy_from_user(cmdlist->data + cmdlist->last,
                                        (void __user *)cmd_buf,
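
req->cmd and req->cmd_buf are __u64 values carrying userspace addresses; the old (uint32_t) cast chopped off the upper half on 64-bit kernels. Going through (unsigned long) is correct because unsigned long matches pointer width on every Linux port. A sketch of the idiom:

__u64 ucmd = req->cmd;		/* 64-bit handle from the uapi struct */
struct drm_exynos_g2d_cmd __user *cmd;

cmd = (struct drm_exynos_g2d_cmd __user *)(unsigned long)ucmd;

(Later kernels wrap exactly this double cast as u64_to_user_ptr().)
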
index 32358c5e3db4be25e7127225fa86e343b97757c7..26b5e4bd55b6afd8f1cb042da863e90a5ee6ef8c 100644 (file)
@@ -218,7 +218,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev,
                return ERR_PTR(ret);
        }
 
-       DRM_DEBUG_KMS("created file object = 0x%x\n", (unsigned int)obj->filp);
+       DRM_DEBUG_KMS("created file object = %p\n", obj->filp);
 
        return exynos_gem;
 }
@@ -335,7 +335,7 @@ static int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem *exynos_gem,
        if (vm_size > exynos_gem->size)
                return -EINVAL;
 
-       ret = dma_mmap_attrs(drm_dev->dev, vma, exynos_gem->pages,
+       ret = dma_mmap_attrs(drm_dev->dev, vma, exynos_gem->cookie,
                             exynos_gem->dma_addr, exynos_gem->size,
                             &exynos_gem->dma_attrs);
        if (ret < 0) {
index 7aecd23cfa11a6638d783366b890fd33a9c0ea2e..5d20da8f957e2eac724331bb866f7c1b96244087 100644 (file)
@@ -1723,7 +1723,7 @@ static int gsc_probe(struct platform_device *pdev)
                return ret;
        }
 
-       DRM_DEBUG_KMS("id[%d]ippdrv[0x%x]\n", ctx->id, (int)ippdrv);
+       DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv);
 
        mutex_init(&ctx->lock);
        platform_set_drvdata(pdev, ctx);
index 67d24236e745c4dd1cc014ba0c5386784d3441f8..95eeb9116f102a931313eaea212e1e0aa4f89a18 100644 (file)
@@ -208,7 +208,7 @@ static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id)
         * e.g PAUSE state, queue buf, command control.
         */
        list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-               DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]\n", count++, (int)ippdrv);
+               DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n", count++, ippdrv);
 
                mutex_lock(&ippdrv->cmd_lock);
                list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
@@ -388,8 +388,8 @@ int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
        }
        property->prop_id = ret;
 
-       DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[0x%x]\n",
-               property->prop_id, property->cmd, (int)ippdrv);
+       DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%p]\n",
+               property->prop_id, property->cmd, ippdrv);
 
        /* stored property information and ippdrv in private data */
        c_node->property = *property;
@@ -518,7 +518,7 @@ static int ipp_put_mem_node(struct drm_device *drm_dev,
 {
        int i;
 
-       DRM_DEBUG_KMS("node[0x%x]\n", (int)m_node);
+       DRM_DEBUG_KMS("node[%p]\n", m_node);
 
        if (!m_node) {
                DRM_ERROR("invalid dequeue node.\n");
@@ -562,7 +562,7 @@ static struct drm_exynos_ipp_mem_node
        m_node->buf_id = qbuf->buf_id;
        INIT_LIST_HEAD(&m_node->list);
 
-       DRM_DEBUG_KMS("m_node[0x%x]ops_id[%d]\n", (int)m_node, qbuf->ops_id);
+       DRM_DEBUG_KMS("m_node[%p]ops_id[%d]\n", m_node, qbuf->ops_id);
        DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id);
 
        for_each_ipp_planar(i) {
@@ -582,8 +582,8 @@ static struct drm_exynos_ipp_mem_node
 
                        buf_info->handles[i] = qbuf->handle[i];
                        buf_info->base[i] = *addr;
-                       DRM_DEBUG_KMS("i[%d]base[0x%x]hd[0x%lx]\n", i,
-                                     buf_info->base[i], buf_info->handles[i]);
+                       DRM_DEBUG_KMS("i[%d]base[%pad]hd[0x%lx]\n", i,
+                                     &buf_info->base[i], buf_info->handles[i]);
                }
        }
 
@@ -664,7 +664,7 @@ static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node,
 
        mutex_lock(&c_node->event_lock);
        list_for_each_entry_safe(e, te, &c_node->event_list, base.link) {
-               DRM_DEBUG_KMS("count[%d]e[0x%x]\n", count++, (int)e);
+               DRM_DEBUG_KMS("count[%d]e[%p]\n", count++, e);
 
                /*
                 * qbuf == NULL condition means all event deletion.
@@ -755,7 +755,7 @@ static struct drm_exynos_ipp_mem_node
 
        /* find memory node from memory list */
        list_for_each_entry(m_node, head, list) {
-               DRM_DEBUG_KMS("count[%d]m_node[0x%x]\n", count++, (int)m_node);
+               DRM_DEBUG_KMS("count[%d]m_node[%p]\n", count++, m_node);
 
                /* compare buffer id */
                if (m_node->buf_id == qbuf->buf_id)
@@ -772,7 +772,7 @@ static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv,
        struct exynos_drm_ipp_ops *ops = NULL;
        int ret = 0;
 
-       DRM_DEBUG_KMS("node[0x%x]\n", (int)m_node);
+       DRM_DEBUG_KMS("node[%p]\n", m_node);
 
        if (!m_node) {
                DRM_ERROR("invalid queue node.\n");
@@ -1237,7 +1237,7 @@ static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv,
                        m_node = list_first_entry(head,
                                struct drm_exynos_ipp_mem_node, list);
 
-                       DRM_DEBUG_KMS("m_node[0x%x]\n", (int)m_node);
+                       DRM_DEBUG_KMS("m_node[%p]\n", m_node);
 
                        ret = ipp_set_mem_node(ippdrv, c_node, m_node);
                        if (ret) {
@@ -1610,8 +1610,8 @@ static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
                }
                ippdrv->prop_list.ipp_id = ret;
 
-               DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]ipp_id[%d]\n",
-                       count++, (int)ippdrv, ret);
+               DRM_DEBUG_KMS("count[%d]ippdrv[%p]ipp_id[%d]\n",
+                       count++, ippdrv, ret);
 
                /* store parent device for node */
                ippdrv->parent_dev = dev;
@@ -1668,7 +1668,7 @@ static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev,
 
        file_priv->ipp_dev = dev;
 
-       DRM_DEBUG_KMS("done priv[0x%x]\n", (int)dev);
+       DRM_DEBUG_KMS("done priv[%p]\n", dev);
 
        return 0;
 }
@@ -1685,8 +1685,8 @@ static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev,
                mutex_lock(&ippdrv->cmd_lock);
                list_for_each_entry_safe(c_node, tc_node,
                        &ippdrv->cmd_list, list) {
-                       DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]\n",
-                               count++, (int)ippdrv);
+                       DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n",
+                               count++, ippdrv);
 
                        if (c_node->filp == file) {
                                /*
index 4eaef36aec5a42d9d1ccd70ff4f15f39d707648c..9869d70e9e54af32abb775dc0e17f0c1da83999b 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/of.h>
 #include <linux/of_graph.h>
 #include <linux/clk.h>
+#include <linux/component.h>
 #include <drm/drmP.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
@@ -306,9 +307,9 @@ exit:
        return ret;
 }
 
-void mic_disable(struct drm_bridge *bridge) { }
+static void mic_disable(struct drm_bridge *bridge) { }
 
-void mic_post_disable(struct drm_bridge *bridge)
+static void mic_post_disable(struct drm_bridge *bridge)
 {
        struct exynos_mic *mic = bridge->driver_private;
        int i;
@@ -328,7 +329,7 @@ already_disabled:
        mutex_unlock(&mic_mutex);
 }
 
-void mic_pre_enable(struct drm_bridge *bridge)
+static void mic_pre_enable(struct drm_bridge *bridge)
 {
        struct exynos_mic *mic = bridge->driver_private;
        int ret, i;
@@ -371,11 +372,35 @@ already_enabled:
        mutex_unlock(&mic_mutex);
 }
 
-void mic_enable(struct drm_bridge *bridge) { }
+static void mic_enable(struct drm_bridge *bridge) { }
 
-void mic_destroy(struct drm_bridge *bridge)
+static const struct drm_bridge_funcs mic_bridge_funcs = {
+       .disable = mic_disable,
+       .post_disable = mic_post_disable,
+       .pre_enable = mic_pre_enable,
+       .enable = mic_enable,
+};
+
+static int exynos_mic_bind(struct device *dev, struct device *master,
+                          void *data)
 {
-       struct exynos_mic *mic = bridge->driver_private;
+       struct exynos_mic *mic = dev_get_drvdata(dev);
+       int ret;
+
+       mic->bridge.funcs = &mic_bridge_funcs;
+       mic->bridge.of_node = dev->of_node;
+       mic->bridge.driver_private = mic;
+       ret = drm_bridge_add(&mic->bridge);
+       if (ret)
+               DRM_ERROR("mic: Failed to add MIC to the global bridge list\n");
+
+       return ret;
+}
+
+static void exynos_mic_unbind(struct device *dev, struct device *master,
+                             void *data)
+{
+       struct exynos_mic *mic = dev_get_drvdata(dev);
        int i;
 
        mutex_lock(&mic_mutex);
@@ -387,16 +412,16 @@ void mic_destroy(struct drm_bridge *bridge)
 
 already_disabled:
        mutex_unlock(&mic_mutex);
+
+       drm_bridge_remove(&mic->bridge);
 }
 
-static const struct drm_bridge_funcs mic_bridge_funcs = {
-       .disable = mic_disable,
-       .post_disable = mic_post_disable,
-       .pre_enable = mic_pre_enable,
-       .enable = mic_enable,
+static const struct component_ops exynos_mic_component_ops = {
+       .bind   = exynos_mic_bind,
+       .unbind = exynos_mic_unbind,
 };
 
-int exynos_mic_probe(struct platform_device *pdev)
+static int exynos_mic_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct exynos_mic *mic;
@@ -435,17 +460,8 @@ int exynos_mic_probe(struct platform_device *pdev)
                goto err;
        }
 
-       mic->bridge.funcs = &mic_bridge_funcs;
-       mic->bridge.of_node = dev->of_node;
-       mic->bridge.driver_private = mic;
-       ret = drm_bridge_add(&mic->bridge);
-       if (ret) {
-               DRM_ERROR("mic: Failed to add MIC to the global bridge list\n");
-               goto err;
-       }
-
        for (i = 0; i < NUM_CLKS; i++) {
-               mic->clks[i] = of_clk_get_by_name(dev->of_node, clk_names[i]);
+               mic->clks[i] = devm_clk_get(dev, clk_names[i]);
                if (IS_ERR(mic->clks[i])) {
                        DRM_ERROR("mic: Failed to get clock (%s)\n",
                                                                clk_names[i]);
@@ -454,7 +470,10 @@ int exynos_mic_probe(struct platform_device *pdev)
                }
        }
 
+       platform_set_drvdata(pdev, mic);
+
        DRM_DEBUG_KMS("MIC has been probed\n");
+       return component_add(dev, &exynos_mic_component_ops);
 
 err:
        return ret;
@@ -462,14 +481,7 @@ err:
 
 static int exynos_mic_remove(struct platform_device *pdev)
 {
-       struct exynos_mic *mic = platform_get_drvdata(pdev);
-       int i;
-
-       drm_bridge_remove(&mic->bridge);
-
-       for (i = NUM_CLKS - 1; i > -1; i--)
-               clk_put(mic->clks[i]);
-
+       component_del(&pdev->dev, &exynos_mic_component_ops);
        return 0;
 }
 
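The hunks above convert exynos_mic from registering its drm_bridge directly at probe time to the component framework: the DRM-facing setup moves into a bind callback that runs only once the master DRM device assembles all of its components, and probe merely registers the component. A minimal sketch of that pattern, assuming a hypothetical foo driver (foo_register_with_drm() and foo_unregister_from_drm() stand in for whatever DRM setup the real driver does):

#include <linux/component.h>
#include <linux/platform_device.h>

static int foo_bind(struct device *dev, struct device *master, void *data)
{
        struct foo *foo = dev_get_drvdata(dev);

        /* called via component_bind_all() from the DRM master's bind;
         * data is the struct drm_device being brought up */
        return foo_register_with_drm(foo, data);
}

static void foo_unbind(struct device *dev, struct device *master, void *data)
{
        struct foo *foo = dev_get_drvdata(dev);

        foo_unregister_from_drm(foo, data);
}

static const struct component_ops foo_component_ops = {
        .bind   = foo_bind,
        .unbind = foo_unbind,
};

static int foo_probe(struct platform_device *pdev)
{
        struct foo *foo;

        foo = devm_kzalloc(&pdev->dev, sizeof(*foo), GFP_KERNEL);
        if (!foo)
                return -ENOMEM;

        platform_set_drvdata(pdev, foo);

        /* defer all DRM-facing setup to foo_bind() */
        return component_add(&pdev->dev, &foo_component_ops);
}

static int foo_remove(struct platform_device *pdev)
{
        component_del(&pdev->dev, &foo_component_ops);
        return 0;
}

The same conversion also switches of_clk_get_by_name() to devm_clk_get(), which is why the manual clk_put() loop disappears from the remove path.
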
index bea0f7826d30a168022c79598c9cea6f4641afcd..ce59f4443394f5394c85956e2e261dc62eaedf43 100644 (file)
@@ -754,7 +754,7 @@ static int rotator_probe(struct platform_device *pdev)
                goto err_ippdrv_register;
        }
 
-       DRM_DEBUG_KMS("ippdrv[0x%x]\n", (int)ippdrv);
+       DRM_DEBUG_KMS("ippdrv[%p]\n", ippdrv);
 
        platform_set_drvdata(pdev, rot);
 
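The one-line rotator fix replaces a pointer-to-int cast in a debug print with the %p format specifier. A brief illustration of why, a sketch rather than the driver's code:

        /* before: DRM_DEBUG_KMS("ippdrv[0x%x]\n", (int)ippdrv);
         *         truncates to 32 bits on 64-bit kernels and triggers
         *         -Wpointer-to-int-cast
         * after:  %p prints the pointer at its native width */
        pr_debug("ippdrv[%p]\n", ippdrv);
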
index 62ac4e5fa51dbb00cda1f02d50f630b48b6d44bc..b605bd7395eccf2e3a6fb8264c37bd31774b1634 100644 (file)
@@ -223,7 +223,7 @@ static void vidi_fake_vblank_handler(struct work_struct *work)
        }
 }
 
-static int vidi_show_connection(struct device *dev,
+static ssize_t vidi_show_connection(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
        struct vidi_context *ctx = dev_get_drvdata(dev);
@@ -238,7 +238,7 @@ static int vidi_show_connection(struct device *dev,
        return rc;
 }
 
-static int vidi_store_connection(struct device *dev,
+static ssize_t vidi_store_connection(struct device *dev,
                                struct device_attribute *attr,
                                const char *buf, size_t len)
 {
@@ -294,7 +294,9 @@ int vidi_connection_ioctl(struct drm_device *drm_dev, void *data,
        }
 
        if (vidi->connection) {
-               struct edid *raw_edid  = (struct edid *)(uint32_t)vidi->edid;
+               struct edid *raw_edid;
+
+               raw_edid = (struct edid *)(unsigned long)vidi->edid;
                if (!drm_edid_is_valid(raw_edid)) {
                        DRM_DEBUG_KMS("edid data is invalid.\n");
                        return -EINVAL;
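
vidi->edid carries a userspace-supplied 64-bit value, so casting it through uint32_t before converting to a pointer silently drops the upper 32 bits on 64-bit kernels. A minimal sketch of the safe conversion (the helper name is hypothetical):

#include <linux/types.h>

static struct edid *edid_from_u64(u64 val)
{
        /* (uint32_t)val would keep only the low 32 bits on 64-bit;
         * unsigned long always matches the pointer width on Linux,
         * so the round-trip through it is lossless */
        return (struct edid *)(unsigned long)val;
}
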
index b5fbc1cbf02454d0e8c76de88f91f2c63e343e2c..0a5a60005f7e5711e14147dfa21990be1b88a2b8 100644 (file)
@@ -1289,8 +1289,7 @@ static int mixer_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int exynos_mixer_suspend(struct device *dev)
+static int __maybe_unused exynos_mixer_suspend(struct device *dev)
 {
        struct mixer_context *ctx = dev_get_drvdata(dev);
        struct mixer_resources *res = &ctx->mixer_res;
@@ -1306,7 +1305,7 @@ static int exynos_mixer_suspend(struct device *dev)
        return 0;
 }
 
-static int exynos_mixer_resume(struct device *dev)
+static int __maybe_unused exynos_mixer_resume(struct device *dev)
 {
        struct mixer_context *ctx = dev_get_drvdata(dev);
        struct mixer_resources *res = &ctx->mixer_res;
@@ -1342,7 +1341,6 @@ static int exynos_mixer_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
 static const struct dev_pm_ops exynos_mixer_pm_ops = {
        SET_RUNTIME_PM_OPS(exynos_mixer_suspend, exynos_mixer_resume, NULL)
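
Replacing the #ifdef CONFIG_PM_SLEEP guard with __maybe_unused keeps the suspend/resume callbacks compiled and type-checked in every configuration; when CONFIG_PM is disabled, SET_RUNTIME_PM_OPS() expands to nothing and the attribute suppresses the resulting defined-but-unused warnings. A hedged sketch with hypothetical bar_* names:

#include <linux/pm.h>

static int __maybe_unused bar_suspend(struct device *dev)
{
        /* stop clocks, save state ... */
        return 0;
}

static int __maybe_unused bar_resume(struct device *dev)
{
        /* restore state, restart clocks ... */
        return 0;
}

static const struct dev_pm_ops bar_pm_ops = {
        /* expands to nothing without CONFIG_PM; __maybe_unused keeps
         * the compiler quiet about the then-unreferenced callbacks */
        SET_RUNTIME_PM_OPS(bar_suspend, bar_resume, NULL)
};
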
index 533d1e3d4a999f971b2479f471c965e3708a73f2..a02112ba1c3df692b3a089a3ab24a8f30d35b7ec 100644 (file)
@@ -136,6 +136,7 @@ static bool adv7511_register_volatile(struct device *dev, unsigned int reg)
        case ADV7511_REG_BKSV(3):
        case ADV7511_REG_BKSV(4):
        case ADV7511_REG_DDC_STATUS:
+       case ADV7511_REG_EDID_READ_CTRL:
        case ADV7511_REG_BSTATUS(0):
        case ADV7511_REG_BSTATUS(1):
        case ADV7511_REG_CHIP_ID_HIGH:
@@ -362,24 +363,31 @@ static void adv7511_power_on(struct adv7511 *adv7511)
 {
        adv7511->current_edid_segment = -1;
 
-       regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                    ADV7511_INT0_EDID_READY);
-       regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
-                    ADV7511_INT1_DDC_ERROR);
        regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
                           ADV7511_POWER_POWER_DOWN, 0);
+       if (adv7511->i2c_main->irq) {
+               /*
+                * Documentation says the INT_ENABLE registers are reset in
+                * POWER_DOWN mode. My 7511w preserved the bits, however.
+                * Still, let's be safe and stick to the documentation.
+                */
+               regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(0),
+                            ADV7511_INT0_EDID_READY);
+               regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(1),
+                            ADV7511_INT1_DDC_ERROR);
+       }
 
        /*
-        * Per spec it is allowed to pulse the HDP signal to indicate that the
+        * Per spec it is allowed to pulse the HPD signal to indicate that the
         * EDID information has changed. Some monitors do this when they wake up
-        * from standby or are enabled. When the HDP goes low the adv7511 is
+        * from standby or are enabled. When the HPD goes low the adv7511 is
         * reset and the outputs are disabled which might cause the monitor to
-        * go to standby again. To avoid this we ignore the HDP pin for the
+        * go to standby again. To avoid this we ignore the HPD pin for the
         * first few seconds after enabling the output.
         */
        regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
-                          ADV7511_REG_POWER2_HDP_SRC_MASK,
-                          ADV7511_REG_POWER2_HDP_SRC_NONE);
+                          ADV7511_REG_POWER2_HPD_SRC_MASK,
+                          ADV7511_REG_POWER2_HPD_SRC_NONE);
 
        /*
         * Most of the registers are reset during power down or when HPD is low.
@@ -413,9 +421,9 @@ static bool adv7511_hpd(struct adv7511 *adv7511)
        if (ret < 0)
                return false;
 
-       if (irq0 & ADV7511_INT0_HDP) {
+       if (irq0 & ADV7511_INT0_HPD) {
                regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                            ADV7511_INT0_HDP);
+                            ADV7511_INT0_HPD);
                return true;
        }
 
@@ -438,7 +446,7 @@ static int adv7511_irq_process(struct adv7511 *adv7511)
        regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
        regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
 
-       if (irq0 & ADV7511_INT0_HDP && adv7511->encoder)
+       if (irq0 & ADV7511_INT0_HPD && adv7511->encoder)
                drm_helper_hpd_irq_event(adv7511->encoder->dev);
 
        if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
@@ -567,12 +575,14 @@ static int adv7511_get_modes(struct drm_encoder *encoder,
 
        /* Reading the EDID only works if the device is powered */
        if (!adv7511->powered) {
-               regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                            ADV7511_INT0_EDID_READY);
-               regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
-                            ADV7511_INT1_DDC_ERROR);
                regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
                                   ADV7511_POWER_POWER_DOWN, 0);
+               if (adv7511->i2c_main->irq) {
+                       regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(0),
+                                    ADV7511_INT0_EDID_READY);
+                       regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(1),
+                                    ADV7511_INT1_DDC_ERROR);
+               }
                adv7511->current_edid_segment = -1;
        }
 
@@ -638,10 +648,10 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
                if (adv7511->status == connector_status_connected)
                        status = connector_status_disconnected;
        } else {
-               /* Renable HDP sensing */
+               /* Re-enable HPD sensing */
                regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
-                                  ADV7511_REG_POWER2_HDP_SRC_MASK,
-                                  ADV7511_REG_POWER2_HDP_SRC_BOTH);
+                                  ADV7511_REG_POWER2_HPD_SRC_MASK,
+                                  ADV7511_REG_POWER2_HPD_SRC_BOTH);
        }
 
        adv7511->status = status;
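
Two things change in the adv7511 power-up path above: the EDID_READY/DDC_ERROR interrupts are unmasked through the INT_ENABLE registers rather than written to the INT status registers, and the unmask happens after POWER_DOWN is cleared (the documentation says POWER_DOWN resets INT_ENABLE) and only when an interrupt line is actually wired up. Condensed into a sketch, using the register names from the diff:

static void example_adv7511_power_on(struct adv7511 *adv)
{
        /* leave power-down first: POWER_DOWN is documented to reset
         * the INT_ENABLE registers */
        regmap_update_bits(adv->regmap, ADV7511_REG_POWER,
                           ADV7511_POWER_POWER_DOWN, 0);

        /* polling setups have no irq; skip the unmask entirely */
        if (adv->i2c_main->irq) {
                regmap_write(adv->regmap, ADV7511_REG_INT_ENABLE(0),
                             ADV7511_INT0_EDID_READY);
                regmap_write(adv->regmap, ADV7511_REG_INT_ENABLE(1),
                             ADV7511_INT1_DDC_ERROR);
        }
}
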
index 6599ed538426d60f41a9dbebe3e9bcd8e8b11acc..38515b30cedfc84812a556638fe7333ff6ebeb8b 100644 (file)
@@ -90,7 +90,7 @@
 #define ADV7511_CSC_ENABLE                     BIT(7)
 #define ADV7511_CSC_UPDATE_MODE                        BIT(5)
 
-#define ADV7511_INT0_HDP                       BIT(7)
+#define ADV7511_INT0_HPD                       BIT(7)
 #define ADV7511_INT0_VSYNC                     BIT(5)
 #define ADV7511_INT0_AUDIO_FIFO_FULL           BIT(4)
 #define ADV7511_INT0_EDID_READY                        BIT(2)
 #define ADV7511_PACKET_ENABLE_SPARE2           BIT(1)
 #define ADV7511_PACKET_ENABLE_SPARE1           BIT(0)
 
-#define ADV7511_REG_POWER2_HDP_SRC_MASK                0xc0
-#define ADV7511_REG_POWER2_HDP_SRC_BOTH                0x00
-#define ADV7511_REG_POWER2_HDP_SRC_HDP         0x40
-#define ADV7511_REG_POWER2_HDP_SRC_CEC         0x80
-#define ADV7511_REG_POWER2_HDP_SRC_NONE                0xc0
+#define ADV7511_REG_POWER2_HPD_SRC_MASK                0xc0
+#define ADV7511_REG_POWER2_HPD_SRC_BOTH                0x00
+#define ADV7511_REG_POWER2_HPD_SRC_HPD         0x40
+#define ADV7511_REG_POWER2_HPD_SRC_CEC         0x80
+#define ADV7511_REG_POWER2_HPD_SRC_NONE                0xc0
 #define ADV7511_REG_POWER2_TDMS_ENABLE         BIT(4)
 #define ADV7511_REG_POWER2_GATE_INPUT_CLK      BIT(0)
 
index fcd77b27514dfdb738334024c2b1201732af6dc3..051eab33e4c7b13260994ebb884cc44a5394c4bc 100644 (file)
@@ -10,7 +10,6 @@ config DRM_I915
        # the shmem_readpage() which depends upon tmpfs
        select SHMEM
        select TMPFS
-       select STOP_MACHINE
        select DRM_KMS_HELPER
        select DRM_PANEL
        select DRM_MIPI_DSI
index 0fc38bb7276c26920b372ca52c053eba504aa722..cf39ed3133d63aaa434120dc085bd6ea0811b2ee 100644 (file)
@@ -825,8 +825,11 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
                }
 
                for_each_pipe(dev_priv, pipe) {
-                       if (!intel_display_power_is_enabled(dev_priv,
-                                               POWER_DOMAIN_PIPE(pipe))) {
+                       enum intel_display_power_domain power_domain;
+
+                       power_domain = POWER_DOMAIN_PIPE(pipe);
+                       if (!intel_display_power_get_if_enabled(dev_priv,
+                                                               power_domain)) {
                                seq_printf(m, "Pipe %c power disabled\n",
                                           pipe_name(pipe));
                                continue;
@@ -840,6 +843,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
                        seq_printf(m, "Pipe %c IER:\t%08x\n",
                                   pipe_name(pipe),
                                   I915_READ(GEN8_DE_PIPE_IER(pipe)));
+
+                       intel_display_power_put(dev_priv, power_domain);
                }
 
                seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
@@ -3985,6 +3990,7 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
        struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe];
        struct intel_crtc *crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev,
                                                                        pipe));
+       enum intel_display_power_domain power_domain;
        u32 val = 0; /* shut up gcc */
        int ret;
 
@@ -3995,7 +4001,8 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
        if (pipe_crc->source && source)
                return -EINVAL;
 
-       if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) {
+       power_domain = POWER_DOMAIN_PIPE(pipe);
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) {
                DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n");
                return -EIO;
        }
@@ -4012,7 +4019,7 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
                ret = ivb_pipe_crc_ctl_reg(dev, pipe, &source, &val);
 
        if (ret != 0)
-               return ret;
+               goto out;
 
        /* none -> real source transition */
        if (source) {
@@ -4024,8 +4031,10 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
                entries = kcalloc(INTEL_PIPE_CRC_ENTRIES_NR,
                                  sizeof(pipe_crc->entries[0]),
                                  GFP_KERNEL);
-               if (!entries)
-                       return -ENOMEM;
+               if (!entries) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
 
                /*
                 * When IPS gets enabled, the pipe CRC changes. Since IPS gets
@@ -4081,7 +4090,12 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
                hsw_enable_ips(crtc);
        }
 
-       return 0;
+       ret = 0;
+
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 /*
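
This is the first of many hunks in this pull converting i915 from intel_display_power_is_enabled(), a racy check that leaves the domain free to power down mid-read, to intel_display_power_get_if_enabled(), which either fails or takes a reference that pins the domain until the matching intel_display_power_put(). The recurring shape, reduced to a hypothetical helper:

static bool example_read_pipeconf(struct drm_i915_private *dev_priv,
                                  enum pipe pipe, u32 *val)
{
        enum intel_display_power_domain power_domain = POWER_DOMAIN_PIPE(pipe);

        if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;   /* domain is off; no reference taken */

        /* the domain cannot power down while we hold the reference */
        *val = I915_READ(PIPECONF(pipe));

        intel_display_power_put(dev_priv, power_domain);
        return true;
}
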
index 3ac616d7363bafcc4197aa126c509932d15fb299..f357058c74d938a41f922c4b40e3da1d67aa029a 100644 (file)
@@ -501,7 +501,9 @@ void intel_detect_pch(struct drm_device *dev)
                                WARN_ON(!IS_SKYLAKE(dev) &&
                                        !IS_KABYLAKE(dev));
                        } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
-                                  (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE)) {
+                                  ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
+                                   pch->subsystem_vendor == 0x1af4 &&
+                                   pch->subsystem_device == 0x1100)) {
                                dev_priv->pch_type = intel_virt_detect_pch(dev);
                        } else
                                continue;
index f0f75d7c0d94263f86ccfdd7d8b84af831462be0..b0847b9155452366569a4676c9da62bc6c2bc94d 100644 (file)
@@ -751,6 +751,7 @@ struct intel_csr {
        uint32_t mmio_count;
        i915_reg_t mmioaddr[8];
        uint32_t mmiodata[8];
+       uint32_t dc_state;
 };
 
 #define DEV_INFO_FOR_EACH_FLAG(func, sep) \
@@ -1988,6 +1989,9 @@ enum hdmi_force_audio {
 #define I915_GTT_OFFSET_NONE ((u32)-1)
 
 struct drm_i915_gem_object_ops {
+       unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
+
        /* Interface between the GEM object and its backing storage.
         * get_pages() is called once prior to the use of the associated set
         * of pages before to binding them into the GTT, and put_pages() is
@@ -2003,6 +2007,7 @@ struct drm_i915_gem_object_ops {
         */
        int (*get_pages)(struct drm_i915_gem_object *);
        void (*put_pages)(struct drm_i915_gem_object *);
+
        int (*dmabuf_export)(struct drm_i915_gem_object *);
        void (*release)(struct drm_i915_gem_object *);
 };
index ddc21d4b388d2419a63fbd4f4a5745deea80957b..bb44bad15403556fb443998852864d8e0fb83472 100644 (file)
@@ -4425,6 +4425,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
+       .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
        .get_pages = i915_gem_object_get_pages_gtt,
        .put_pages = i915_gem_object_put_pages_gtt,
 };
@@ -5261,7 +5262,7 @@ i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
        struct page *page;
 
        /* Only default objects have per-page dirty tracking */
-       if (WARN_ON(obj->ops != &i915_gem_object_ops))
+       if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
                return NULL;
 
        page = i915_gem_object_get_page(obj, n);
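
The WARN_ON previously compared obj->ops against one specific ops table, so userptr objects, which are also backed by struct pages, tripped it. Declaring the capability as a flag in the ops table lets every backend opt in. The idea in isolation, with generic names:

struct example_ops {
        unsigned int flags;
#define EXAMPLE_HAS_STRUCT_PAGE 0x1
        /* ... method pointers ... */
};

static bool example_has_struct_page(const struct example_ops *ops)
{
        /* capability check instead of pointer identity: any ops table
         * that sets the flag qualifies, not just one known table */
        return ops->flags & EXAMPLE_HAS_STRUCT_PAGE;
}
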
index 19fb0bddc1cddfce0804e459319dc11ba96c5ab7..59e45b3a69379a0e892fbd85d7a17ba3f85913eb 100644 (file)
@@ -789,9 +789,10 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
-       .dmabuf_export = i915_gem_userptr_dmabuf_export,
+       .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
        .get_pages = i915_gem_userptr_get_pages,
        .put_pages = i915_gem_userptr_put_pages,
+       .dmabuf_export = i915_gem_userptr_dmabuf_export,
        .release = i915_gem_userptr_release,
 };
 
index 007ae83a4086d65ff7e4d3862f2ad2a035c74540..4897728713f698d1b3fdbbd31cbf473524c4ecd1 100644 (file)
@@ -3287,19 +3287,20 @@ enum skl_disp_power_wells {
 
 #define PORT_HOTPLUG_STAT      _MMIO(dev_priv->info.display_mmio_offset + 0x61114)
 /*
- * HDMI/DP bits are gen4+
+ * HDMI/DP bits are g4x+
  *
  * WARNING: Bspec for hpd status bits on gen4 seems to be completely confused.
  * Please check the detailed lore in the commit message for experimental
  * evidence.
  */
-#define   PORTD_HOTPLUG_LIVE_STATUS_G4X                (1 << 29)
+/* Bspec says GM45 should match G4X/VLV/CHV, but reality disagrees */
+#define   PORTD_HOTPLUG_LIVE_STATUS_GM45       (1 << 29)
+#define   PORTC_HOTPLUG_LIVE_STATUS_GM45       (1 << 28)
+#define   PORTB_HOTPLUG_LIVE_STATUS_GM45       (1 << 27)
+/* G4X/VLV/CHV DP/HDMI bits again match Bspec */
+#define   PORTD_HOTPLUG_LIVE_STATUS_G4X                (1 << 27)
 #define   PORTC_HOTPLUG_LIVE_STATUS_G4X                (1 << 28)
-#define   PORTB_HOTPLUG_LIVE_STATUS_G4X                (1 << 27)
-/* VLV DP/HDMI bits again match Bspec */
-#define   PORTD_HOTPLUG_LIVE_STATUS_VLV                (1 << 27)
-#define   PORTC_HOTPLUG_LIVE_STATUS_VLV                (1 << 28)
-#define   PORTB_HOTPLUG_LIVE_STATUS_VLV                (1 << 29)
+#define   PORTB_HOTPLUG_LIVE_STATUS_G4X                (1 << 29)
 #define   PORTD_HOTPLUG_INT_STATUS             (3 << 21)
 #define   PORTD_HOTPLUG_INT_LONG_PULSE         (2 << 21)
 #define   PORTD_HOTPLUG_INT_SHORT_PULSE                (1 << 21)
@@ -7514,7 +7515,7 @@ enum skl_disp_power_wells {
 #define  DPLL_CFGCR2_PDIV_7 (4<<2)
 #define  DPLL_CFGCR2_CENTRAL_FREQ_MASK (3)
 
-#define DPLL_CFGCR1(id)        _MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR2)
+#define DPLL_CFGCR1(id)        _MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR1)
 #define DPLL_CFGCR2(id)        _MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR2, _DPLL2_CFGCR2)
 
 /* BXT display engine PLL */
index a2aa09ce3202f3ca72e64f550244e2a2c8e8a29f..a8af594fbd0097ba066071710a4832c3b731474e 100644 (file)
@@ -49,7 +49,7 @@ static void i915_save_display(struct drm_device *dev)
                dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS);
                dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS);
                dev_priv->regfile.savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR);
-       } else if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
+       } else if (INTEL_INFO(dev)->gen <= 4) {
                dev_priv->regfile.savePP_CONTROL = I915_READ(PP_CONTROL);
                dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS);
                dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS);
@@ -84,7 +84,7 @@ static void i915_restore_display(struct drm_device *dev)
                I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
                I915_WRITE(PCH_PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
                I915_WRITE(PCH_PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
-       } else if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
+       } else if (INTEL_INFO(dev)->gen <= 4) {
                I915_WRITE(PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
                I915_WRITE(PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
                I915_WRITE(PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
index 9c89df1af036de613d71a5e8fccc83050722a088..a7b4a524faddc8a80c6693c89eee98efa3fc4450 100644 (file)
@@ -71,22 +71,29 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder,
        struct intel_crt *crt = intel_encoder_to_crt(encoder);
        enum intel_display_power_domain power_domain;
        u32 tmp;
+       bool ret;
 
        power_domain = intel_display_port_power_domain(encoder);
-       if (!intel_display_power_is_enabled(dev_priv, power_domain))
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
+       ret = false;
+
        tmp = I915_READ(crt->adpa_reg);
 
        if (!(tmp & ADPA_DAC_ENABLE))
-               return false;
+               goto out;
 
        if (HAS_PCH_CPT(dev))
                *pipe = PORT_TO_PIPE_CPT(tmp);
        else
                *pipe = PORT_TO_PIPE(tmp);
 
-       return true;
+       ret = true;
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 static unsigned int intel_crt_get_flags(struct intel_encoder *encoder)
index 9bb63a85997a4ffbf31da0e8dbd211786f2e626c..647d85e77c2f4700e84bc275efb711f9b2dc2363 100644 (file)
@@ -240,6 +240,8 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv)
                I915_WRITE(dev_priv->csr.mmioaddr[i],
                           dev_priv->csr.mmiodata[i]);
        }
+
+       dev_priv->csr.dc_state = 0;
 }
 
 static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
index e6408e5583d7a88af4511c6ec2334db2b7922555..0f3df2c39f7cdd3a69d8f0044cbd725e3b9bac6d 100644 (file)
@@ -1589,7 +1589,8 @@ skl_ddi_pll_select(struct intel_crtc *intel_crtc,
                         DPLL_CFGCR2_KDIV(wrpll_params.kdiv) |
                         DPLL_CFGCR2_PDIV(wrpll_params.pdiv) |
                         wrpll_params.central_freq;
-       } else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT) {
+       } else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
+                  intel_encoder->type == INTEL_OUTPUT_DP_MST) {
                switch (crtc_state->port_clock / 2) {
                case 81000:
                        ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, 0);
@@ -1968,13 +1969,16 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
        enum transcoder cpu_transcoder;
        enum intel_display_power_domain power_domain;
        uint32_t tmp;
+       bool ret;
 
        power_domain = intel_display_port_power_domain(intel_encoder);
-       if (!intel_display_power_is_enabled(dev_priv, power_domain))
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
-       if (!intel_encoder->get_hw_state(intel_encoder, &pipe))
-               return false;
+       if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) {
+               ret = false;
+               goto out;
+       }
 
        if (port == PORT_A)
                cpu_transcoder = TRANSCODER_EDP;
@@ -1986,23 +1990,33 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
        switch (tmp & TRANS_DDI_MODE_SELECT_MASK) {
        case TRANS_DDI_MODE_SELECT_HDMI:
        case TRANS_DDI_MODE_SELECT_DVI:
-               return (type == DRM_MODE_CONNECTOR_HDMIA);
+               ret = type == DRM_MODE_CONNECTOR_HDMIA;
+               break;
 
        case TRANS_DDI_MODE_SELECT_DP_SST:
-               if (type == DRM_MODE_CONNECTOR_eDP)
-                       return true;
-               return (type == DRM_MODE_CONNECTOR_DisplayPort);
+               ret = type == DRM_MODE_CONNECTOR_eDP ||
+                     type == DRM_MODE_CONNECTOR_DisplayPort;
+               break;
+
        case TRANS_DDI_MODE_SELECT_DP_MST:
                /* if the transcoder is in MST state then
                 * connector isn't connected */
-               return false;
+               ret = false;
+               break;
 
        case TRANS_DDI_MODE_SELECT_FDI:
-               return (type == DRM_MODE_CONNECTOR_VGA);
+               ret = type == DRM_MODE_CONNECTOR_VGA;
+               break;
 
        default:
-               return false;
+               ret = false;
+               break;
        }
+
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
@@ -2014,15 +2028,18 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
        enum intel_display_power_domain power_domain;
        u32 tmp;
        int i;
+       bool ret;
 
        power_domain = intel_display_port_power_domain(encoder);
-       if (!intel_display_power_is_enabled(dev_priv, power_domain))
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
+       ret = false;
+
        tmp = I915_READ(DDI_BUF_CTL(port));
 
        if (!(tmp & DDI_BUF_CTL_ENABLE))
-               return false;
+               goto out;
 
        if (port == PORT_A) {
                tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
@@ -2040,25 +2057,32 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
                        break;
                }
 
-               return true;
-       } else {
-               for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
-                       tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+               ret = true;
 
-                       if ((tmp & TRANS_DDI_PORT_MASK)
-                           == TRANS_DDI_SELECT_PORT(port)) {
-                               if ((tmp & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_DP_MST)
-                                       return false;
+               goto out;
+       }
 
-                               *pipe = i;
-                               return true;
-                       }
+       for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
+               tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+
+               if ((tmp & TRANS_DDI_PORT_MASK) == TRANS_DDI_SELECT_PORT(port)) {
+                       if ((tmp & TRANS_DDI_MODE_SELECT_MASK) ==
+                           TRANS_DDI_MODE_SELECT_DP_MST)
+                               goto out;
+
+                       *pipe = i;
+                       ret = true;
+
+                       goto out;
                }
        }
 
        DRM_DEBUG_KMS("No pipe for ddi port %c found\n", port_name(port));
 
-       return false;
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc)
@@ -2507,12 +2531,14 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
 {
        uint32_t val;
 
-       if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+       if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
                return false;
 
        val = I915_READ(WRPLL_CTL(pll->id));
        hw_state->wrpll = val;
 
+       intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
        return val & WRPLL_PLL_ENABLE;
 }
 
@@ -2522,12 +2548,14 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv,
 {
        uint32_t val;
 
-       if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+       if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
                return false;
 
        val = I915_READ(SPLL_CTL);
        hw_state->spll = val;
 
+       intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
        return val & SPLL_PLL_ENABLE;
 }
 
@@ -2644,16 +2672,19 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
        uint32_t val;
        unsigned int dpll;
        const struct skl_dpll_regs *regs = skl_dpll_regs;
+       bool ret;
 
-       if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+       if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
                return false;
 
+       ret = false;
+
        /* DPLL0 is not part of the shared DPLLs, so pll->id is 0 for DPLL1 */
        dpll = pll->id + 1;
 
        val = I915_READ(regs[pll->id].ctl);
        if (!(val & LCPLL_PLL_ENABLE))
-               return false;
+               goto out;
 
        val = I915_READ(DPLL_CTRL1);
        hw_state->ctrl1 = (val >> (dpll * 6)) & 0x3f;
@@ -2663,8 +2694,12 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
                hw_state->cfgcr1 = I915_READ(regs[pll->id].cfgcr1);
                hw_state->cfgcr2 = I915_READ(regs[pll->id].cfgcr2);
        }
+       ret = true;
 
-       return true;
+out:
+       intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
+       return ret;
 }
 
 static void skl_shared_dplls_init(struct drm_i915_private *dev_priv)
@@ -2931,13 +2966,16 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 {
        enum port port = (enum port)pll->id;    /* 1:1 port->PLL mapping */
        uint32_t val;
+       bool ret;
 
-       if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+       if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
                return false;
 
+       ret = false;
+
        val = I915_READ(BXT_PORT_PLL_ENABLE(port));
        if (!(val & PORT_PLL_ENABLE))
-               return false;
+               goto out;
 
        hw_state->ebb0 = I915_READ(BXT_PORT_PLL_EBB_0(port));
        hw_state->ebb0 &= PORT_PLL_P1_MASK | PORT_PLL_P2_MASK;
@@ -2984,7 +3022,12 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
                                 I915_READ(BXT_PORT_PCS_DW12_LN23(port)));
        hw_state->pcsdw12 &= LANE_STAGGER_MASK | LANESTAGGER_STRAP_OVRD;
 
-       return true;
+       ret = true;
+
+out:
+       intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
+       return ret;
 }
 
 static void bxt_shared_dplls_init(struct drm_i915_private *dev_priv)
@@ -3119,11 +3162,15 @@ bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
 {
        u32 temp;
 
-       if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
+       if (intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
                temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
+
+               intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
+
                if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe))
                        return true;
        }
+
        return false;
 }
 
index 2f00828ccc6e923a65447424c716d9cc5ee0e99b..46947fffd5998074555c95ce8d373343aac20bd6 100644 (file)
@@ -1351,18 +1351,21 @@ void assert_pipe(struct drm_i915_private *dev_priv,
        bool cur_state;
        enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
                                                                      pipe);
+       enum intel_display_power_domain power_domain;
 
        /* if we need the pipe quirk it must be always on */
        if ((pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) ||
            (pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE))
                state = true;
 
-       if (!intel_display_power_is_enabled(dev_priv,
-                               POWER_DOMAIN_TRANSCODER(cpu_transcoder))) {
-               cur_state = false;
-       } else {
+       power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+       if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
                u32 val = I915_READ(PIPECONF(cpu_transcoder));
                cur_state = !!(val & PIPECONF_ENABLE);
+
+               intel_display_power_put(dev_priv, power_domain);
+       } else {
+               cur_state = false;
        }
 
        I915_STATE_WARN(cur_state != state,
@@ -2946,7 +2949,7 @@ u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
        struct i915_vma *vma;
        u64 offset;
 
-       intel_fill_fb_ggtt_view(&view, intel_plane->base.fb,
+       intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
                                intel_plane->base.state);
 
        vma = i915_gem_obj_to_ggtt_view(obj, &view);
@@ -8171,18 +8174,22 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       enum intel_display_power_domain power_domain;
        uint32_t tmp;
+       bool ret;
 
-       if (!intel_display_power_is_enabled(dev_priv,
-                                           POWER_DOMAIN_PIPE(crtc->pipe)))
+       power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
        pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
        pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
+       ret = false;
+
        tmp = I915_READ(PIPECONF(crtc->pipe));
        if (!(tmp & PIPECONF_ENABLE))
-               return false;
+               goto out;
 
        if (IS_G4X(dev) || IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
                switch (tmp & PIPECONF_BPC_MASK) {
@@ -8262,7 +8269,12 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
        pipe_config->base.adjusted_mode.crtc_clock =
                pipe_config->port_clock / pipe_config->pixel_multiplier;
 
-       return true;
+       ret = true;
+
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 static void ironlake_init_pch_refclk(struct drm_device *dev)
@@ -9366,18 +9378,21 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       enum intel_display_power_domain power_domain;
        uint32_t tmp;
+       bool ret;
 
-       if (!intel_display_power_is_enabled(dev_priv,
-                                           POWER_DOMAIN_PIPE(crtc->pipe)))
+       power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
        pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
        pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
+       ret = false;
        tmp = I915_READ(PIPECONF(crtc->pipe));
        if (!(tmp & PIPECONF_ENABLE))
-               return false;
+               goto out;
 
        switch (tmp & PIPECONF_BPC_MASK) {
        case PIPECONF_6BPC:
@@ -9440,7 +9455,12 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 
        ironlake_get_pfit_config(crtc, pipe_config);
 
-       return true;
+       ret = true;
+
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
@@ -9950,12 +9970,17 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
-       enum intel_display_power_domain pfit_domain;
+       enum intel_display_power_domain power_domain;
+       unsigned long power_domain_mask;
        uint32_t tmp;
+       bool ret;
 
-       if (!intel_display_power_is_enabled(dev_priv,
-                                        POWER_DOMAIN_PIPE(crtc->pipe)))
+       power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
+       power_domain_mask = BIT(power_domain);
+
+       ret = false;
 
        pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
        pipe_config->shared_dpll = DPLL_ID_PRIVATE;
@@ -9982,13 +10007,14 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
                        pipe_config->cpu_transcoder = TRANSCODER_EDP;
        }
 
-       if (!intel_display_power_is_enabled(dev_priv,
-                       POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder)))
-               return false;
+       power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder);
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+               goto out;
+       power_domain_mask |= BIT(power_domain);
 
        tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder));
        if (!(tmp & PIPECONF_ENABLE))
-               return false;
+               goto out;
 
        haswell_get_ddi_port_state(crtc, pipe_config);
 
@@ -9998,14 +10024,14 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
                skl_init_scalers(dev, crtc, pipe_config);
        }
 
-       pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
-
        if (INTEL_INFO(dev)->gen >= 9) {
                pipe_config->scaler_state.scaler_id = -1;
                pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
        }
 
-       if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
+       power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
+       if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+               power_domain_mask |= BIT(power_domain);
                if (INTEL_INFO(dev)->gen >= 9)
                        skylake_get_pfit_config(crtc, pipe_config);
                else
@@ -10023,7 +10049,13 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
                pipe_config->pixel_multiplier = 1;
        }
 
-       return true;
+       ret = true;
+
+out:
+       for_each_power_domain(power_domain, power_domain_mask)
+               intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 static void i845_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
@@ -12075,11 +12107,21 @@ connected_sink_compute_bpp(struct intel_connector *connector,
                pipe_config->pipe_bpp = connector->base.display_info.bpc*3;
        }
 
-       /* Clamp bpp to 8 on screens without EDID 1.4 */
-       if (connector->base.display_info.bpc == 0 && bpp > 24) {
-               DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n",
-                             bpp);
-               pipe_config->pipe_bpp = 24;
+       /* Clamp bpp to default limit on screens without EDID 1.4 */
+       if (connector->base.display_info.bpc == 0) {
+               int type = connector->base.connector_type;
+               int clamp_bpp = 24;
+
+               /* Fall back to 18 bpp when DP sink capability is unknown. */
+               if (type == DRM_MODE_CONNECTOR_DisplayPort ||
+                   type == DRM_MODE_CONNECTOR_eDP)
+                       clamp_bpp = 18;
+
+               if (bpp > clamp_bpp) {
+                       DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n",
+                                     bpp, clamp_bpp);
+                       pipe_config->pipe_bpp = clamp_bpp;
+               }
        }
 }
 
@@ -13620,7 +13662,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
 {
        uint32_t val;
 
-       if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+       if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
                return false;
 
        val = I915_READ(PCH_DPLL(pll->id));
@@ -13628,6 +13670,8 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
        hw_state->fp0 = I915_READ(PCH_FP0(pll->id));
        hw_state->fp1 = I915_READ(PCH_FP1(pll->id));
 
+       intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
        return val & DPLL_VCO_ENABLE;
 }
 
@@ -13883,11 +13927,12 @@ intel_check_primary_plane(struct drm_plane *plane,
        int max_scale = DRM_PLANE_HELPER_NO_SCALING;
        bool can_position = false;
 
-       /* use scaler when colorkey is not required */
-       if (INTEL_INFO(plane->dev)->gen >= 9 &&
-           state->ckey.flags == I915_SET_COLORKEY_NONE) {
-               min_scale = 1;
-               max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+       if (INTEL_INFO(plane->dev)->gen >= 9) {
+               /* use scaler when colorkey is not required */
+               if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
+                       min_scale = 1;
+                       max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+               }
                can_position = true;
        }
 
@@ -15557,10 +15602,12 @@ void i915_redisable_vga(struct drm_device *dev)
         * level, just check if the power well is enabled instead of trying to
         * follow the "don't touch the power well if we don't need it" policy
         * the rest of the driver uses. */
-       if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_VGA))
+       if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_VGA))
                return;
 
        i915_redisable_vga_power_on(dev);
+
+       intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
 }
 
 static bool primary_get_hw_state(struct intel_plane *plane)
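
haswell_get_pipe_config() may end up holding up to three domain references (pipe, transcoder, panel fitter), so the conversion records each successful grab in a bitmask and releases everything on one exit path. A condensed sketch of that bookkeeping (example_get_config() is hypothetical):

static bool example_get_config(struct drm_i915_private *dev_priv,
                               struct intel_crtc *crtc)
{
        enum intel_display_power_domain domain;
        unsigned long mask = 0;
        bool ret = false;

        domain = POWER_DOMAIN_PIPE(crtc->pipe);
        if (!intel_display_power_get_if_enabled(dev_priv, domain))
                return false;
        mask |= BIT(domain);

        domain = POWER_DOMAIN_TRANSCODER(TRANSCODER_A);
        if (!intel_display_power_get_if_enabled(dev_priv, domain))
                goto out;       /* still releases the pipe domain */
        mask |= BIT(domain);

        /* ... read the hardware state here ... */
        ret = true;

out:
        /* one put per bit, whatever subset we managed to grab */
        for_each_power_domain(domain, mask)
                intel_display_power_put(dev_priv, domain);

        return ret;
}
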
index 796e3d313cb975efc3797a39b15beb8e55ab7f92..1d8de43bed56839fd1fee3a75b6d61b6221c7c0a 100644 (file)
@@ -2362,15 +2362,18 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
        struct drm_i915_private *dev_priv = dev->dev_private;
        enum intel_display_power_domain power_domain;
        u32 tmp;
+       bool ret;
 
        power_domain = intel_display_port_power_domain(encoder);
-       if (!intel_display_power_is_enabled(dev_priv, power_domain))
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
+       ret = false;
+
        tmp = I915_READ(intel_dp->output_reg);
 
        if (!(tmp & DP_PORT_EN))
-               return false;
+               goto out;
 
        if (IS_GEN7(dev) && port == PORT_A) {
                *pipe = PORT_TO_PIPE_CPT(tmp);
@@ -2381,7 +2384,9 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
                        u32 trans_dp = I915_READ(TRANS_DP_CTL(p));
                        if (TRANS_DP_PIPE_TO_PORT(trans_dp) == port) {
                                *pipe = p;
-                               return true;
+                               ret = true;
+
+                               goto out;
                        }
                }
 
@@ -2393,7 +2398,12 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
                *pipe = PORT_TO_PIPE(tmp);
        }
 
-       return true;
+       ret = true;
+
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 static void intel_dp_get_config(struct intel_encoder *encoder,
@@ -4493,20 +4503,20 @@ static bool g4x_digital_port_connected(struct drm_i915_private *dev_priv,
        return I915_READ(PORT_HOTPLUG_STAT) & bit;
 }
 
-static bool vlv_digital_port_connected(struct drm_i915_private *dev_priv,
-                                      struct intel_digital_port *port)
+static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv,
+                                       struct intel_digital_port *port)
 {
        u32 bit;
 
        switch (port->port) {
        case PORT_B:
-               bit = PORTB_HOTPLUG_LIVE_STATUS_VLV;
+               bit = PORTB_HOTPLUG_LIVE_STATUS_GM45;
                break;
        case PORT_C:
-               bit = PORTC_HOTPLUG_LIVE_STATUS_VLV;
+               bit = PORTC_HOTPLUG_LIVE_STATUS_GM45;
                break;
        case PORT_D:
-               bit = PORTD_HOTPLUG_LIVE_STATUS_VLV;
+               bit = PORTD_HOTPLUG_LIVE_STATUS_GM45;
                break;
        default:
                MISSING_CASE(port->port);
@@ -4558,8 +4568,8 @@ bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
                return cpt_digital_port_connected(dev_priv, port);
        else if (IS_BROXTON(dev_priv))
                return bxt_digital_port_connected(dev_priv, port);
-       else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-               return vlv_digital_port_connected(dev_priv, port);
+       else if (IS_GM45(dev_priv))
+               return gm45_digital_port_connected(dev_priv, port);
        else
                return g4x_digital_port_connected(dev_priv, port);
 }
index 88887938e0bfd3e0cf3e7bdebd66a145f7e036a7..0b8eefc2acc5d93088b960e4714bce55944df82e 100644 (file)
@@ -215,27 +215,46 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
        }
 }
 
-static void
-intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
+/*
+ * Pick training pattern for channel equalization. Training Pattern 3 for HBR2
+ * or 1.2 devices that support it, Training Pattern 2 otherwise.
+ */
+static u32 intel_dp_training_pattern(struct intel_dp *intel_dp)
 {
-       bool channel_eq = false;
-       int tries, cr_tries;
-       uint32_t training_pattern = DP_TRAINING_PATTERN_2;
+       u32 training_pattern = DP_TRAINING_PATTERN_2;
+       bool source_tps3, sink_tps3;
 
        /*
-        * Training Pattern 3 for HBR2 or 1.2 devices that support it.
-        *
         * Intel platforms that support HBR2 also support TPS3. TPS3 support is
-        * also mandatory for downstream devices that support HBR2.
+        * also mandatory for downstream devices that support HBR2. However, not
+        * all sinks follow the spec.
         *
         * Due to WaDisableHBR2 SKL < B0 is the only exception where TPS3 is
-        * supported but still not enabled.
+        * supported in source but still not enabled.
         */
-       if (intel_dp_source_supports_hbr2(intel_dp) &&
-           drm_dp_tps3_supported(intel_dp->dpcd))
+       source_tps3 = intel_dp_source_supports_hbr2(intel_dp);
+       sink_tps3 = drm_dp_tps3_supported(intel_dp->dpcd);
+
+       if (source_tps3 && sink_tps3) {
                training_pattern = DP_TRAINING_PATTERN_3;
-       else if (intel_dp->link_rate == 540000)
-               DRM_ERROR("5.4 Gbps link rate without HBR2/TPS3 support\n");
+       } else if (intel_dp->link_rate == 540000) {
+               if (!source_tps3)
+                       DRM_DEBUG_KMS("5.4 Gbps link rate without source HBR2/TPS3 support\n");
+               if (!sink_tps3)
+                       DRM_DEBUG_KMS("5.4 Gbps link rate without sink TPS3 support\n");
+       }
+
+       return training_pattern;
+}
+
+static void
+intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
+{
+       bool channel_eq = false;
+       int tries, cr_tries;
+       u32 training_pattern;
+
+       training_pattern = intel_dp_training_pattern(intel_dp);
 
        /* channel equalization */
        if (!intel_dp_set_link_train(intel_dp,
index ea5415851c6e2cc36d4c6790db473fab637fd46d..df7f3cb66056e754c3c8f596583cae343fc2d5a9 100644 (file)
@@ -1428,6 +1428,8 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
                                      enum intel_display_power_domain domain);
 void intel_display_power_get(struct drm_i915_private *dev_priv,
                             enum intel_display_power_domain domain);
+bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+                                       enum intel_display_power_domain domain);
 void intel_display_power_put(struct drm_i915_private *dev_priv,
                             enum intel_display_power_domain domain);
 
@@ -1514,6 +1516,7 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
        enable_rpm_wakeref_asserts(dev_priv)
 
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
 
index 44742fa2f616dd22fc25847b0f4ddb5730e550de..0193c62a53ef34f8e2027fb39e3172d44703bfba 100644 (file)
@@ -664,13 +664,16 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
        struct drm_device *dev = encoder->base.dev;
        enum intel_display_power_domain power_domain;
        enum port port;
+       bool ret;
 
        DRM_DEBUG_KMS("\n");
 
        power_domain = intel_display_port_power_domain(encoder);
-       if (!intel_display_power_is_enabled(dev_priv, power_domain))
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
+       ret = false;
+
        /* XXX: this only works for one DSI output */
        for_each_dsi_port(port, intel_dsi->ports) {
                i915_reg_t ctrl_reg = IS_BROXTON(dev) ?
@@ -691,12 +694,16 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
                if (dpi_enabled || (func & CMD_MODE_DATA_WIDTH_MASK)) {
                        if (I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY) {
                                *pipe = port == PORT_A ? PIPE_A : PIPE_B;
-                               return true;
+                               ret = true;
+
+                               goto out;
                        }
                }
        }
+out:
+       intel_display_power_put(dev_priv, power_domain);
 
-       return false;
+       return ret;
 }
 
 static void intel_dsi_get_config(struct intel_encoder *encoder,
index a5e99ac305daab3ef69471d37b0ec9a97a27425c..e8113ad6547782ff5836839f1354a860aea5e464 100644 (file)
@@ -204,10 +204,28 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
        struct drm_device *dev = intel_dsi->base.base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 
+       if (dev_priv->vbt.dsi.seq_version >= 3)
+               data++;
+
        gpio = *data++;
 
        /* pull up/down */
-       action = *data++;
+       action = *data++ & 1;
+
+       if (gpio >= ARRAY_SIZE(gtable)) {
+               DRM_DEBUG_KMS("unknown gpio %u\n", gpio);
+               goto out;
+       }
+
+       if (!IS_VALLEYVIEW(dev_priv)) {
+               DRM_DEBUG_KMS("GPIO element not supported on this platform\n");
+               goto out;
+       }
+
+       if (dev_priv->vbt.dsi.seq_version >= 3) {
+               DRM_DEBUG_KMS("GPIO element v3 not supported\n");
+               goto out;
+       }
 
        function = gtable[gpio].function_reg;
        pad = gtable[gpio].pad_reg;
@@ -226,6 +244,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
        vlv_gpio_nc_write(dev_priv, pad, val);
        mutex_unlock(&dev_priv->sb_lock);
 
+out:
        return data;
 }
 
index 4a77639a489dfcd67ff0937e6e3fa4da61fed461..cb5d1b15755c3b19a496105c7a8df99982e83cb9 100644 (file)
@@ -880,15 +880,18 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
        struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
        enum intel_display_power_domain power_domain;
        u32 tmp;
+       bool ret;
 
        power_domain = intel_display_port_power_domain(encoder);
-       if (!intel_display_power_is_enabled(dev_priv, power_domain))
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
+       ret = false;
+
        tmp = I915_READ(intel_hdmi->hdmi_reg);
 
        if (!(tmp & SDVO_ENABLE))
-               return false;
+               goto out;
 
        if (HAS_PCH_CPT(dev))
                *pipe = PORT_TO_PIPE_CPT(tmp);
@@ -897,7 +900,12 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
        else
                *pipe = PORT_TO_PIPE(tmp);
 
-       return true;
+       ret = true;
+
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 static void intel_hdmi_get_config(struct intel_encoder *encoder,
index 25254b5c1ac5c95173d0d5b4fa101f35a1131cb7..deb8282c26d83f952473ae145c4fef0b3112b9f1 100644 (file)
@@ -683,7 +683,7 @@ int intel_setup_gmbus(struct drm_device *dev)
        return 0;
 
 err:
-       while (--pin) {
+       while (pin--) {
                if (!intel_gmbus_is_valid_pin(dev_priv, pin))
                        continue;
 
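The gmbus fix is a one-character unwind bug: with pin holding the number of successfully registered pins, while (--pin) decrements before testing, so index 0 is never cleaned up (and, pin being unsigned, a failure at pin 0 would wrap and walk through garbage). while (pin--) visits pin - 1 down to 0 and stops. In isolation (cleanup_one() is hypothetical):

static void unwind(unsigned int count)
{
        unsigned int i = count;

        /* while (--i) would skip i == 0, and for count == 0 the
         * pre-decrement wraps to UINT_MAX and runs away */
        while (i--)
                cleanup_one(i);         /* visits count - 1 ... 0 */
}
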
index 3aa614731d7e4b7160a85336d96df41469587991..f1fa756c5d5d59bf8877daded5c23329f63ed331 100644 (file)
@@ -1707,6 +1707,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
 
index 0da0240caf815089447d4614b200ac818fd85250..bc04d8d29acb0bcc8112b39741f7614db6615625 100644 (file)
@@ -75,22 +75,30 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder,
        struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
        enum intel_display_power_domain power_domain;
        u32 tmp;
+       bool ret;
 
        power_domain = intel_display_port_power_domain(encoder);
-       if (!intel_display_power_is_enabled(dev_priv, power_domain))
+       if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                return false;
 
+       ret = false;
+
        tmp = I915_READ(lvds_encoder->reg);
 
        if (!(tmp & LVDS_PORT_EN))
-               return false;
+               goto out;
 
        if (HAS_PCH_CPT(dev))
                *pipe = PORT_TO_PIPE_CPT(tmp);
        else
                *pipe = PORT_TO_PIPE(tmp);
 
-       return true;
+       ret = true;
+
+out:
+       intel_display_power_put(dev_priv, power_domain);
+
+       return ret;
 }
 
 static void intel_lvds_get_config(struct intel_encoder *encoder,
index eb5fa05cf476e465ab2c0df6ee52f6870ec568f8..b28c29f20e754ec740e5aacd101bbe07867f65a6 100644 (file)
@@ -1783,16 +1783,20 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
                                   const struct intel_plane_state *pstate,
                                   uint32_t mem_value)
 {
-       int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
+       /*
+        * We treat the cursor plane as always-on for the purposes of watermark
+        * calculation.  Until we have two-stage watermark programming merged,
+        * this is necessary to avoid flickering.
+        */
+       int cpp = 4;
+       int width = pstate->visible ? pstate->base.crtc_w : 64;
 
-       if (!cstate->base.active || !pstate->visible)
+       if (!cstate->base.active)
                return 0;
 
        return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
                              cstate->base.adjusted_mode.crtc_htotal,
-                             drm_rect_width(&pstate->dst),
-                             bpp,
-                             mem_value);
+                             width, cpp, mem_value);
 }
 
 /* Only for WM_LP. */
@@ -2825,7 +2829,10 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
        memset(ddb, 0, sizeof(*ddb));
 
        for_each_pipe(dev_priv, pipe) {
-               if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
+               enum intel_display_power_domain power_domain;
+
+               power_domain = POWER_DOMAIN_PIPE(pipe);
+               if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
                        continue;
 
                for_each_plane(dev_priv, pipe, plane) {
@@ -2837,6 +2844,8 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
                val = I915_READ(CUR_BUF_CFG(pipe));
                skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
                                           val);
+
+               intel_display_power_put(dev_priv, power_domain);
        }
 }
 
index 339701d7a9a5069ccce00f2f171f923073d2c3b5..40c6aff57256140a3dccd780356cfcd859f377ba 100644 (file)
@@ -331,6 +331,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
        if (invalidate_domains) {
@@ -403,6 +404,7 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
        if (invalidate_domains) {
index ddbdbffe829a684eff8c2ef42a3cf29ccbf112d8..678ed3475d7eca57a512e41d872e9ba9fd5f52b0 100644 (file)
@@ -470,6 +470,43 @@ static void gen9_set_dc_state_debugmask_memory_up(
        }
 }
 
+static void gen9_write_dc_state(struct drm_i915_private *dev_priv,
+                               u32 state)
+{
+       int rewrites = 0;
+       int rereads = 0;
+       u32 v;
+
+       I915_WRITE(DC_STATE_EN, state);
+
+       /* It has been observed that disabling the dc6 state sometimes
+        * doesn't stick and the DMC keeps returning the old value. Re-read
+        * to make sure the write really stuck, and keep rewriting until we
+        * are confident that the state is exactly what we want.
+        */
+       do {
+               v = I915_READ(DC_STATE_EN);
+
+               if (v != state) {
+                       I915_WRITE(DC_STATE_EN, state);
+                       rewrites++;
+                       rereads = 0;
+               } else if (rereads++ > 5) {
+                       break;
+               }
+
+       } while (rewrites < 100);
+
+       if (v != state)
+               DRM_ERROR("Writing dc state to 0x%x failed, now 0x%x\n",
+                         state, v);
+
+       /* Most of the time a single rewrite is enough; avoid log spam */
+       if (rewrites > 1)
+               DRM_DEBUG_KMS("Rewrote dc state to 0x%x %d times\n",
+                             state, rewrites);
+}
+
 static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
 {
        uint32_t val;
@@ -494,10 +531,18 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
        val = I915_READ(DC_STATE_EN);
        DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
                      val & mask, state);
+
+       /* Check if DMC is ignoring our DC state requests */
+       if ((val & mask) != dev_priv->csr.dc_state)
+               DRM_ERROR("DC state mismatch (0x%x -> 0x%x)\n",
+                         dev_priv->csr.dc_state, val & mask);
+
        val &= ~mask;
        val |= state;
-       I915_WRITE(DC_STATE_EN, val);
-       POSTING_READ(DC_STATE_EN);
+
+       gen9_write_dc_state(dev_priv, val);
+
+       dev_priv->csr.dc_state = val & mask;
 }
 
 void bxt_enable_dc9(struct drm_i915_private *dev_priv)
@@ -1442,6 +1487,22 @@ static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
        chv_set_pipe_power_well(dev_priv, power_well, false);
 }
 
+static void
+__intel_display_power_get_domain(struct drm_i915_private *dev_priv,
+                                enum intel_display_power_domain domain)
+{
+       struct i915_power_domains *power_domains = &dev_priv->power_domains;
+       struct i915_power_well *power_well;
+       int i;
+
+       for_each_power_well(i, power_well, BIT(domain), power_domains) {
+               if (!power_well->count++)
+                       intel_power_well_enable(dev_priv, power_well);
+       }
+
+       power_domains->domain_use_count[domain]++;
+}
+
 /**
  * intel_display_power_get - grab a power domain reference
  * @dev_priv: i915 device instance
@@ -1457,24 +1518,53 @@ static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
 void intel_display_power_get(struct drm_i915_private *dev_priv,
                             enum intel_display_power_domain domain)
 {
-       struct i915_power_domains *power_domains;
-       struct i915_power_well *power_well;
-       int i;
+       struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
        intel_runtime_pm_get(dev_priv);
 
-       power_domains = &dev_priv->power_domains;
+       mutex_lock(&power_domains->lock);
+
+       __intel_display_power_get_domain(dev_priv, domain);
+
+       mutex_unlock(&power_domains->lock);
+}
+
+/**
+ * intel_display_power_get_if_enabled - grab a reference for an enabled display power domain
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function grabs a power domain reference for @domain if the domain is
+ * currently enabled, ensuring it and all its parents stay powered up, and
+ * returns false without taking a reference otherwise. Users should only
+ * grab a reference to the innermost power domain they need.
+ *
+ * Any power domain reference obtained by this function must have a symmetric
+ * call to intel_display_power_put() to release the reference again.
+ */
+bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+                                       enum intel_display_power_domain domain)
+{
+       struct i915_power_domains *power_domains = &dev_priv->power_domains;
+       bool is_enabled;
+
+       if (!intel_runtime_pm_get_if_in_use(dev_priv))
+               return false;
 
        mutex_lock(&power_domains->lock);
 
-       for_each_power_well(i, power_well, BIT(domain), power_domains) {
-               if (!power_well->count++)
-                       intel_power_well_enable(dev_priv, power_well);
+       if (__intel_display_power_is_enabled(dev_priv, domain)) {
+               __intel_display_power_get_domain(dev_priv, domain);
+               is_enabled = true;
+       } else {
+               is_enabled = false;
        }
 
-       power_domains->domain_use_count[domain]++;
-
        mutex_unlock(&power_domains->lock);
+
+       if (!is_enabled)
+               intel_runtime_pm_put(dev_priv);
+
+       return is_enabled;
 }
 
 /**
@@ -2245,6 +2335,43 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
        assert_rpm_wakelock_held(dev_priv);
 }
 
+/**
+ * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
+ * @dev_priv: i915 device instance
+ *
+ * This function grabs a device-level runtime pm reference if the device is
+ * already in use and ensures that it is powered up.
+ *
+ * Any runtime pm reference obtained by this function must have a symmetric
+ * call to intel_runtime_pm_put() to release the reference again.
+ */
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
+{
+       struct drm_device *dev = dev_priv->dev;
+       struct device *device = &dev->pdev->dev;
+       int ret;
+
+       if (!IS_ENABLED(CONFIG_PM))
+               return true;
+
+       ret = pm_runtime_get_if_in_use(device);
+
+       /*
+        * If runtime PM is disabled by the RPM core we get an -EINVAL
+        * return value, and we are not supposed to have called this
+        * function in that case, since the power state is undefined.
+        * At the moment this applies to the late/early system
+        * suspend/resume handlers.
+        */
+       WARN_ON_ONCE(ret < 0);
+       if (ret <= 0)
+               return false;
+
+       atomic_inc(&dev_priv->pm.wakeref_count);
+       assert_rpm_wakelock_held(dev_priv);
+
+       return true;
+}
+
 /**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
  * @dev_priv: i915 device instance
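
intel_runtime_pm_get_if_in_use() is what makes the conditional display-power
grab above safe: it takes a device-level wakeref only when the device is
already in active use, so a read-only sampling path never powers the GPU up
just to peek at a register. A hedged caller sketch, where sample_reg() and
SOME_REG are hypothetical names, not part of this patch:

        static void sample_reg(struct drm_i915_private *dev_priv)
        {
                if (!intel_runtime_pm_get_if_in_use(dev_priv))
                        return; /* suspended: the register is not readable */

                DRM_DEBUG_KMS("reg = 0x%x\n", I915_READ(SOME_REG));

                intel_runtime_pm_put(dev_priv);
        }
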
index 78f520d05de92ea627980c5e8f2bb8388715e6fe..e3acc35e3805e58b09fa47fe8dc70223aff175dc 100644 (file)
@@ -1520,7 +1520,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
                                    DMA_BIDIRECTIONAL);
 
                if (dma_mapping_error(pdev, addr)) {
-                       while (--i) {
+                       while (i--) {
                                dma_unmap_page(pdev, ttm_dma->dma_address[i],
                                               PAGE_SIZE, DMA_BIDIRECTIONAL);
                                ttm_dma->dma_address[i] = 0;
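
This one-character fix matters twice over. With `while (--i)` the unwind loop
stops before unmapping entry 0, and if the very first mapping (i == 0) fails,
the pre-decrement underflows the index and the loop walks off the end of the
array. `while (i--)` tests the old value first, so it visits exactly the
entries 0..i-1 that were mapped successfully. The idiom in isolation, with
map_one()/unmap_one() as placeholder helpers:

        for (i = 0; i < n; i++) {
                if (map_one(i) < 0)
                        goto unwind;
        }
        return 0;

unwind:
        while (i--)             /* i-1 down to 0; never touches the failed i */
                unmap_one(i);
        return -EFAULT;
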
index 24be27d3cd18cfb35821b00fe9531c5d0337c284..20935eb2a09e9600e68d37edec8dcb3404a0db60 100644 (file)
@@ -635,10 +635,6 @@ nouveau_display_resume(struct drm_device *dev, bool runtime)
                nv_crtc->lut.depth = 0;
        }
 
-       /* Make sure that drm and hw vblank irqs get resumed if needed. */
-       for (head = 0; head < dev->mode_config.num_crtc; head++)
-               drm_vblank_on(dev, head);
-
        /* This should ensure we don't hit a locking problem when someone
         * wakes us up via a connector.  We should never go into suspend
         * while the display is on anyways.
@@ -648,6 +644,10 @@ nouveau_display_resume(struct drm_device *dev, bool runtime)
 
        drm_helper_resume_force_mode(dev);
 
+       /* Make sure that drm and hw vblank irqs get resumed if needed. */
+       for (head = 0; head < dev->mode_config.num_crtc; head++)
+               drm_vblank_on(dev, head);
+
        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
                struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 
index 8a70cec59bcd3f58b43d2d8119afa14c44190bce..2dfe58af12e4ee05103d14946dc4a0d001f7f9c6 100644 (file)
@@ -24,7 +24,7 @@
 static int nouveau_platform_probe(struct platform_device *pdev)
 {
        const struct nvkm_device_tegra_func *func;
-       struct nvkm_device *device;
+       struct nvkm_device *device = NULL;
        struct drm_device *drm;
        int ret;
 
index 7f8a42721eb20ccafe19fcaefe3d972f432a6c7f..e7e581d6a8ff24d256b6526d2b93c41b7a00daa3 100644 (file)
@@ -252,32 +252,40 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
 
        if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL)))
                return -ENOMEM;
-       *pdevice = &tdev->device;
+
        tdev->func = func;
        tdev->pdev = pdev;
        tdev->irq = -1;
 
        tdev->vdd = devm_regulator_get(&pdev->dev, "vdd");
-       if (IS_ERR(tdev->vdd))
-               return PTR_ERR(tdev->vdd);
+       if (IS_ERR(tdev->vdd)) {
+               ret = PTR_ERR(tdev->vdd);
+               goto free;
+       }
 
        tdev->rst = devm_reset_control_get(&pdev->dev, "gpu");
-       if (IS_ERR(tdev->rst))
-               return PTR_ERR(tdev->rst);
+       if (IS_ERR(tdev->rst)) {
+               ret = PTR_ERR(tdev->rst);
+               goto free;
+       }
 
        tdev->clk = devm_clk_get(&pdev->dev, "gpu");
-       if (IS_ERR(tdev->clk))
-               return PTR_ERR(tdev->clk);
+       if (IS_ERR(tdev->clk)) {
+               ret = PTR_ERR(tdev->clk);
+               goto free;
+       }
 
        tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr");
-       if (IS_ERR(tdev->clk_pwr))
-               return PTR_ERR(tdev->clk_pwr);
+       if (IS_ERR(tdev->clk_pwr)) {
+               ret = PTR_ERR(tdev->clk_pwr);
+               goto free;
+       }
 
        nvkm_device_tegra_probe_iommu(tdev);
 
        ret = nvkm_device_tegra_power_up(tdev);
        if (ret)
-               return ret;
+               goto remove;
 
        tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value;
        ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev,
@@ -285,9 +293,19 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
                               cfg, dbg, detect, mmio, subdev_mask,
                               &tdev->device);
        if (ret)
-               return ret;
+               goto powerdown;
+
+       *pdevice = &tdev->device;
 
        return 0;
+
+powerdown:
+       nvkm_device_tegra_power_down(tdev);
+remove:
+       nvkm_device_tegra_remove_iommu(tdev);
+free:
+       kfree(tdev);
+       return ret;
 }
 #else
 int
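
The rework above converts early returns that leaked tdev (and left the IOMMU
attached or the GPU powered) into the kernel's standard goto-unwind ladder:
each failure jumps to the label that releases everything acquired so far, the
labels run in reverse acquisition order, and *pdevice is now assigned only on
success, so callers can never see a pointer into freed memory. The shape of
the pattern, condensed with placeholder acquire/release steps:

        ret = acquire_a(tdev);
        if (ret)
                goto free;

        ret = acquire_b(tdev);
        if (ret)
                goto undo_a;    /* undo only what already succeeded */

        return 0;

undo_a:
        release_a(tdev);
free:
        kfree(tdev);
        return ret;
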
index 74e2f7c6c07e4a583d9e3f6d62016b92c5694d28..9688970eca47dbe4ccea5b2e700b0cfdb40b02e0 100644 (file)
@@ -328,6 +328,7 @@ nvkm_dp_train(struct work_struct *w)
                .outp = outp,
        }, *dp = &_dp;
        u32 datarate = 0;
+       u8  pwr;
        int ret;
 
        if (!outp->base.info.location && disp->func->sor.magic)
@@ -355,6 +356,15 @@ nvkm_dp_train(struct work_struct *w)
        /* disable link interrupt handling during link training */
        nvkm_notify_put(&outp->irq);
 
+       /* ensure sink is not in a low-power state */
+       if (!nvkm_rdaux(outp->aux, DPCD_SC00, &pwr, 1)) {
+               if ((pwr & DPCD_SC00_SET_POWER) != DPCD_SC00_SET_POWER_D0) {
+                       pwr &= ~DPCD_SC00_SET_POWER;
+                       pwr |=  DPCD_SC00_SET_POWER_D0;
+                       nvkm_wraux(outp->aux, DPCD_SC00, &pwr, 1);
+               }
+       }
+
        /* enable down-spreading and execute pre-train script from vbios */
        dp_link_train_init(dp, outp->dpcd[3] & 0x01);
 
index 9596290329c70e0cbe34259fbf5db39e12a9cbcb..6e10c5e0ef1162232253b29a248ad2c15e801f21 100644 (file)
 #define DPCD_LS0C_LANE1_POST_CURSOR2                                       0x0c
 #define DPCD_LS0C_LANE0_POST_CURSOR2                                       0x03
 
+/* DPCD Sink Control */
+#define DPCD_SC00                                                       0x00600
+#define DPCD_SC00_SET_POWER                                                0x03
+#define DPCD_SC00_SET_POWER_D0                                             0x01
+#define DPCD_SC00_SET_POWER_D3                                             0x03
+
 void nvkm_dp_train(struct work_struct *);
 #endif
index 2ae8577497ca6e1c9cc87b265ff7ca92ee5375ec..7c2e78201ead9d475c3ba97d083c523026c21a47 100644 (file)
@@ -168,7 +168,8 @@ static int qxl_process_single_command(struct qxl_device *qdev,
                       cmd->command_size))
                return -EFAULT;
 
-       reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL);
+       reloc_info = kmalloc_array(cmd->relocs_num,
+                                  sizeof(struct qxl_reloc_info), GFP_KERNEL);
        if (!reloc_info)
                return -ENOMEM;
 
index 3d031b50a8fd13dd8203f4de284ecedbb95ecbcc..9f029dda1f071431a9d90894096ab4a3f89bbe0a 100644 (file)
@@ -68,5 +68,5 @@ int qxl_gem_prime_mmap(struct drm_gem_object *obj,
                       struct vm_area_struct *area)
 {
        WARN_ONCE(1, "not implemented");
-       return ENOSYS;
+       return -ENOSYS;
 }
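
A one-character fix with real consequences: in the kernel, failure is
reported as a negative errno, so `return ENOSYS;` hands the caller the
positive value 38, which error checks of the usual form never see.
Illustrated at a hypothetical call site:

        ret = qxl_gem_prime_mmap(obj, area);
        if (ret < 0)            /* only a negative value trips this check */
                return ret;     /* ENOSYS (38) would sail through as success */
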
index 6bfc46369db1b4f8daa02fba453a382e863a3585..367a916f364e94617a1b15fcd7d78cd3f90c8dc6 100644 (file)
@@ -304,18 +304,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev,
                unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
                        DENTIST_DPREFCLK_WDIVIDER_MASK) >>
                        DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
-               if (div < 128 && div >= 96)
-                       div -= 64;
-               else if (div >= 64)
-                       div = div / 2 - 16;
-               else if (div >= 8)
-                       div /= 4;
-               else
-                       div = 0;
+               div = radeon_audio_decode_dfs_div(div);
 
                if (div)
-                       clock = rdev->clock.gpupll_outputfreq * 10 / div;
+                       clock = clock * 100 / div;
 
                WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
                WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
index 9953356fe2637cfacdc2ba41e8ecd082d65213ff..3cf04a2f44bbb05169264a504349dec3d949e5bb 100644 (file)
@@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev,
         * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
         * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
         */
+       if (ASIC_IS_DCE41(rdev)) {
+               unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+                       DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+                       DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+               div = radeon_audio_decode_dfs_div(div);
+
+               if (div)
+                       clock = 100 * clock / div;
+       }
+
        WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
        WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
 }
index 4aa5f755572b1593a8b6f7876cf5f7aed183715d..13b6029d65cc524528aec6a1f1727b9d695aa4af 100644 (file)
 #define DCCG_AUDIO_DTO1_CNTL              0x05cc
 #       define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
 
+#define DCE41_DENTIST_DISPCLK_CNTL                     0x049c
+#       define DENTIST_DPREFCLK_WDIVIDER(x)            (((x) & 0x7f) << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_MASK          (0x7f << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_SHIFT         24
+
 /* DCE 4.0 AFMT */
 #define HDMI_CONTROL                         0x7030
 #       define HDMI_KEEPOUT_MODE             (1 << 0)
index 5ae6db98aa4d1eae63990bbd5d8cff1b82dfc6e3..78a51b3eda10c4d2931c3c783221ad0625d9a9c9 100644 (file)
@@ -268,7 +268,7 @@ struct radeon_clock {
        uint32_t current_dispclk;
        uint32_t dp_extclk;
        uint32_t max_pixel_clock;
-       uint32_t gpupll_outputfreq;
+       uint32_t vco_freq;
 };
 
 /*
index 08fc1b5effa8a5ec21d4f6234bc168c5b2508b36..de9a2ffcf5f762539d6d4c92464ed5eff19e1c42 100644 (file)
@@ -1106,6 +1106,31 @@ union firmware_info {
        ATOM_FIRMWARE_INFO_V2_2 info_22;
 };
 
+union igp_info {
+       struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+       struct radeon_mode_info *mode_info = &rdev->mode_info;
+       int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+       union igp_info *igp_info;
+       u8 frev, crev;
+       u16 data_offset;
+
+       if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+                       &frev, &crev, &data_offset)) {
+               igp_info = (union igp_info *)(mode_info->atom_context->bios +
+                       data_offset);
+               rdev->clock.vco_freq =
+                       le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+       }
+}
+
 bool radeon_atom_get_clock_info(struct drm_device *dev)
 {
        struct radeon_device *rdev = dev->dev_private;
@@ -1257,12 +1282,18 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
                rdev->mode_info.firmware_flags =
                        le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
 
-               if (ASIC_IS_DCE8(rdev)) {
-                       rdev->clock.gpupll_outputfreq =
+               if (ASIC_IS_DCE8(rdev))
+                       rdev->clock.vco_freq =
                                le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
-                       if (rdev->clock.gpupll_outputfreq == 0)
-                               rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */
-               }
+               else if (ASIC_IS_DCE5(rdev))
+                       rdev->clock.vco_freq = rdev->clock.current_dispclk;
+               else if (ASIC_IS_DCE41(rdev))
+                       radeon_atombios_get_dentist_vco_freq(rdev);
+               else
+                       rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+               if (rdev->clock.vco_freq == 0)
+                       rdev->clock.vco_freq = 360000;  /* 3.6 GHz */
 
                return true;
        }
@@ -1270,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
        return false;
 }
 
-union igp_info {
-       struct _ATOM_INTEGRATED_SYSTEM_INFO info;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
 bool radeon_atombios_sideport_present(struct radeon_device *rdev)
 {
        struct radeon_mode_info *mode_info = &rdev->mode_info;
index 2c02e99b5f95a65a26357f26d67e91a45f8224a4..b214663b370da00905faa882ea27513153aa91bd 100644 (file)
@@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
        struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
        struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
        struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-       struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-       struct radeon_connector_atom_dig *dig_connector =
-               radeon_connector->con_priv;
 
        if (!dig || !dig->afmt)
                return;
@@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
                radeon_audio_write_speaker_allocation(encoder);
                radeon_audio_write_sad_regs(encoder);
                radeon_audio_write_latency_fields(encoder, mode);
-               if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
-                       radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
-               else
-                       radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+               radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
                radeon_audio_set_audio_packet(encoder);
                radeon_audio_select_pin(encoder);
 
@@ -781,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode)
        if (radeon_encoder->audio && radeon_encoder->audio->dpms)
                radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
 }
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+       if (div >= 8 && div < 64)
+               return (div - 8) * 25 + 200;
+       else if (div >= 64 && div < 96)
+               return (div - 64) * 50 + 1600;
+       else if (div >= 96 && div < 128)
+               return (div - 96) * 100 + 3200;
+       else
+               return 0;
+}
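
radeon_audio_decode_dfs_div() maps the DENTIST_DPREFCLK_WDIVIDER register
encoding to the real DFS divider scaled by 100, in steps of 0.25, 0.5 and 1.0
across its three ranges. Working one value through: a register value of 10
decodes to (10 - 8) * 25 + 200 = 250, i.e. a divider of 2.5, so the callers'
`clock = clock * 100 / div` computes clock / 2.5. The ranges also join up
cleanly: 63 decodes to 1575 (divider 15.75) and 64 to 1600 (divider 16.0).
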
index 059cc3012062a7b50708620c557771dcfea821c0..5c70cceaa4a6c35e673ff9c50306e36e7f93ff3d 100644 (file)
@@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev);
 void radeon_audio_mode_set(struct drm_encoder *encoder,
        struct drm_display_mode *mode);
 void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
 
 #endif
index b3bb92368ae0e98372e55caab123fb720ecdc48d..2b9ba03a7c1a84bc00564fe0f6391771977c2e7b 100644 (file)
@@ -403,7 +403,8 @@ static void radeon_flip_work_func(struct work_struct *__work)
        struct drm_crtc *crtc = &radeon_crtc->base;
        unsigned long flags;
        int r;
-       int vpos, hpos, stat, min_udelay;
+       int vpos, hpos, stat, min_udelay = 0;
+       unsigned repcnt = 4;
        struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
         down_read(&rdev->exclusive_lock);
@@ -454,7 +455,7 @@ static void radeon_flip_work_func(struct work_struct *__work)
         * In practice this won't execute very often unless on very fast
         * machines because the time window for this to happen is very small.
         */
-       for (;;) {
+       while (radeon_crtc->enabled && repcnt--) {
                /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
                 * start in hpos, and to the "fudged earlier" vblank start in
                 * vpos.
@@ -472,10 +473,22 @@ static void radeon_flip_work_func(struct work_struct *__work)
                /* Sleep at least until estimated real start of hw vblank */
                spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
                min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+               if (min_udelay > vblank->framedur_ns / 2000) {
+                       /* Don't wait ridiculously long - something is wrong */
+                       repcnt = 0;
+                       break;
+               }
                usleep_range(min_udelay, 2 * min_udelay);
                spin_lock_irqsave(&crtc->dev->event_lock, flags);
        };
 
+       if (!repcnt)
+               DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+                                "framedur %d, linedur %d, stat %d, vpos %d, "
+                                "hpos %d\n", work->crtc_id, min_udelay,
+                                vblank->framedur_ns / 1000,
+                                vblank->linedur_ns / 1000, stat, vpos, hpos);
+
        /* do the flip (mmio) */
        radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base);
 
@@ -1670,8 +1683,10 @@ int radeon_modeset_init(struct radeon_device *rdev)
        /* setup afmt */
        radeon_afmt_init(rdev);
 
-       radeon_fbdev_init(rdev);
-       drm_kms_helper_poll_init(rdev->ddev);
+       if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+               radeon_fbdev_init(rdev);
+               drm_kms_helper_poll_init(rdev->ddev);
+       }
 
        /* do pm late init */
        ret = radeon_pm_late_init(rdev);
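
In the flip-worker change earlier in this file, the unbounded `for (;;)` wait
gains two exits: at most repcnt (4) iterations, and a sanity cap of half a
frame on each sleep. For a 60 Hz mode, framedur_ns is roughly 16,666,667, so
`framedur_ns / 2000` is about 8333 us; a computed min_udelay beyond that
indicates bogus timing data rather than a genuinely long wait, and the loop
now logs and bails out instead of stalling the worker indefinitely.
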
index 3dcc5733ff6915b2e2497ca3d4ff800455f49c20..e26c963f2e9304e58f25517114e773b3c85242bd 100644 (file)
@@ -663,6 +663,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
        bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
        if (!bo_va) {
                args->operation = RADEON_VA_RESULT_ERROR;
+               radeon_bo_unreserve(rbo);
                drm_gem_object_unreference_unlocked(gobj);
                return -ENOENT;
        }
index 84d45633d28c9b1c58f58dc6ec6383eba2207230..fb6ad143873f5b40d2c027a20dd25914dbd5e29e 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/radeon_drm.h>
+#include <drm/drm_cache.h>
 #include "radeon.h"
 #include "radeon_trace.h"
 
@@ -245,6 +246,12 @@ int radeon_bo_create(struct radeon_device *rdev,
                DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
                              "better performance thanks to write-combining\n");
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+#else
+       /* For architectures that don't support WC memory,
+        * mask out the WC flag from the BO
+        */
+       if (!drm_arch_can_wc_memory())
+               bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
        radeon_ttm_placement_from_domain(bo, domain);
index 460c8f2989daca1ba9af3b7ba59d724c27182c1c..ca3be90a3bb43eb2624732cf7c4556f6b6650583 100644 (file)
@@ -276,8 +276,12 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
        if (rdev->irq.installed) {
                for (i = 0; i < rdev->num_crtc; i++) {
                        if (rdev->pm.active_crtcs & (1 << i)) {
-                               rdev->pm.req_vblank |= (1 << i);
-                               drm_vblank_get(rdev->ddev, i);
+                               /* This can fail if a modeset is in progress */
+                               if (drm_vblank_get(rdev->ddev, i) == 0)
+                                       rdev->pm.req_vblank |= (1 << i);
+                               else
+                                       DRM_DEBUG_DRIVER("crtc %d no vblank, can glitch\n",
+                                                        i);
                        }
                }
        }
@@ -1075,8 +1079,6 @@ force:
 
        /* update display watermarks based on new power state */
        radeon_bandwidth_update(rdev);
-       /* update displays */
-       radeon_dpm_display_configuration_changed(rdev);
 
        rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
        rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
@@ -1097,6 +1099,9 @@ force:
 
        radeon_dpm_post_set_power_state(rdev);
 
+       /* update displays */
+       radeon_dpm_display_configuration_changed(rdev);
+
        if (rdev->asic->dpm.force_performance_level) {
                if (rdev->pm.dpm.thermal_active) {
                        enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level;
index c507896aca45a43a3fe271b79b84e7db61bfb85a..197b157b73d09b96f7ced03c6703b6696c210793 100644 (file)
@@ -349,8 +349,13 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
                        /* see if we can skip over some allocations */
                } while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
 
+               for (i = 0; i < RADEON_NUM_RINGS; ++i)
+                       radeon_fence_ref(fences[i]);
+
                spin_unlock(&sa_manager->wq.lock);
                r = radeon_fence_wait_any(rdev, fences, false);
+               for (i = 0; i < RADEON_NUM_RINGS; ++i)
+                       radeon_fence_unref(&fences[i]);
                spin_lock(&sa_manager->wq.lock);
                /* if we have nothing to wait for block */
                if (r == -ENOENT) {
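
The fence array here is filled in under sa_manager->wq.lock, but
radeon_fence_wait_any() must run with that lock dropped, and in the unlocked
window another thread can signal and free one of the fences. Pinning each
entry before the unlock and releasing it after the wait is the standard cure.
The shape of the pattern, sketched with generic placeholder names:

        spin_lock(&lock);
        collect_candidates(fences);     /* fill fences[] under the lock */
        for (i = 0; i < n; i++)
                fence_get(fences[i]);   /* pin before dropping the lock */
        spin_unlock(&lock);

        r = wait_any(fences);           /* our refs keep them alive here */

        for (i = 0; i < n; i++)
                fence_put(fences[i]);
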
index e34307459e501f60895ca4a0abe156995706f921..e06ac546a90ff185a31f64b4ce6e9fd76d071adc 100644 (file)
@@ -758,7 +758,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
                                                       0, PAGE_SIZE,
                                                       PCI_DMA_BIDIRECTIONAL);
                if (pci_dma_mapping_error(rdev->pdev, gtt->ttm.dma_address[i])) {
-                       while (--i) {
+                       while (i--) {
                                pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i],
                                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
                                gtt->ttm.dma_address[i] = 0;
index 07a0d378e122b1b206b8aceb1a97234254d00e74..a01efe39a8206ec1b1d1fd26a3192c5d72abb885 100644 (file)
@@ -178,12 +178,12 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
                return -EINVAL;
        }
 
-       for (i = 0; i < sign->num; ++i) {
-               if (sign->val[i].chip_id == chip_id)
+       for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+               if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
                        break;
        }
 
-       if (i == sign->num)
+       if (i == le32_to_cpu(sign->num))
                return -EINVAL;
 
        data += (256 - 64) / 4;
@@ -191,18 +191,18 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
        data[1] = sign->val[i].nonce[1];
        data[2] = sign->val[i].nonce[2];
        data[3] = sign->val[i].nonce[3];
-       data[4] = sign->len + 64;
+       data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
 
        memset(&data[5], 0, 44);
        memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
 
-       data += data[4] / 4;
+       data += le32_to_cpu(data[4]) / 4;
        data[0] = sign->val[i].sigval[0];
        data[1] = sign->val[i].sigval[1];
        data[2] = sign->val[i].sigval[2];
        data[3] = sign->val[i].sigval[3];
 
-       rdev->vce.keyselect = sign->val[i].keyselect;
+       rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
 
        return 0;
 }
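
The VCE firmware header is stored little-endian in the blob, so every
multi-byte field needs le32_to_cpu() on the way in and cpu_to_le32() on the
way out; on little-endian hosts these are no-ops, but the raw loads being
replaced above would misread every field on big-endian machines. A minimal
illustration (not the driver's actual layout):

        __le32 raw = *(__le32 *)fw_bytes;       /* as stored in the blob */
        u32 num = le32_to_cpu(raw);             /* correct on any host */

        /* If the firmware stored 2, a big-endian host reading 'raw'
         * directly would see 0x02000000 instead. */
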
index d1dc0f7b01dbdf48ad97e2829278b7f61bb6efea..f6a809afceec2e35237c855df8d2803a09674147 100644 (file)
@@ -2,11 +2,11 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
-               rockchip_drm_gem.o
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
+               rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
 
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
-                               rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
index 7bfe243c61737bd112cf591366611ce545e86598..f8f8f29fb7c336d8fffe1a74bddbad66e01854f6 100644 (file)
@@ -461,10 +461,11 @@ static int dw_mipi_dsi_phy_init(struct dw_mipi_dsi *dsi)
 
 static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
 {
-       unsigned int bpp, i, pre;
+       unsigned int i, pre;
        unsigned long mpclk, pllref, tmp;
        unsigned int m = 1, n = 1, target_mbps = 1000;
        unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+       int bpp;
 
        bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
        if (bpp < 0) {
index 8397d1b62ef9460910bb28c96e0675a53f5d1b50..a0d51ccb6ea449802a0ff4fac1bcd8b4cf510d3a 100644 (file)
@@ -55,14 +55,12 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
 
        return arm_iommu_attach_device(dev, mapping);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
 
 void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
                                    struct device *dev)
 {
        arm_iommu_detach_device(dev);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
 
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
                                 const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
 
 void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 {
@@ -89,7 +86,6 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 
        priv->crtc_funcs[pipe] = NULL;
 }
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
 
 static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
                                                int pipe)
index f7844883cb76cb8cb00c434aaeb9d85d63b0160a..3b8f652698f828574c19f624264e816ab1299ee9 100644 (file)
@@ -39,7 +39,6 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb,
 
        return rk_fb->obj[plane];
 }
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
 
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {
@@ -177,8 +176,23 @@ static void rockchip_crtc_wait_for_update(struct drm_crtc *crtc)
                crtc_funcs->wait_for_update(crtc);
 }
 
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ *                             | <-- HW vsync irq and reg take effect
+ *            plane_commit --> |
+ *     get_vblank and wait --> |
+ *                             | <-- handle_vblank, vblank->count + 1
+ *              cleanup_fb --> |
+ *             iommu crash --> |
+ *                             | <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
 static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
 {
        struct drm_crtc_state *old_crtc_state;
        struct drm_crtc *crtc;
@@ -194,6 +208,10 @@ rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
                if (!crtc->state->active)
                        continue;
 
+               if (!drm_atomic_helper_framebuffer_changed(dev,
+                               old_state, crtc))
+                       continue;
+
                ret = drm_crtc_vblank_get(crtc);
                if (ret != 0)
                        continue;
@@ -241,7 +259,7 @@ rockchip_atomic_commit_complete(struct rockchip_atomic_commit *commit)
 
        drm_atomic_helper_commit_planes(dev, state, true);
 
-       rockchip_atomic_wait_for_complete(state);
+       rockchip_atomic_wait_for_complete(dev, state);
 
        drm_atomic_helper_cleanup_planes(dev, state);
 
index 50432e9b5b37f38fcb3c3a10cf28573bf2d59163..73718c5f5bbf720ca6151fc44ca670fc8646c6a9 100644 (file)
 #ifndef _ROCKCHIP_DRM_FBDEV_H
 #define _ROCKCHIP_DRM_FBDEV_H
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 int rockchip_drm_fbdev_init(struct drm_device *dev);
 void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+       return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
 
 #endif /* _ROCKCHIP_DRM_FBDEV_H */
index d908321b94ced4ffe10936f4c7ef37fc518d127a..18e07338c6e5f31f45b9a38c17884164fc476cff 100644 (file)
@@ -234,13 +234,8 @@ int rockchip_gem_dumb_create(struct drm_file *file_priv,
        /*
         * align to 64 bytes since Mali requires it.
         */
-       min_pitch = ALIGN(min_pitch, 64);
-
-       if (args->pitch < min_pitch)
-               args->pitch = min_pitch;
-
-       if (args->size < args->pitch * args->height)
-               args->size = args->pitch * args->height;
+       args->pitch = ALIGN(min_pitch, 64);
+       args->size = args->pitch * args->height;
 
        rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
                                                 &args->handle);
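
Overwriting rather than clamping the caller's pitch makes the dumb-buffer
arithmetic deterministic. Worked through for an illustrative 1920x1080
XRGB8888 buffer (4 bytes per pixel): min_pitch = 1920 * 4 = 7680, already a
multiple of 64, so pitch = 7680 and size = 7680 * 1080 = 8,294,400 bytes. For
an odd width such as 1921, min_pitch = 7684 rounds up to 7744, satisfying the
Mali alignment rule.
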
index 46c2a8dfd8aa78b22caf02d9b1e217b8d011dd25..fd370548d7d75dc1990226110cf24ea0353a002c 100644 (file)
@@ -43,8 +43,8 @@
 
 #define REG_SET(x, base, reg, v, mode) \
                __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
-               __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+               __REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
 
 #define VOP_WIN_SET(x, win, name, v) \
                REG_SET(x, win->base, win->phy->name, v, RELAXED)
 #define VOP_INTR_GET(vop, name) \
                vop_read_reg(vop, 0, &vop->data->ctrl->name)
 
-#define VOP_INTR_SET(vop, name, v) \
-               REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+               REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
 #define VOP_INTR_SET_TYPE(vop, name, type, v) \
        do { \
-               int i, reg = 0; \
+               int i, reg = 0, mask = 0; \
                for (i = 0; i < vop->data->intr->nintrs; i++) { \
-                       if (vop->data->intr->intrs[i] & type) \
+                       if (vop->data->intr->intrs[i] & type) { \
                                reg |= (v) << i; \
+                               mask |= 1 << i; \
+                       } \
                } \
-               VOP_INTR_SET(vop, name, reg); \
+               VOP_INTR_SET(vop, name, mask, reg); \
        } while (0)
 #define VOP_INTR_GET_TYPE(vop, name, type) \
                vop_get_intr_type(vop, &vop->data->intr->name, type)
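
With the extra mask argument, VOP_INTR_SET_TYPE tells the register write
exactly which interrupt bits it owns instead of reusing reg.mask and
clobbering unrelated ones. Expanding the macro by hand for a hypothetical
controller where only intrs[1] matches `type`: the loop produces
reg = (v) << 1 and mask = 1 << 1, so the masked write updates bit 1 and
leaves bit 0 untouched.
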
index 18dfe3ec9a6274a57e8b311d5c62c82cb50c124f..22278bcfc60eac4ed40fac0e31dda840aa4b703e 100644 (file)
@@ -215,7 +215,7 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
        struct drm_gem_cma_object *cma_obj;
 
        if (size == 0)
-               return NULL;
+               return ERR_PTR(-EINVAL);
 
        /* First, try to get a vc4_bo from the kernel BO cache. */
        if (from_cache) {
@@ -237,7 +237,7 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
                if (IS_ERR(cma_obj)) {
                        DRM_ERROR("Failed to allocate from CMA:\n");
                        vc4_bo_stats_dump(vc4);
-                       return NULL;
+                       return ERR_PTR(-ENOMEM);
                }
        }
 
@@ -259,8 +259,8 @@ int vc4_dumb_create(struct drm_file *file_priv,
                args->size = args->pitch * args->height;
 
        bo = vc4_bo_create(dev, args->size, false);
-       if (!bo)
-               return -ENOMEM;
+       if (IS_ERR(bo))
+               return PTR_ERR(bo);
 
        ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
        drm_gem_object_unreference_unlocked(&bo->base.base);
@@ -443,8 +443,8 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
         * get zeroed, and that might leak data between users.
         */
        bo = vc4_bo_create(dev, args->size, false);
-       if (!bo)
-               return -ENOMEM;
+       if (IS_ERR(bo))
+               return PTR_ERR(bo);
 
        ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
        drm_gem_object_unreference_unlocked(&bo->base.base);
@@ -496,8 +496,8 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
        }
 
        bo = vc4_bo_create(dev, args->size, true);
-       if (!bo)
-               return -ENOMEM;
+       if (IS_ERR(bo))
+               return PTR_ERR(bo);
 
        ret = copy_from_user(bo->base.vaddr,
                             (void __user *)(uintptr_t)args->data,
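
Switching vc4_bo_create() from returning NULL to ERR_PTR() lets every caller
propagate the real errno (-EINVAL for a zero size, -ENOMEM for a failed CMA
allocation) instead of flattening both to -ENOMEM. The three helpers travel
together; a short reminder of the idiom:

        struct vc4_bo *bo = vc4_bo_create(dev, size, false);

        if (IS_ERR(bo))                 /* true for any ERR_PTR(-errno) */
                return PTR_ERR(bo);     /* recover the encoded errno */

        /* bo is a valid pointer here; NULL is no longer a possible result */
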
index 080865ec2bae67c7ff7b04a97fdb5956096cc156..51a63330d4f8bd3bef1946224aafba621eaa26a5 100644 (file)
@@ -91,8 +91,12 @@ struct vc4_dev {
        struct vc4_bo *overflow_mem;
        struct work_struct overflow_mem_work;
 
+       int power_refcount;
+
+       /* Mutex controlling the power refcount. */
+       struct mutex power_lock;
+
        struct {
-               uint32_t last_ct0ca, last_ct1ca;
                struct timer_list timer;
                struct work_struct reset_work;
        } hangcheck;
@@ -142,6 +146,7 @@ struct vc4_seqno_cb {
 };
 
 struct vc4_v3d {
+       struct vc4_dev *vc4;
        struct platform_device *pdev;
        void __iomem *regs;
 };
@@ -192,6 +197,11 @@ struct vc4_exec_info {
        /* Sequence number for this bin/render job. */
        uint64_t seqno;
 
+       /* Last current addresses the hardware was processing when the
+        * hangcheck timer checked on us.
+        */
+       uint32_t last_ct0ca, last_ct1ca;
+
        /* Kernel-space copy of the ioctl arguments */
        struct drm_vc4_submit_cl *args;
 
@@ -434,7 +444,6 @@ void vc4_plane_async_set_fb(struct drm_plane *plane,
 extern struct platform_driver vc4_v3d_driver;
 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
 int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
-int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
 
 /* vc4_validate.c */
 int
index 48ce30a6f4b5cebb21348c1148d85a8c2450e45f..202aa1544acc7df6b669781764f387daa99dd871 100644 (file)
@@ -23,6 +23,7 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/device.h>
 #include <linux/io.h>
 
@@ -228,8 +229,16 @@ vc4_reset(struct drm_device *dev)
        struct vc4_dev *vc4 = to_vc4_dev(dev);
 
        DRM_INFO("Resetting GPU.\n");
-       vc4_v3d_set_power(vc4, false);
-       vc4_v3d_set_power(vc4, true);
+
+       mutex_lock(&vc4->power_lock);
+       if (vc4->power_refcount) {
+               /* Power the device off and back on by dropping and
+                * re-taking the runtime PM reference.
+                */
+               pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
+               pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+       }
+       mutex_unlock(&vc4->power_lock);
 
        vc4_irq_reset(dev);
 
@@ -257,10 +266,17 @@ vc4_hangcheck_elapsed(unsigned long data)
        struct drm_device *dev = (struct drm_device *)data;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint32_t ct0ca, ct1ca;
+       unsigned long irqflags;
+       struct vc4_exec_info *exec;
+
+       spin_lock_irqsave(&vc4->job_lock, irqflags);
+       exec = vc4_first_job(vc4);
 
        /* If idle, we can stop watching for hangs. */
-       if (list_empty(&vc4->job_list))
+       if (!exec) {
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                return;
+       }
 
        ct0ca = V3D_READ(V3D_CTNCA(0));
        ct1ca = V3D_READ(V3D_CTNCA(1));
@@ -268,14 +284,16 @@ vc4_hangcheck_elapsed(unsigned long data)
        /* If we've made any progress in execution, rearm the timer
         * and wait.
         */
-       if (ct0ca != vc4->hangcheck.last_ct0ca ||
-           ct1ca != vc4->hangcheck.last_ct1ca) {
-               vc4->hangcheck.last_ct0ca = ct0ca;
-               vc4->hangcheck.last_ct1ca = ct1ca;
+       if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) {
+               exec->last_ct0ca = ct0ca;
+               exec->last_ct1ca = ct1ca;
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                vc4_queue_hangcheck(dev);
                return;
        }
 
+       spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
        /* We've gone too long with no progress, reset.  This has to
         * be done from a work struct, since resetting can sleep and
         * this timer hook isn't allowed to.
@@ -340,12 +358,7 @@ vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
        finish_wait(&vc4->job_wait_queue, &wait);
        trace_vc4_wait_for_seqno_end(dev, seqno);
 
-       if (ret && ret != -ERESTARTSYS) {
-               DRM_ERROR("timeout waiting for render thread idle\n");
-               return ret;
-       }
-
-       return 0;
+       return ret;
 }
 
 static void
@@ -578,9 +591,9 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
        }
 
        bo = vc4_bo_create(dev, exec_size, true);
-       if (!bo) {
+       if (IS_ERR(bo)) {
                DRM_ERROR("Couldn't allocate BO for binning\n");
-               ret = -ENOMEM;
+               ret = PTR_ERR(bo);
                goto fail;
        }
        exec->exec_bo = &bo->base;
@@ -617,6 +630,7 @@ fail:
 static void
 vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 {
+       struct vc4_dev *vc4 = to_vc4_dev(dev);
        unsigned i;
 
        /* Need the struct lock for drm_gem_object_unreference(). */
@@ -635,6 +649,11 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
        }
        mutex_unlock(&dev->struct_mutex);
 
+       mutex_lock(&vc4->power_lock);
+       if (--vc4->power_refcount == 0)
+               pm_runtime_put(&vc4->v3d->pdev->dev);
+       mutex_unlock(&vc4->power_lock);
+
        kfree(exec);
 }
 
@@ -746,6 +765,9 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
        struct drm_gem_object *gem_obj;
        struct vc4_bo *bo;
 
+       if (args->pad != 0)
+               return -EINVAL;
+
        gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
        if (!gem_obj) {
                DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
@@ -772,7 +794,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        struct drm_vc4_submit_cl *args = data;
        struct vc4_exec_info *exec;
-       int ret;
+       int ret = 0;
 
        if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
                DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
@@ -785,6 +807,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
                return -ENOMEM;
        }
 
+       mutex_lock(&vc4->power_lock);
+       if (vc4->power_refcount++ == 0)
+               ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+       mutex_unlock(&vc4->power_lock);
+       if (ret < 0) {
+               kfree(exec);
+               return ret;
+       }
+
        exec->args = args;
        INIT_LIST_HEAD(&exec->unref_list);
 
@@ -839,6 +870,8 @@ vc4_gem_init(struct drm_device *dev)
                    (unsigned long)dev);
 
        INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+
+       mutex_init(&vc4->power_lock);
 }
 
 void
index b68060e758dbd26e1dccd17beeba4c09bbb1441e..78a21357fb2de9fe88d50a26f20f0634d97dbd88 100644 (file)
@@ -57,7 +57,7 @@ vc4_overflow_mem_work(struct work_struct *work)
        struct vc4_bo *bo;
 
        bo = vc4_bo_create(dev, 256 * 1024, true);
-       if (!bo) {
+       if (IS_ERR(bo)) {
                DRM_ERROR("Couldn't allocate binner overflow mem\n");
                return;
        }
index 8a2a312e2c1bb618ea414ed3f0dd87e82ed7e09a..0f12418725e552f7378dada1d5af1244f52599ad 100644 (file)
@@ -316,20 +316,11 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
        size += xtiles * ytiles * loop_body_size;
 
        setup->rcl = &vc4_bo_create(dev, size, true)->base;
-       if (!setup->rcl)
-               return -ENOMEM;
+       if (IS_ERR(setup->rcl))
+               return PTR_ERR(setup->rcl);
        list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
                      &exec->unref_list);
 
-       rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
-       rcl_u32(setup,
-               (setup->color_write ? (setup->color_write->paddr +
-                                      args->color_write.offset) :
-                0));
-       rcl_u16(setup, args->width);
-       rcl_u16(setup, args->height);
-       rcl_u16(setup, args->color_write.bits);
-
        /* The tile buffer gets cleared when the previous tile is stored.  If
         * the clear values changed between frames, then the tile buffer has
         * stale clear values in it, so we have to do a store in None mode (no
@@ -349,6 +340,15 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
                rcl_u32(setup, 0); /* no address, since we're in None mode */
        }
 
+       rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
+       rcl_u32(setup,
+               (setup->color_write ? (setup->color_write->paddr +
+                                      args->color_write.offset) :
+                0));
+       rcl_u16(setup, args->width);
+       rcl_u16(setup, args->height);
+       rcl_u16(setup, args->color_write.bits);
+
        for (y = min_y_tile; y <= max_y_tile; y++) {
                for (x = min_x_tile; x <= max_x_tile; x++) {
                        bool first = (x == min_x_tile && y == min_y_tile);
index 424d515ffcdafae650c9d472a02600db7c0a8485..31de5d17bc856a0c152f9d9b4ebaae4c48880ce9 100644 (file)
@@ -17,6 +17,7 @@
  */
 
 #include "linux/component.h"
+#include "linux/pm_runtime.h"
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
@@ -144,21 +145,6 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 }
 #endif /* CONFIG_DEBUG_FS */
 
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
-int
-vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
-{
-       if (on)
-               return pm_generic_poweroff(&vc4->v3d->pdev->dev);
-       else
-               return pm_generic_resume(&vc4->v3d->pdev->dev);
-}
-
 static void vc4_v3d_init_hw(struct drm_device *dev)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -170,6 +156,29 @@ static void vc4_v3d_init_hw(struct drm_device *dev)
        V3D_WRITE(V3D_VPMBASE, 0);
 }
 
+#ifdef CONFIG_PM
+static int vc4_v3d_runtime_suspend(struct device *dev)
+{
+       struct vc4_v3d *v3d = dev_get_drvdata(dev);
+       struct vc4_dev *vc4 = v3d->vc4;
+
+       vc4_irq_uninstall(vc4->dev);
+
+       return 0;
+}
+
+static int vc4_v3d_runtime_resume(struct device *dev)
+{
+       struct vc4_v3d *v3d = dev_get_drvdata(dev);
+       struct vc4_dev *vc4 = v3d->vc4;
+
+       vc4_v3d_init_hw(vc4->dev);
+       vc4_irq_postinstall(vc4->dev);
+
+       return 0;
+}
+#endif
+
 static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 {
        struct platform_device *pdev = to_platform_device(dev);
@@ -182,6 +191,8 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
        if (!v3d)
                return -ENOMEM;
 
+       dev_set_drvdata(dev, v3d);
+
        v3d->pdev = pdev;
 
        v3d->regs = vc4_ioremap_regs(pdev, 0);
@@ -189,6 +200,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
                return PTR_ERR(v3d->regs);
 
        vc4->v3d = v3d;
+       v3d->vc4 = vc4;
 
        if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
                DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
@@ -210,6 +222,8 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
                return ret;
        }
 
+       pm_runtime_enable(dev);
+
        return 0;
 }
 
@@ -219,6 +233,8 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master,
        struct drm_device *drm = dev_get_drvdata(master);
        struct vc4_dev *vc4 = to_vc4_dev(drm);
 
+       pm_runtime_disable(dev);
+
        drm_irq_uninstall(drm);
 
        /* Disable the binner's overflow memory address, so the next
@@ -231,6 +247,10 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master,
        vc4->v3d = NULL;
 }
 
+static const struct dev_pm_ops vc4_v3d_pm_ops = {
+       SET_RUNTIME_PM_OPS(vc4_v3d_runtime_suspend, vc4_v3d_runtime_resume, NULL)
+};
+
 static const struct component_ops vc4_v3d_ops = {
        .bind   = vc4_v3d_bind,
        .unbind = vc4_v3d_unbind,
@@ -258,5 +278,6 @@ struct platform_driver vc4_v3d_driver = {
        .driver = {
                .name = "vc4_v3d",
                .of_match_table = vc4_v3d_dt_match,
+               .pm = &vc4_v3d_pm_ops,
        },
 };
index e26d9f6face3c498a4ddb5278e7d9f043cd2de2c..24c2c746e8f397aafd0285323508bd2ee351d3a2 100644 (file)
@@ -401,8 +401,8 @@ validate_tile_binning_config(VALIDATE_ARGS)
        tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
                                true);
        exec->tile_bo = &tile_bo->base;
-       if (!exec->tile_bo)
-               return -ENOMEM;
+       if (IS_ERR(exec->tile_bo))
+               return PTR_ERR(exec->tile_bo);
        list_add_tail(&tile_bo->unref_head, &exec->unref_list);
 
        /* tile alloc address. */
index c49812b80dd0dae82c77096f75fbd4cced19ffb1..24fb348a44e141f71574f20d1e57bdaad2759ce6 100644 (file)
@@ -25,6 +25,7 @@
  *
  **************************************************************************/
 #include <linux/module.h>
+#include <linux/console.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 static int __init vmwgfx_init(void)
 {
        int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+       if (vgacon_text_force())
+               return -EINVAL;
+#endif
+
        ret = drm_pci_init(&driver, &vmw_pci_driver);
        if (ret)
                DRM_ERROR("Failed initializing DRM.\n");
index f155b83804819ff97455ba5312f62de7f14991e2..2b3105c8aed399f8ce9cd3df03d6a265378131fc 100644 (file)
@@ -126,7 +126,7 @@ static int ads1015_reg_to_mv(struct i2c_client *client, unsigned int channel,
        struct ads1015_data *data = i2c_get_clientdata(client);
        unsigned int pga = data->channel_data[channel].pga;
        int fullscale = fullscale_table[pga];
-       const unsigned mask = data->id == ads1115 ? 0x7fff : 0x7ff0;
+       const int mask = data->id == ads1115 ? 0x7fff : 0x7ff0;
 
        return DIV_ROUND_CLOSEST(reg * fullscale, mask);
 }
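
The type change matters because of C's usual arithmetic conversions: reg can
be negative (the ADC reports signed readings), and dividing an int by an
unsigned forces the whole expression to unsigned first. With illustrative
numbers reg = -4, fullscale = 2048, mask = 0x7fff: as ints, -8192 / 32767
rounds to 0 mV as expected; with an unsigned mask, -8192 is first converted
to 4294959104 and the "result" lands near 131,000 mV.
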
index c8487894b31236cefd761b24cac48fb4e17e6d52..c43318d3416e20a4d027a32402b1cf751718b8c6 100644 (file)
@@ -930,6 +930,17 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
 static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+       {
+               /*
+                * CPU fan speed going up and down on Dell Studio XPS 8000
+                * for unknown reasons.
+                */
+               .ident = "Dell Studio XPS 8000",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"),
+               },
+       },
        {
                /*
                 * CPU fan speed going up and down on Dell Studio XPS 8100
index f77eb971ce959a6d3501dfdf740e8f8f566383e5..4f695d8fcafaceab3c5d8caaa677b58e94ae9c2c 100644 (file)
@@ -90,7 +90,15 @@ static ssize_t show_power(struct device *dev,
        pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5),
                                  REG_TDP_LIMIT3, &val);
 
-       tdp_limit = val >> 16;
+       /*
+        * On Carrizo and later platforms, the ApmTdpLimit bit field
+        * was extended from bits 16:28 to 16:31.
+        */
+       if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+               tdp_limit = val >> 16;
+       else
+               tdp_limit = (val >> 16) & 0x1fff;
+
        curr_pwr_watts = ((u64)(tdp_limit +
                                data->base_tdp)) << running_avg_range;
        curr_pwr_watts -= running_avg_capture;
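
The masking arithmetic: `val >> 16` keeps bits 16:31 of REG_TDP_LIMIT3
(16 bits), while `(val >> 16) & 0x1fff` keeps only bits 16:28 (13 bits,
0x1fff = 2^13 - 1). On pre-Carrizo parts the top three bits carry other
state, so an illustrative val of 0xa0010000 must decode to a TDP limit of
0x0001, not 0xa001.
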
index 82de3deeb18a7ddf5e041e695b35cc8b1500abea..685568b1236d4a26db2d685ce36dcb9729e9f3ab 100644 (file)
@@ -406,16 +406,11 @@ static int gpio_fan_get_cur_state(struct thermal_cooling_device *cdev,
                                  unsigned long *state)
 {
        struct gpio_fan_data *fan_data = cdev->devdata;
-       int r;
 
        if (!fan_data)
                return -EINVAL;
 
-       r = get_fan_speed_index(fan_data);
-       if (r < 0)
-               return r;
-
-       *state = r;
+       *state = fan_data->speed_index;
        return 0;
 }
 
index 52f708bcf77f397952ea0f278ce5b161780e076a..d50c701b19d678e9998319be36a492bb3a8eba6d 100644 (file)
@@ -313,6 +313,10 @@ int of_hwspin_lock_get_id(struct device_node *np, int index)
                hwlock = radix_tree_deref_slot(slot);
                if (unlikely(!hwlock))
                        continue;
+               if (radix_tree_is_indirect_ptr(hwlock)) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
 
                if (hwlock->bank->dev->of_node == args.np) {
                        ret = 0;
index ba9732c236c52985c922a63d50bec477585af610..10fbd6d841e06aef8ae0a627f9fcf36106f5119b 100644 (file)
@@ -874,7 +874,8 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
        i2c_set_adapdata(adap, dev);
 
        i2c_dw_disable_int(dev);
-       r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, IRQF_SHARED,
+       r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr,
+                            IRQF_SHARED | IRQF_COND_SUSPEND,
                             dev_name(dev->dev), dev);
        if (r) {
                dev_err(dev->dev, "failure requesting irq %i: %d\n",
index f62d69799a9c55b0f79e2de5e29ec9a09b257c98..27fa0cb09538cebfd0f9388112cfe30abb773edd 100644 (file)
@@ -1271,6 +1271,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
        switch (dev->device) {
        case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
        case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
+       case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
+       case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
        case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
                priv->features |= FEATURE_I2C_BLOCK_READ;
                priv->features |= FEATURE_IRQ;
index 08d26ba61ed3326a8905c99cdb6ee8caaef45727..13c45296ce5bfc5390a1bbffcca9960b3b1b90b6 100644 (file)
@@ -1450,7 +1450,8 @@ omap_i2c_probe(struct platform_device *pdev)
 
 err_unuse_clocks:
        omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0);
-       pm_runtime_put(omap->dev);
+       pm_runtime_dont_use_autosuspend(omap->dev);
+       pm_runtime_put_sync(omap->dev);
        pm_runtime_disable(&pdev->dev);
 err_free_mem:
 
@@ -1468,6 +1469,7 @@ static int omap_i2c_remove(struct platform_device *pdev)
                return ret;
 
        omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0);
+       pm_runtime_dont_use_autosuspend(&pdev->dev);
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
        return 0;
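
Both hunks fix the same runtime-PM ordering: with autosuspend enabled, a plain pm_runtime_put() merely schedules a delayed suspend, so the device can still be active when pm_runtime_disable() runs. The canonical teardown, sketched with a hypothetical driver:

static int foo_remove(struct platform_device *pdev)     /* hypothetical */
{
        pm_runtime_dont_use_autosuspend(&pdev->dev);    /* make puts act immediately */
        pm_runtime_put_sync(&pdev->dev);                /* drop probe's usage count now */
        pm_runtime_disable(&pdev->dev);                 /* then it is safe to disable */
        return 0;
}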
index e045985950737c429c3c3253620559f55be1244e..93f2895383ee55bb9f060cf145af92adbbeb512e 100644 (file)
@@ -137,10 +137,11 @@ static const struct dmi_system_id piix4_dmi_ibm[] = {
 };
 
 /* SB800 globals */
+static DEFINE_MUTEX(piix4_mutex_sb800);
 static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
-       "SDA0", "SDA2", "SDA3", "SDA4"
+       " port 0", " port 2", " port 3", " port 4"
 };
-static const char *piix4_aux_port_name_sb800 = "SDA1";
+static const char *piix4_aux_port_name_sb800 = " port 1";
 
 struct i2c_piix4_adapdata {
        unsigned short smba;
@@ -148,7 +149,6 @@ struct i2c_piix4_adapdata {
        /* SB800 */
        bool sb800_main;
        unsigned short port;
-       struct mutex *mutex;
 };
 
 static int piix4_setup(struct pci_dev *PIIX4_dev,
@@ -275,10 +275,12 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
        else
                smb_en = (aux) ? 0x28 : 0x2c;
 
+       mutex_lock(&piix4_mutex_sb800);
        outb_p(smb_en, SB800_PIIX4_SMB_IDX);
        smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
        outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
        smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
+       mutex_unlock(&piix4_mutex_sb800);
 
        if (!smb_en) {
                smb_en_status = smba_en_lo & 0x10;
@@ -559,7 +561,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
        u8 port;
        int retval;
 
-       mutex_lock(adapdata->mutex);
+       mutex_lock(&piix4_mutex_sb800);
 
        outb_p(SB800_PIIX4_PORT_IDX, SB800_PIIX4_SMB_IDX);
        smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
@@ -574,7 +576,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
        outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1);
 
-       mutex_unlock(adapdata->mutex);
+       mutex_unlock(&piix4_mutex_sb800);
 
        return retval;
 }
@@ -625,6 +627,7 @@ static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
 static struct i2c_adapter *piix4_aux_adapter;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
+                            bool sb800_main, unsigned short port,
                             const char *name, struct i2c_adapter **padap)
 {
        struct i2c_adapter *adap;
@@ -639,7 +642,8 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
        adap->owner = THIS_MODULE;
        adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
-       adap->algo = &smbus_algorithm;
+       adap->algo = sb800_main ? &piix4_smbus_algorithm_sb800
+                               : &smbus_algorithm;
 
        adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
        if (adapdata == NULL) {
@@ -649,12 +653,14 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
        }
 
        adapdata->smba = smba;
+       adapdata->sb800_main = sb800_main;
+       adapdata->port = port;
 
        /* set up the sysfs linkage to our parent device */
        adap->dev.parent = &dev->dev;
 
        snprintf(adap->name, sizeof(adap->name),
-               "SMBus PIIX4 adapter %s at %04x", name, smba);
+               "SMBus PIIX4 adapter%s at %04x", name, smba);
 
        i2c_set_adapdata(adap, adapdata);
 
@@ -673,30 +679,16 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
 static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba)
 {
-       struct mutex *mutex;
        struct i2c_piix4_adapdata *adapdata;
        int port;
        int retval;
 
-       mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
-       if (mutex == NULL)
-               return -ENOMEM;
-
-       mutex_init(mutex);
-
        for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
-               retval = piix4_add_adapter(dev, smba,
+               retval = piix4_add_adapter(dev, smba, true, port,
                                           piix4_main_port_names_sb800[port],
                                           &piix4_main_adapters[port]);
                if (retval < 0)
                        goto error;
-
-               piix4_main_adapters[port]->algo = &piix4_smbus_algorithm_sb800;
-
-               adapdata = i2c_get_adapdata(piix4_main_adapters[port]);
-               adapdata->sb800_main = true;
-               adapdata->port = port;
-               adapdata->mutex = mutex;
        }
 
        return retval;
@@ -714,19 +706,20 @@ error:
                }
        }
 
-       kfree(mutex);
-
        return retval;
 }
 
 static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
        int retval;
+       bool is_sb800 = false;
 
        if ((dev->vendor == PCI_VENDOR_ID_ATI &&
             dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
             dev->revision >= 0x40) ||
            dev->vendor == PCI_VENDOR_ID_AMD) {
+               is_sb800 = true;
+
                if (!request_region(SB800_PIIX4_SMB_IDX, 2, "smba_idx")) {
                        dev_err(&dev->dev,
                        "SMBus base address index region 0x%x already in use!\n",
@@ -756,7 +749,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
                        return retval;
 
                /* Try to register main SMBus adapter, give up if we can't */
-               retval = piix4_add_adapter(dev, retval, "main",
+               retval = piix4_add_adapter(dev, retval, false, 0, "",
                                           &piix4_main_adapters[0]);
                if (retval < 0)
                        return retval;
@@ -783,7 +776,8 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (retval > 0) {
                /* Try to add the aux adapter if it exists,
                 * piix4_add_adapter will clean up if this fails */
-               piix4_add_adapter(dev, retval, piix4_aux_port_name_sb800,
+               piix4_add_adapter(dev, retval, false, 0,
+                                 is_sb800 ? piix4_aux_port_name_sb800 : "",
                                  &piix4_aux_adapter);
        }
 
@@ -798,10 +792,8 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
                i2c_del_adapter(adap);
                if (adapdata->port == 0) {
                        release_region(adapdata->smba, SMBIOSIZE);
-                       if (adapdata->sb800_main) {
-                               kfree(adapdata->mutex);
+                       if (adapdata->sb800_main)
                                release_region(SB800_PIIX4_SMB_IDX, 2);
-                       }
                }
                kfree(adapdata);
                kfree(adap);
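
The net effect of this patch: every SB800 port reaches the hardware through the same SB800_PIIX4_SMB_IDX index/data port pair, so one file-scope mutex is the natural owner of that pair, replacing the kzalloc'd mutex that was threaded through adapdata and freed on several paths. The resulting access pattern, sketched with a hypothetical helper:

static DEFINE_MUTEX(piix4_mutex_sb800);

static u8 piix4_read_indexed(u8 index)          /* hypothetical helper */
{
        u8 val;

        mutex_lock(&piix4_mutex_sb800);
        outb_p(index, SB800_PIIX4_SMB_IDX);     /* select the register */
        val = inb_p(SB800_PIIX4_SMB_IDX + 1);   /* read it back */
        mutex_unlock(&piix4_mutex_sb800);
        return val;
}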
index f3e5ff8522f0af6ef77af252f3269114fe4b803e..213ba55e17c3a35bfe9faacc9155a3f8072b2899 100644 (file)
@@ -467,7 +467,7 @@ static int uniphier_fi2c_clk_init(struct device *dev,
                bus_speed = UNIPHIER_FI2C_DEFAULT_SPEED;
 
        if (!bus_speed) {
-               dev_err(dev, "clock-freqyency should not be zero\n");
+               dev_err(dev, "clock-frequency should not be zero\n");
                return -EINVAL;
        }
 
index 1f4f3f53819c7a88516dad5eab11d773fceb89f9..89eaa8a7e1e01578a4ee27f322a8cbfe75abf5e8 100644 (file)
@@ -328,7 +328,7 @@ static int uniphier_i2c_clk_init(struct device *dev,
                bus_speed = UNIPHIER_I2C_DEFAULT_SPEED;
 
        if (!bus_speed) {
-               dev_err(dev, "clock-freqyency should not be zero\n");
+               dev_err(dev, "clock-frequency should not be zero\n");
                return -EINVAL;
        }
 
index edc29b173f6c9012635771116a6cca23193a096e..833ea9dd4464b664e9d11103c8543961a65f5f01 100644 (file)
@@ -213,6 +213,7 @@ config STK8312
 config STK8BA50
        tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver"
        depends on I2C
+       depends on IIO_TRIGGER
        help
          Say yes here to get support for the Sensortek STK8BA50 3-axis
          accelerometer.
index 605ff42c46310201e7aba3fd2f7ab42a0a873e04..283ded7747a9379be4c105ef49964d5c5d674b0f 100644 (file)
@@ -175,6 +175,7 @@ config DA9150_GPADC
 config EXYNOS_ADC
        tristate "Exynos ADC driver support"
        depends on ARCH_EXYNOS || ARCH_S3C24XX || ARCH_S3C64XX || (OF && COMPILE_TEST)
+       depends on HAS_IOMEM
        help
          Core support for the ADC block found in the Samsung EXYNOS series
          of SoCs for drivers such as the touchscreen and hwmon to use to share
@@ -207,6 +208,7 @@ config INA2XX_ADC
 config IMX7D_ADC
        tristate "IMX7D ADC driver"
        depends on ARCH_MXC || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Say yes here to build support for IMX7D ADC.
 
@@ -409,6 +411,7 @@ config TWL6030_GPADC
 config VF610_ADC
        tristate "Freescale vf610 ADC driver"
        depends on OF
+       depends on HAS_IOMEM
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        help
index 942320e32753c3838e967e9e0a96d4db2d691fce..c1e05532d437f263a9aa3d7f7c96147b13bfe682 100644 (file)
@@ -289,7 +289,7 @@ static int tiadc_iio_buffered_hardware_setup(struct iio_dev *indio_dev,
                goto error_kfifo_free;
 
        indio_dev->setup_ops = setup_ops;
-       indio_dev->modes |= INDIO_BUFFER_HARDWARE;
+       indio_dev->modes |= INDIO_BUFFER_SOFTWARE;
 
        return 0;
 
index 43d14588448d65f507ab02ce22c0e01d7a9008ad..b4dde8315210c6519c7e2a7db4bd8bf3b6607988 100644 (file)
@@ -300,6 +300,7 @@ static int mcp4725_probe(struct i2c_client *client,
        data->client = client;
 
        indio_dev->dev.parent = &client->dev;
+       indio_dev->name = id->name;
        indio_dev->info = &mcp4725_info;
        indio_dev->channels = &mcp4725_channel;
        indio_dev->num_channels = 1;
index 1165b1c4f9d67bd93db24f784da5b13010d9fd99..cfc5a051ab9f3946bfdfd4aaf5aaac01f7245f36 100644 (file)
@@ -117,7 +117,7 @@ static int dht11_decode(struct dht11 *dht11, int offset, int timeres)
        if (((hum_int + hum_dec + temp_int + temp_dec) & 0xff) != checksum)
                return -EIO;
 
-       dht11->timestamp = ktime_get_real_ns();
+       dht11->timestamp = ktime_get_boot_ns();
        if (hum_int < 20) {  /* DHT22 */
                dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) *
                                        ((temp_int & 0x80) ? -100 : 100);
@@ -145,7 +145,7 @@ static irqreturn_t dht11_handle_irq(int irq, void *data)
 
        /* TODO: Consider making the handler safe for IRQ sharing */
        if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
-               dht11->edges[dht11->num_edges].ts = ktime_get_real_ns();
+               dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns();
                dht11->edges[dht11->num_edges++].value =
                                                gpio_get_value(dht11->gpio);
 
@@ -164,7 +164,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
        int ret, timeres;
 
        mutex_lock(&dht11->lock);
-       if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_real_ns()) {
+       if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boot_ns()) {
                timeres = ktime_get_resolution_ns();
                if (DHT11_DATA_BIT_HIGH < 2 * timeres) {
                        dev_err(dht11->dev, "timeresolution %dns too low\n",
@@ -279,7 +279,7 @@ static int dht11_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       dht11->timestamp = ktime_get_real_ns() - DHT11_DATA_VALID_TIME - 1;
+       dht11->timestamp = ktime_get_boot_ns() - DHT11_DATA_VALID_TIME - 1;
        dht11->num_edges = -1;
 
        platform_set_drvdata(pdev, iio);
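
The clock swap is the whole fix: ktime_get_real_ns() follows wall-clock time, which can be stepped backwards by settimeofday()/NTP and corrupt the sensor's edge-interval math, while ktime_get_boot_ns() is monotonic and keeps counting across suspend. A userspace analogue, using CLOCK_BOOTTIME as the counterpart of ktime_get_boot_ns():

#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <time.h>

static int64_t boottime_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_BOOTTIME, &ts);     /* never steps backwards */
        return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
        int64_t t0 = boottime_ns();
        /* ... the timing-sensitive work would happen here ... */
        printf("elapsed: %lld ns\n", (long long)(boottime_ns() - t0));
        return 0;
}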
index cb32b593f1c55c023046d97cfc41263ea3818d89..36607d52fee065f9e2aa82f14215897e2c5b44b1 100644 (file)
@@ -43,7 +43,7 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
                return -ENOMEM;
 
        rx = adis->buffer;
-       tx = rx + indio_dev->scan_bytes;
+       tx = rx + scan_count;
 
        spi_message_init(&adis->msg);
 
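The one-liner fixes classic element-versus-byte pointer arithmetic: rx is a 16-bit pointer, so "rx + indio_dev->scan_bytes" advanced scan_bytes *elements* (twice the intended distance) and pushed tx past the allocation, whereas "rx + scan_count" lands exactly at the buffer's second half. The same arithmetic in isolation, with made-up sizes:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
        unsigned int scan_bytes = 24;             /* example scan size */
        unsigned int scan_count = scan_bytes / 2; /* number of 16-bit words */
        uint16_t *buf = calloc(2, scan_bytes);    /* rx half then tx half */
        uint16_t *rx = buf;
        uint16_t *tx = rx + scan_count;           /* correct: mid-buffer */

        if (!buf)
                return 1;
        assert((char *)tx - (char *)rx == scan_bytes);
        /* the old "rx + scan_bytes" would have pointed at the buffer's end */
        free(buf);
        return 0;
}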
index 48fbc0bc7e2a1d499015001bace2c46db2c8b440..8f8d1370ed8b9b3359d3a4f1135767a622df2c21 100644 (file)
@@ -5,9 +5,9 @@
 config INV_MPU6050_IIO
        tristate "Invensense MPU6050 devices"
        depends on I2C && SYSFS
+       depends on I2C_MUX
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
-       select I2C_MUX
        help
          This driver supports the Invensense MPU6050 devices.
          This driver can also support MPU6500 in MPU6050 compatibility mode
index 80fbbfd76faf9e1859eefb1b5dc55b2129f9858b..734a0042de0cb4265ee3145ee48f22a19b8b50af 100644 (file)
@@ -349,6 +349,8 @@ EXPORT_SYMBOL_GPL(iio_channel_get);
 
 void iio_channel_release(struct iio_channel *channel)
 {
+       if (!channel)
+               return;
        iio_device_put(channel->indio_dev);
        kfree(channel);
 }
index 60537ec0c923b98c45160d011f4a17ac932e4609..53201d99a16c8d760f4ec909c8c5fb1cba6bc8be 100644 (file)
@@ -54,7 +54,9 @@ static const struct iio_chan_spec acpi_als_channels[] = {
                        .realbits       = 32,
                        .storagebits    = 32,
                },
-               .info_mask_separate     = BIT(IIO_CHAN_INFO_RAW),
+               /* _RAW is here for backward ABI compatibility */
+               .info_mask_separate     = BIT(IIO_CHAN_INFO_RAW) |
+                                         BIT(IIO_CHAN_INFO_PROCESSED),
        },
 };
 
@@ -152,7 +154,7 @@ static int acpi_als_read_raw(struct iio_dev *indio_dev,
        s32 temp_val;
        int ret;
 
-       if (mask != IIO_CHAN_INFO_RAW)
+       if ((mask != IIO_CHAN_INFO_PROCESSED) && (mask != IIO_CHAN_INFO_RAW))
                return -EINVAL;
 
        /* we support only illumination (_ALI) so far. */
index 809a961b9a7f6d0d0077114e12767e4e267c7606..6bf89d8f374191cee48f258d1c25810b0e5dc410 100644 (file)
@@ -180,7 +180,7 @@ static const struct ltr501_samp_table ltr501_ps_samp_table[] = {
                        {500000, 2000000}
 };
 
-static unsigned int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
+static int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
                                           int len, int val, int val2)
 {
        int i, freq;
index f5ecd6e19f5de725e715e547a5527f5875050561..a0d7deeac62f78dfc416bf597e0b894436c75a8c 100644 (file)
@@ -117,7 +117,7 @@ static int mpl115_read_raw(struct iio_dev *indio_dev,
                *val = ret >> 6;
                return IIO_VAL_INT;
        case IIO_CHAN_INFO_OFFSET:
-               *val = 605;
+               *val = -605;
                *val2 = 750000;
                return IIO_VAL_INT_PLUS_MICRO;
        case IIO_CHAN_INFO_SCALE:
index 93e29fb67fa00f94e9ca0a614addfc5cde7ebb76..db35e04a063767a9b2b6011cf3d0345c9d0929bd 100644 (file)
@@ -87,7 +87,7 @@ static int lidar_i2c_xfer(struct lidar_data *data, u8 reg, u8 *val, int len)
 
        ret = i2c_transfer(client->adapter, msg, 2);
 
-       return (ret == 2) ? 0 : ret;
+       return (ret == 2) ? 0 : -EIO;
 }
 
 static int lidar_smbus_xfer(struct lidar_data *data, u8 reg, u8 *val, int len)
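
The subtlety: i2c_transfer() returns the number of messages completed, or a negative errno. A partial transfer therefore comes back as a positive 1, which the old "return ret" passed through and callers testing only "ret < 0" mistook for success. Mapping anything short of a full transfer to -EIO keeps the failure visible; the shape of the convention, as a kernel-side sketch with a hypothetical helper:

static int xfer_two(struct i2c_client *client, struct i2c_msg *msg)     /* hypothetical */
{
        int ret = i2c_transfer(client->adapter, msg, 2);

        if (ret < 0)
                return ret;                     /* already a proper errno */
        return (ret == 2) ? 0 : -EIO;           /* partial transfer is an error */
}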
index aa26f3c3416bbbcd04dac1ec5381fcb081105d5e..8a8440c0eed1bf64519c5b7bce7903d010246c1c 100644 (file)
@@ -5,6 +5,7 @@ menuconfig INFINIBAND
        depends on NET
        depends on INET
        depends on m || IPV6 != m
+       select IRQ_POLL
        ---help---
          Core support for InfiniBand (IB).  Make sure to also select
          any protocols you wish to use as well as drivers for your
@@ -54,6 +55,15 @@ config INFINIBAND_ADDR_TRANS
        depends on INFINIBAND
        default y
 
+config INFINIBAND_ADDR_TRANS_CONFIGFS
+       bool
+       depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
+       default y
+       ---help---
+         ConfigFS support for RDMA communication manager (CM).
+         This allows the user to configure the default GID type that the
+         CM uses for each device when initiating new connections.
+
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
index d43a8994ac5c129d201b0f164442c618192eea9d..f818538a7f4e118b309052c3d43a1aa18d5dbe65 100644 (file)
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) +=    ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=        ib_uverbs.o ib_ucm.o \
                                        $(user_access-y)
 
-ib_core-y :=                   packer.o ud_header.o verbs.o sysfs.o \
+ib_core-y :=                   packer.o ud_header.o verbs.o cq.o sysfs.o \
                                device.o fmr_pool.o cache.o netlink.o \
                                roce_gid_mgmt.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
@@ -24,6 +24,8 @@ iw_cm-y :=                    iwcm.o iwpm_util.o iwpm_msg.o
 
 rdma_cm-y :=                   cma.o
 
+rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
+
 rdma_ucm-y :=                  ucma.o
 
 ib_addr-y :=                   addr.o
index 34b1adad07aacf92e9bbfa9621e084f2ec756f74..337353d86cfadec33064e10117539769fcc0f95d 100644 (file)
@@ -121,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 }
 EXPORT_SYMBOL(rdma_copy_addr);
 
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+int rdma_translate_ip(const struct sockaddr *addr,
+                     struct rdma_dev_addr *dev_addr,
                      u16 *vlan_id)
 {
        struct net_device *dev;
@@ -139,7 +140,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
        switch (addr->sa_family) {
        case AF_INET:
                dev = ip_dev_find(dev_addr->net,
-                       ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+                       ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
 
                if (!dev)
                        return ret;
@@ -154,7 +155,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
                rcu_read_lock();
                for_each_netdev_rcu(dev_addr->net, dev) {
                        if (ipv6_chk_addr(dev_addr->net,
-                                         &((struct sockaddr_in6 *) addr)->sin6_addr,
+                                         &((const struct sockaddr_in6 *)addr)->sin6_addr,
                                          dev, 1)) {
                                ret = rdma_copy_addr(dev_addr, dev, NULL);
                                if (vlan_id)
@@ -198,7 +199,8 @@ static void queue_req(struct addr_req *req)
        mutex_unlock(&lock);
 }
 
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+                       const void *daddr)
 {
        struct neighbour *n;
        int ret;
@@ -222,8 +224,9 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, v
 }
 
 static int addr4_resolve(struct sockaddr_in *src_in,
-                        struct sockaddr_in *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct rtable **prt)
 {
        __be32 src_ip = src_in->sin_addr.s_addr;
        __be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -243,33 +246,29 @@ static int addr4_resolve(struct sockaddr_in *src_in,
        src_in->sin_family = AF_INET;
        src_in->sin_addr.s_addr = fl4.saddr;
 
-       if (rt->dst.dev->flags & IFF_LOOPBACK) {
-               ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
-               if (!ret)
-                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
-               goto put;
-       }
+       /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+        * routable) and we could set the network type accordingly.
+        */
+       if (rt->rt_uses_gateway)
+               addr->network = RDMA_NETWORK_IPV4;
 
-       /* If the device does ARP internally, return 'done' */
-       if (rt->dst.dev->flags & IFF_NOARP) {
-               ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
-               goto put;
-       }
+       addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
 
-       ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
-put:
-       ip_rt_put(rt);
+       *prt = rt;
+       return 0;
 out:
        return ret;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static int addr6_resolve(struct sockaddr_in6 *src_in,
-                        struct sockaddr_in6 *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct dst_entry **pdst)
 {
        struct flowi6 fl6;
        struct dst_entry *dst;
+       struct rt6_info *rt;
        int ret;
 
        memset(&fl6, 0, sizeof fl6);
@@ -281,6 +280,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
        if ((ret = dst->error))
                goto put;
 
+       rt = (struct rt6_info *)dst;
        if (ipv6_addr_any(&fl6.saddr)) {
                ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
                                         &fl6.daddr, 0, &fl6.saddr);
@@ -291,43 +291,111 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
                src_in->sin6_addr = fl6.saddr;
        }
 
-       if (dst->dev->flags & IFF_LOOPBACK) {
-               ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
-               if (!ret)
-                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
-               goto put;
-       }
+       /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+        * routable) and we could set the network type accordingly.
+        */
+       if (rt->rt6i_flags & RTF_GATEWAY)
+               addr->network = RDMA_NETWORK_IPV6;
 
-       /* If the device does ARP internally, return 'done' */
-       if (dst->dev->flags & IFF_NOARP) {
-               ret = rdma_copy_addr(addr, dst->dev, NULL);
-               goto put;
-       }
+       addr->hoplimit = ip6_dst_hoplimit(dst);
 
-       ret = dst_fetch_ha(dst, addr, &fl6.daddr);
+       *pdst = dst;
+       return 0;
 put:
        dst_release(dst);
        return ret;
 }
 #else
 static int addr6_resolve(struct sockaddr_in6 *src_in,
-                        struct sockaddr_in6 *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct dst_entry **pdst)
 {
        return -EADDRNOTAVAIL;
 }
 #endif
 
+static int addr_resolve_neigh(struct dst_entry *dst,
+                             const struct sockaddr *dst_in,
+                             struct rdma_dev_addr *addr)
+{
+       if (dst->dev->flags & IFF_LOOPBACK) {
+               int ret;
+
+               ret = rdma_translate_ip(dst_in, addr, NULL);
+               if (!ret)
+                       memcpy(addr->dst_dev_addr, addr->src_dev_addr,
+                              MAX_ADDR_LEN);
+
+               return ret;
+       }
+
+       /* If the device doesn't do ARP internally */
+       if (!(dst->dev->flags & IFF_NOARP)) {
+               const struct sockaddr_in *dst_in4 =
+                       (const struct sockaddr_in *)dst_in;
+               const struct sockaddr_in6 *dst_in6 =
+                       (const struct sockaddr_in6 *)dst_in;
+
+               return dst_fetch_ha(dst, addr,
+                                   dst_in->sa_family == AF_INET ?
+                                   (const void *)&dst_in4->sin_addr.s_addr :
+                                   (const void *)&dst_in6->sin6_addr);
+       }
+
+       return rdma_copy_addr(addr, dst->dev, NULL);
+}
+
 static int addr_resolve(struct sockaddr *src_in,
-                       struct sockaddr *dst_in,
-                       struct rdma_dev_addr *addr)
+                       const struct sockaddr *dst_in,
+                       struct rdma_dev_addr *addr,
+                       bool resolve_neigh)
 {
+       struct net_device *ndev;
+       struct dst_entry *dst;
+       int ret;
+
        if (src_in->sa_family == AF_INET) {
-               return addr4_resolve((struct sockaddr_in *) src_in,
-                       (struct sockaddr_in *) dst_in, addr);
-       } else
-               return addr6_resolve((struct sockaddr_in6 *) src_in,
-                       (struct sockaddr_in6 *) dst_in, addr);
+               struct rtable *rt = NULL;
+               const struct sockaddr_in *dst_in4 =
+                       (const struct sockaddr_in *)dst_in;
+
+               ret = addr4_resolve((struct sockaddr_in *)src_in,
+                                   dst_in4, addr, &rt);
+               if (ret)
+                       return ret;
+
+               if (resolve_neigh)
+                       ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
+
+               ndev = rt->dst.dev;
+               dev_hold(ndev);
+
+               ip_rt_put(rt);
+       } else {
+               const struct sockaddr_in6 *dst_in6 =
+                       (const struct sockaddr_in6 *)dst_in;
+
+               ret = addr6_resolve((struct sockaddr_in6 *)src_in,
+                                   dst_in6, addr,
+                                   &dst);
+               if (ret)
+                       return ret;
+
+               if (resolve_neigh)
+                       ret = addr_resolve_neigh(dst, dst_in, addr);
+
+               ndev = dst->dev;
+               dev_hold(ndev);
+
+               dst_release(dst);
+       }
+
+       addr->bound_dev_if = ndev->ifindex;
+       addr->net = dev_net(ndev);
+       dev_put(ndev);
+
+       return ret;
 }
 
 static void process_req(struct work_struct *work)
@@ -343,7 +411,8 @@ static void process_req(struct work_struct *work)
                if (req->status == -ENODATA) {
                        src_in = (struct sockaddr *) &req->src_addr;
                        dst_in = (struct sockaddr *) &req->dst_addr;
-                       req->status = addr_resolve(src_in, dst_in, req->addr);
+                       req->status = addr_resolve(src_in, dst_in, req->addr,
+                                                  true);
                        if (req->status && time_after_eq(jiffies, req->timeout))
                                req->status = -ETIMEDOUT;
                        else if (req->status == -ENODATA)
@@ -403,7 +472,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        req->client = client;
        atomic_inc(&client->refcount);
 
-       req->status = addr_resolve(src_in, dst_in, addr);
+       req->status = addr_resolve(src_in, dst_in, addr, true);
        switch (req->status) {
        case 0:
                req->timeout = jiffies;
@@ -425,6 +494,26 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_ip);
 
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+                         const struct sockaddr *dst_addr,
+                         struct rdma_dev_addr *addr)
+{
+       struct sockaddr_storage ssrc_addr = {};
+       struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+
+       if (src_addr) {
+               if (src_addr->sa_family != dst_addr->sa_family)
+                       return -EINVAL;
+
+               memcpy(src_in, src_addr, rdma_addr_size(src_addr));
+       } else {
+               src_in->sa_family = dst_addr->sa_family;
+       }
+
+       return addr_resolve(src_in, dst_addr, addr, false);
+}
+EXPORT_SYMBOL(rdma_resolve_ip_route);
+
 void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
        struct addr_req *req, *temp_req;
@@ -456,8 +545,10 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
        complete(&((struct resolve_cb_context *)context)->comp);
 }
 
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-                              u8 *dmac, u16 *vlan_id, int if_index)
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+                                const union ib_gid *dgid,
+                                u8 *dmac, u16 *vlan_id, int *if_index,
+                                int *hoplimit)
 {
        int ret = 0;
        struct rdma_dev_addr dev_addr;
@@ -475,7 +566,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
        rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
        memset(&dev_addr, 0, sizeof(dev_addr));
-       dev_addr.bound_dev_if = if_index;
+       if (if_index)
+               dev_addr.bound_dev_if = *if_index;
        dev_addr.net = &init_net;
 
        ctx.addr = &dev_addr;
@@ -491,12 +583,16 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
        if (!dev)
                return -ENODEV;
+       if (if_index)
+               *if_index = dev_addr.bound_dev_if;
        if (vlan_id)
                *vlan_id = rdma_vlan_dev_vlan_id(dev);
+       if (hoplimit)
+               *hoplimit = dev_addr.hoplimit;
        dev_put(dev);
        return ret;
 }
-EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
 
 int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 {
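
A caller-side sketch for the new rdma_resolve_ip_route() export (the address below is made up): unlike rdma_resolve_ip(), it is synchronous and passes resolve_neigh = false, so it consults only the routing table and fills in the egress device, network type and hop limit without triggering ARP/ND.

static int route_lookup_example(struct rdma_dev_addr *dev_addr)  /* hypothetical */
{
        struct sockaddr_in dst = {
                .sin_family = AF_INET,
                .sin_addr.s_addr = htonl(0xc0a80001),   /* 192.168.0.1 */
        };

        /* NULL source: the helper substitutes a wildcard of dst's family */
        return rdma_resolve_ip_route(NULL, (struct sockaddr *)&dst, dev_addr);
}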
index 89bebeada38b9b5f6a09d9dd896c8047fa57d7ad..53343ffbff7a1302b03ce5f6a66116b06f1e60b5 100644 (file)
@@ -64,6 +64,7 @@ enum gid_attr_find_mask {
        GID_ATTR_FIND_MASK_GID          = 1UL << 0,
        GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
        GID_ATTR_FIND_MASK_DEFAULT      = 1UL << 2,
+       GID_ATTR_FIND_MASK_GID_TYPE     = 1UL << 3,
 };
 
 enum gid_table_entry_props {
@@ -81,10 +82,6 @@ enum gid_table_write_action {
 };
 
 struct ib_gid_table_entry {
-       /* This lock protects an entry from being
-        * read and written simultaneously.
-        */
-       rwlock_t            lock;
        unsigned long       props;
        union ib_gid        gid;
        struct ib_gid_attr  attr;
@@ -109,28 +106,86 @@ struct ib_gid_table {
         * are locked by this lock.
         **/
        struct mutex         lock;
+       /* This lock protects the table entries from being
+        * read and written simultaneously.
+        */
+       rwlock_t             rwlock;
        struct ib_gid_table_entry *data_vec;
 };
 
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+{
+       if (rdma_cap_roce_gid_table(ib_dev, port)) {
+               struct ib_event event;
+
+               event.device            = ib_dev;
+               event.element.port_num  = port;
+               event.event             = IB_EVENT_GID_CHANGE;
+
+               ib_dispatch_event(&event);
+       }
+}
+
+static const char * const gid_type_str[] = {
+       [IB_GID_TYPE_IB]        = "IB/RoCE v1",
+       [IB_GID_TYPE_ROCE_UDP_ENCAP]    = "RoCE v2",
+};
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
+{
+       if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
+               return gid_type_str[gid_type];
+
+       return "Invalid GID type";
+}
+EXPORT_SYMBOL(ib_cache_gid_type_str);
+
+int ib_cache_gid_parse_type_str(const char *buf)
+{
+       unsigned int i;
+       size_t len;
+       int err = -EINVAL;
+
+       len = strlen(buf);
+       if (len == 0)
+               return -EINVAL;
+
+       if (buf[len - 1] == '\n')
+               len--;
+
+       for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
+               if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
+                   len == strlen(gid_type_str[i])) {
+                       err = i;
+                       break;
+               }
+
+       return err;
+}
+EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
+
+/* This function expects that rwlock will be write locked in all
+ * scenarios and that lock will be locked in sleep-able (RoCE)
+ * scenarios.
+ */
 static int write_gid(struct ib_device *ib_dev, u8 port,
                     struct ib_gid_table *table, int ix,
                     const union ib_gid *gid,
                     const struct ib_gid_attr *attr,
                     enum gid_table_write_action action,
                     bool  default_gid)
+       __releases(&table->rwlock) __acquires(&table->rwlock)
 {
        int ret = 0;
        struct net_device *old_net_dev;
-       unsigned long flags;
 
        /* in rdma_cap_roce_gid_table, this function should be protected by a
         * sleep-able lock.
         */
-       write_lock_irqsave(&table->data_vec[ix].lock, flags);
 
        if (rdma_cap_roce_gid_table(ib_dev, port)) {
                table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
-               write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
+               write_unlock_irq(&table->rwlock);
                /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
                 * RoCE providers and thus only updates the cache.
                 */
@@ -140,7 +195,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
                else if (action == GID_TABLE_WRITE_ACTION_DEL)
                        ret = ib_dev->del_gid(ib_dev, port, ix,
                                              &table->data_vec[ix].context);
-               write_lock_irqsave(&table->data_vec[ix].lock, flags);
+               write_lock_irq(&table->rwlock);
        }
 
        old_net_dev = table->data_vec[ix].attr.ndev;
@@ -162,17 +217,6 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 
        table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
 
-       write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
-
-       if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
-               struct ib_event event;
-
-               event.device            = ib_dev;
-               event.element.port_num  = port;
-               event.event             = IB_EVENT_GID_CHANGE;
-
-               ib_dispatch_event(&event);
-       }
        return ret;
 }
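
The two string helpers added above are symmetric and tolerate the trailing newline a sysfs write carries; a minimal round-trip, as a kernel-side sketch:

static void gid_type_roundtrip(void)            /* hypothetical */
{
        int t = ib_cache_gid_parse_type_str("RoCE v2\n");

        if (t >= 0)
                pr_info("parsed gid type: %s\n",
                        ib_cache_gid_type_str(t));      /* prints "RoCE v2" */
}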
 
@@ -201,41 +245,58 @@ static int del_gid(struct ib_device *ib_dev, u8 port,
                         GID_TABLE_WRITE_ACTION_DEL, default_gid);
 }
 
+/* rwlock should be read locked */
 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
                    const struct ib_gid_attr *val, bool default_gid,
-                   unsigned long mask)
+                   unsigned long mask, int *pempty)
 {
-       int i;
+       int i = 0;
+       int found = -1;
+       int empty = pempty ? -1 : 0;
 
-       for (i = 0; i < table->sz; i++) {
-               unsigned long flags;
-               struct ib_gid_attr *attr = &table->data_vec[i].attr;
+       while (i < table->sz && (found < 0 || empty < 0)) {
+               struct ib_gid_table_entry *data = &table->data_vec[i];
+               struct ib_gid_attr *attr = &data->attr;
+               int curr_index = i;
 
-               read_lock_irqsave(&table->data_vec[i].lock, flags);
+               i++;
 
-               if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
-                       goto next;
+               if (data->props & GID_TABLE_ENTRY_INVALID)
+                       continue;
+
+               if (empty < 0)
+                       if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
+                           !memcmp(attr, &zattr, sizeof(*attr)) &&
+                           !data->props)
+                               empty = curr_index;
+
+               if (found >= 0)
+                       continue;
+
+               if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
+                   attr->gid_type != val->gid_type)
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_GID &&
-                   memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
-                       goto next;
+                   memcmp(gid, &data->gid, sizeof(*gid)))
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_NETDEV &&
                    attr->ndev != val->ndev)
-                       goto next;
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
-                   !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) !=
+                   !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
                    default_gid)
-                       goto next;
+                       continue;
 
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-               return i;
-next:
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+               found = curr_index;
        }
 
-       return -1;
+       if (pempty)
+               *pempty = empty;
+
+       return found;
 }
 
 static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
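
The rewritten find_gid() now reports two things in one pass: the first matching index and, optionally via pempty, the first reusable empty slot, which lets ib_cache_gid_add() drop its second scan for free space. The generic shape of that search, assuming a nonzero key with 0 marking an empty slot:

static int find_entry(const int *vec, int sz, int key, int *pempty)
{
        int i, found = -1;
        int empty = pempty ? -1 : 0;    /* 0 = caller doesn't care */

        for (i = 0; i < sz && (found < 0 || empty < 0); i++) {
                if (empty < 0 && vec[i] == 0)
                        empty = i;      /* remember the first hole */
                if (found < 0 && vec[i] == key)
                        found = i;      /* remember the first match */
        }
        if (pempty)
                *pempty = empty;
        return found;
}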
@@ -252,6 +313,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
        int ix;
        int ret = 0;
        struct net_device *idev;
+       int empty;
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
@@ -275,22 +337,25 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
        }
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
-                     GID_ATTR_FIND_MASK_NETDEV);
+                     GID_ATTR_FIND_MASK_GID_TYPE |
+                     GID_ATTR_FIND_MASK_NETDEV, &empty);
        if (ix >= 0)
                goto out_unlock;
 
-       ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID |
-                     GID_ATTR_FIND_MASK_DEFAULT);
-       if (ix < 0) {
+       if (empty < 0) {
                ret = -ENOSPC;
                goto out_unlock;
        }
 
-       add_gid(ib_dev, port, table, ix, gid, attr, false);
+       ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
+       if (!ret)
+               dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return ret;
 }
@@ -305,17 +370,22 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
        table = ports_table[port - rdma_start_port(ib_dev)];
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        ix = find_gid(table, gid, attr, false,
                      GID_ATTR_FIND_MASK_GID      |
+                     GID_ATTR_FIND_MASK_GID_TYPE |
                      GID_ATTR_FIND_MASK_NETDEV   |
-                     GID_ATTR_FIND_MASK_DEFAULT);
+                     GID_ATTR_FIND_MASK_DEFAULT,
+                     NULL);
        if (ix < 0)
                goto out_unlock;
 
-       del_gid(ib_dev, port, table, ix, false);
+       if (!del_gid(ib_dev, port, table, ix, false))
+               dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return 0;
 }
@@ -326,16 +396,24 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        int ix;
+       bool deleted = false;
 
        table  = ports_table[port - rdma_start_port(ib_dev)];
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        for (ix = 0; ix < table->sz; ix++)
                if (table->data_vec[ix].attr.ndev == ndev)
-                       del_gid(ib_dev, port, table, ix, false);
+                       if (!del_gid(ib_dev, port, table, ix, false))
+                               deleted = true;
 
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
+
+       if (deleted)
+               dispatch_gid_change_event(ib_dev, port);
+
        return 0;
 }
 
@@ -344,18 +422,14 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
 {
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
-       unsigned long flags;
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
        if (index < 0 || index >= table->sz)
                return -EINVAL;
 
-       read_lock_irqsave(&table->data_vec[index].lock, flags);
-       if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
-               read_unlock_irqrestore(&table->data_vec[index].lock, flags);
+       if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
                return -EAGAIN;
-       }
 
        memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
        if (attr) {
@@ -364,7 +438,6 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
                        dev_hold(attr->ndev);
        }
 
-       read_unlock_irqrestore(&table->data_vec[index].lock, flags);
        return 0;
 }
 
@@ -378,17 +451,21 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
        struct ib_gid_table *table;
        u8 p;
        int local_index;
+       unsigned long flags;
 
        for (p = 0; p < ib_dev->phys_port_cnt; p++) {
                table = ports_table[p];
-               local_index = find_gid(table, gid, val, false, mask);
+               read_lock_irqsave(&table->rwlock, flags);
+               local_index = find_gid(table, gid, val, false, mask, NULL);
                if (local_index >= 0) {
                        if (index)
                                *index = local_index;
                        if (port)
                                *port = p + rdma_start_port(ib_dev);
+                       read_unlock_irqrestore(&table->rwlock, flags);
                        return 0;
                }
+               read_unlock_irqrestore(&table->rwlock, flags);
        }
 
        return -ENOENT;
@@ -396,11 +473,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
 
 static int ib_cache_gid_find(struct ib_device *ib_dev,
                             const union ib_gid *gid,
+                            enum ib_gid_type gid_type,
                             struct net_device *ndev, u8 *port,
                             u16 *index)
 {
-       unsigned long mask = GID_ATTR_FIND_MASK_GID;
-       struct ib_gid_attr gid_attr_val = {.ndev = ndev};
+       unsigned long mask = GID_ATTR_FIND_MASK_GID |
+                            GID_ATTR_FIND_MASK_GID_TYPE;
+       struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
 
        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -411,14 +490,17 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
 
 int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
                               const union ib_gid *gid,
+                              enum ib_gid_type gid_type,
                               u8 port, struct net_device *ndev,
                               u16 *index)
 {
        int local_index;
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
-       unsigned long mask = GID_ATTR_FIND_MASK_GID;
-       struct ib_gid_attr val = {.ndev = ndev};
+       unsigned long mask = GID_ATTR_FIND_MASK_GID |
+                            GID_ATTR_FIND_MASK_GID_TYPE;
+       struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+       unsigned long flags;
 
        if (port < rdma_start_port(ib_dev) ||
            port > rdma_end_port(ib_dev))
@@ -429,13 +511,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;
 
-       local_index = find_gid(table, gid, &val, false, mask);
+       read_lock_irqsave(&table->rwlock, flags);
+       local_index = find_gid(table, gid, &val, false, mask, NULL);
        if (local_index >= 0) {
                if (index)
                        *index = local_index;
+               read_unlock_irqrestore(&table->rwlock, flags);
                return 0;
        }
 
+       read_unlock_irqrestore(&table->rwlock, flags);
        return -ENOENT;
 }
 EXPORT_SYMBOL(ib_find_cached_gid_by_port);
@@ -472,6 +557,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        unsigned int i;
+       unsigned long flags;
        bool found = false;
 
        if (!ports_table)
@@ -484,11 +570,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
+       read_lock_irqsave(&table->rwlock, flags);
        for (i = 0; i < table->sz; i++) {
                struct ib_gid_attr attr;
-               unsigned long flags;
 
-               read_lock_irqsave(&table->data_vec[i].lock, flags);
                if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
                        goto next;
 
@@ -501,11 +586,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
                        found = true;
 
 next:
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-
                if (found)
                        break;
        }
+       read_unlock_irqrestore(&table->rwlock, flags);
 
        if (!found)
                return -ENOENT;
@@ -517,9 +601,9 @@ next:
 
 static struct ib_gid_table *alloc_gid_table(int sz)
 {
-       unsigned int i;
        struct ib_gid_table *table =
                kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+
        if (!table)
                return NULL;
 
@@ -530,9 +614,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
        mutex_init(&table->lock);
 
        table->sz = sz;
-
-       for (i = 0; i < sz; i++)
-               rwlock_init(&table->data_vec[i].lock);
+       rwlock_init(&table->rwlock);
 
        return table;
 
@@ -553,30 +635,37 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
                                   struct ib_gid_table *table)
 {
        int i;
+       bool deleted = false;
 
        if (!table)
                return;
 
+       write_lock_irq(&table->rwlock);
        for (i = 0; i < table->sz; ++i) {
                if (memcmp(&table->data_vec[i].gid, &zgid,
                           sizeof(table->data_vec[i].gid)))
-                       del_gid(ib_dev, port, table, i,
-                               table->data_vec[i].props &
-                               GID_ATTR_FIND_MASK_DEFAULT);
+                       if (!del_gid(ib_dev, port, table, i,
+                                    table->data_vec[i].props &
+                                    GID_ATTR_FIND_MASK_DEFAULT))
+                               deleted = true;
        }
+       write_unlock_irq(&table->rwlock);
+
+       if (deleted)
+               dispatch_gid_change_event(ib_dev, port);
 }
 
 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
+                                 unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode)
 {
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        union ib_gid gid;
        struct ib_gid_attr gid_attr;
+       struct ib_gid_attr zattr_type = zattr;
        struct ib_gid_table *table;
-       int ix;
-       union ib_gid current_gid;
-       struct ib_gid_attr current_gid_attr = {};
+       unsigned int gid_type;
 
        table  = ports_table[port - rdma_start_port(ib_dev)];
 
@@ -584,46 +673,82 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
        memset(&gid_attr, 0, sizeof(gid_attr));
        gid_attr.ndev = ndev;
 
-       mutex_lock(&table->lock);
-       ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT);
-
-       /* Coudn't find default GID location */
-       WARN_ON(ix < 0);
-
-       if (!__ib_cache_gid_get(ib_dev, port, ix,
-                               &current_gid, &current_gid_attr) &&
-           mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
-           !memcmp(&gid, &current_gid, sizeof(gid)) &&
-           !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
-               goto unlock;
-
-       if ((memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
-            memcmp(&current_gid_attr, &zattr,
-                   sizeof(current_gid_attr))) &&
-           del_gid(ib_dev, port, table, ix, true)) {
-               pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
-                       ix, gid.raw);
-               goto unlock;
-       }
+       for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
+               int ix;
+               union ib_gid current_gid;
+               struct ib_gid_attr current_gid_attr = {};
+
+               if (1UL << gid_type & ~gid_type_mask)
+                       continue;
+
+               gid_attr.gid_type = gid_type;
+
+               mutex_lock(&table->lock);
+               write_lock_irq(&table->rwlock);
+               ix = find_gid(table, NULL, &gid_attr, true,
+                             GID_ATTR_FIND_MASK_GID_TYPE |
+                             GID_ATTR_FIND_MASK_DEFAULT,
+                             NULL);
+
+               /* Couldn't find default GID location */
+               WARN_ON(ix < 0);
+
+               zattr_type.gid_type = gid_type;
+
+               if (!__ib_cache_gid_get(ib_dev, port, ix,
+                                       &current_gid, &current_gid_attr) &&
+                   mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
+                   !memcmp(&gid, &current_gid, sizeof(gid)) &&
+                   !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
+                       goto release;
+
+               if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
+                   memcmp(&current_gid_attr, &zattr_type,
+                          sizeof(current_gid_attr))) {
+                       if (del_gid(ib_dev, port, table, ix, true)) {
+                               pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
+                                       ix, gid.raw);
+                               goto release;
+                       } else {
+                               dispatch_gid_change_event(ib_dev, port);
+                       }
+               }
 
-       if (mode == IB_CACHE_GID_DEFAULT_MODE_SET)
-               if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
-                       pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
-                               gid.raw);
+               if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
+                       if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
+                               pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
+                                       gid.raw);
+                       else
+                               dispatch_gid_change_event(ib_dev, port);
+               }
 
-unlock:
-       if (current_gid_attr.ndev)
-               dev_put(current_gid_attr.ndev);
-       mutex_unlock(&table->lock);
+release:
+               if (current_gid_attr.ndev)
+                       dev_put(current_gid_attr.ndev);
+               write_unlock_irq(&table->rwlock);
+               mutex_unlock(&table->lock);
+       }
 }
 
 static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
                                     struct ib_gid_table *table)
 {
-       if (rdma_protocol_roce(ib_dev, port)) {
-               struct ib_gid_table_entry *entry = &table->data_vec[0];
+       unsigned int i;
+       unsigned long roce_gid_type_mask;
+       unsigned int num_default_gids;
+       unsigned int current_gid = 0;
+
+       roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+       num_default_gids = hweight_long(roce_gid_type_mask);
+       for (i = 0; i < num_default_gids && i < table->sz; i++) {
+               struct ib_gid_table_entry *entry =
+                       &table->data_vec[i];
 
                entry->props |= GID_TABLE_ENTRY_DEFAULT;
+               current_gid = find_next_bit(&roce_gid_type_mask,
+                                           BITS_PER_LONG,
+                                           current_gid);
+               entry->attr.gid_type = current_gid++;
        }
 
        return 0;
@@ -728,20 +853,30 @@ int ib_get_cached_gid(struct ib_device *device,
                      union ib_gid     *gid,
                      struct ib_gid_attr *gid_attr)
 {
+       int res;
+       unsigned long flags;
+       struct ib_gid_table **ports_table = device->cache.gid_cache;
+       struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+
        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;
 
-       return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+       read_lock_irqsave(&table->rwlock, flags);
+       res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+       read_unlock_irqrestore(&table->rwlock, flags);
+
+       return res;
 }
 EXPORT_SYMBOL(ib_get_cached_gid);
 
 int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
+                      enum ib_gid_type gid_type,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index)
 {
-       return ib_cache_gid_find(device, gid, ndev, port_num, index);
+       return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
 }
 EXPORT_SYMBOL(ib_find_cached_gid);
 
@@ -956,10 +1091,12 @@ static void ib_cache_update(struct ib_device *device,
 
        device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
        if (!use_roce_gid_table) {
+               write_lock(&table->rwlock);
                for (i = 0; i < gid_cache->table_len; i++) {
                        modify_gid(device, port, table, i, gid_cache->table + i,
                                   &zattr, false);
                }
+               write_unlock(&table->rwlock);
        }
 
        device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
index 0a26dd6d9b19f96d97c9b3b244509892afae7023..1d92e091e22ef0bdb64ee7433eb2cf5fbba9600c 100644 (file)
@@ -364,7 +364,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
        read_lock_irqsave(&cm.device_lock, flags);
        list_for_each_entry(cm_dev, &cm.device_list, list) {
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
-                                       ndev, &p, NULL)) {
+                                       path->gid_type, ndev, &p, NULL)) {
                        port = cm_dev->port[p-1];
                        break;
                }
@@ -782,11 +782,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
 
        /* Check if the device started its remove_one */
-       spin_lock_irq(&cm.lock);
+       spin_lock_irqsave(&cm.lock, flags);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
-       spin_unlock_irq(&cm.lock);
+       spin_unlock_irqrestore(&cm.lock, flags);
 
        cm_id_priv->timewait_info = NULL;
 }
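
The irqsave conversion follows the general rule: cm_enter_timewait() can be reached with interrupts already disabled, and spin_unlock_irq() would unconditionally re-enable them on the way out. The save/restore variants preserve the caller's IRQ state, as in this sketch with a hypothetical function:

static void touch_cm_state(void)                /* hypothetical */
{
        unsigned long flags;

        spin_lock_irqsave(&cm.lock, flags);     /* records current IRQ state */
        /* ... mutate state guarded by cm.lock ... */
        spin_unlock_irqrestore(&cm.lock, flags); /* restores, never force-enables */
}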
@@ -1600,6 +1600,8 @@ static int cm_req_handler(struct cm_work *work)
        struct ib_cm_id *cm_id;
        struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
        struct cm_req_msg *req_msg;
+       union ib_gid gid;
+       struct ib_gid_attr gid_attr;
        int ret;
 
        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1639,11 +1641,31 @@ static int cm_req_handler(struct cm_work *work)
        cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 
        memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
-       ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+       work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+       ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
+                               work->port->port_num,
+                               cm_id_priv->av.ah_attr.grh.sgid_index,
+                               &gid, &gid_attr);
+       if (!ret) {
+               if (gid_attr.ndev) {
+                       work->path[0].ifindex = gid_attr.ndev->ifindex;
+                       work->path[0].net = dev_net(gid_attr.ndev);
+                       dev_put(gid_attr.ndev);
+               }
+               work->path[0].gid_type = gid_attr.gid_type;
+               ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+       }
        if (ret) {
-               ib_get_cached_gid(work->port->cm_dev->ib_device,
-                                 work->port->port_num, 0, &work->path[0].sgid,
-                                 NULL);
+               int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+                                           work->port->port_num, 0,
+                                           &work->path[0].sgid,
+                                           &gid_attr);
+               if (!err && gid_attr.ndev) {
+                       work->path[0].ifindex = gid_attr.ndev->ifindex;
+                       work->path[0].net = dev_net(gid_attr.ndev);
+                       dev_put(gid_attr.ndev);
+               }
+               work->path[0].gid_type = gid_attr.gid_type;
                ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
                               &work->path[0].sgid, sizeof work->path[0].sgid,
                               NULL, 0);
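Both call sites above follow the same contract: when ib_get_cached_gid() fills a gid_attr whose ndev is non-NULL, the cache has taken a netdevice reference on the caller's behalf, so the caller copies out what it needs and drops the reference with dev_put(). Reduced to its core (device, port, index and ifindex stand in for caller state):

	union ib_gid gid;
	struct ib_gid_attr attr;
	int ifindex = -1;

	if (!ib_get_cached_gid(device, port, index, &gid, &attr) &&
	    attr.ndev) {
		ifindex = attr.ndev->ifindex;	/* copy out what we need */
		dev_put(attr.ndev);		/* drop the cache's reference */
	}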
@@ -3482,6 +3504,7 @@ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
 EXPORT_SYMBOL(ib_cm_notify);
 
 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
+                           struct ib_mad_send_buf *send_buf,
                            struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct cm_port *port = mad_agent->context;
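cm_recv_handler() is updated for the new ib_mad_agent recv_handler prototype: the MAD core now passes the matching request's ib_mad_send_buf directly (or NULL for unsolicited MADs) instead of smuggling it through wc->wr_id, as the mad.c hunks further down show. A hypothetical agent callback under the new signature:

static void example_recv_handler(struct ib_mad_agent *agent,
				 struct ib_mad_send_buf *send_buf,
				 struct ib_mad_recv_wc *mad_recv_wc)
{
	if (send_buf)	/* this MAD answers one of our requests */
		pr_debug("response for request %p\n", send_buf);
	ib_free_recv_mad(mad_recv_wc);
}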
@@ -3731,16 +3754,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
 }
 EXPORT_SYMBOL(ib_cm_init_qp_attr);
 
-static void cm_get_ack_delay(struct cm_device *cm_dev)
-{
-       struct ib_device_attr attr;
-
-       if (ib_query_device(cm_dev->ib_device, &attr))
-               cm_dev->ack_delay = 0; /* acks will rely on packet life time */
-       else
-               cm_dev->ack_delay = attr.local_ca_ack_delay;
-}
-
 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
                               char *buf)
 {
@@ -3852,7 +3865,7 @@ static void cm_add_one(struct ib_device *ib_device)
                return;
 
        cm_dev->ib_device = ib_device;
-       cm_get_ack_delay(cm_dev);
+       cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
        cm_dev->going_down = 0;
        cm_dev->device = device_create(&cm_class, &ib_device->dev,
                                       MKDEV(0, 0), NULL,
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2d762a2ecd81252cde34cbc2dbc1083c1d8832dc..9729639df407abdc382959ef8151fbbcc3370589 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/in6.h>
 #include <linux/mutex.h>
 #include <linux/random.h>
+#include <linux/igmp.h>
 #include <linux/idr.h>
 #include <linux/inetdevice.h>
 #include <linux/slab.h>
@@ -60,6 +61,8 @@
 #include <rdma/ib_sa.h>
 #include <rdma/iw_cm.h>
 
+#include "core_priv.h"
+
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -150,6 +153,7 @@ struct cma_device {
        struct completion       comp;
        atomic_t                refcount;
        struct list_head        id_list;
+       enum ib_gid_type        *default_gid_type;
 };
 
 struct rdma_bind_list {
@@ -185,6 +189,67 @@ enum {
        CMA_OPTION_AFONLY,
 };
 
+void cma_ref_dev(struct cma_device *cma_dev)
+{
+       atomic_inc(&cma_dev->refcount);
+}
+
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+                                            void               *cookie)
+{
+       struct cma_device *cma_dev;
+       struct cma_device *found_cma_dev = NULL;
+
+       mutex_lock(&lock);
+
+       list_for_each_entry(cma_dev, &dev_list, list)
+               if (filter(cma_dev->device, cookie)) {
+                       found_cma_dev = cma_dev;
+                       break;
+               }
+
+       if (found_cma_dev)
+               cma_ref_dev(found_cma_dev);
+       mutex_unlock(&lock);
+       return found_cma_dev;
+}
+
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port)
+{
+       if (port < rdma_start_port(cma_dev->device) ||
+           port > rdma_end_port(cma_dev->device))
+               return -EINVAL;
+
+       return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port,
+                            enum ib_gid_type default_gid_type)
+{
+       unsigned long supported_gids;
+
+       if (port < rdma_start_port(cma_dev->device) ||
+           port > rdma_end_port(cma_dev->device))
+               return -EINVAL;
+
+       supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
+
+       if (!(supported_gids & 1 << default_gid_type))
+               return -EINVAL;
+
+       cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
+               default_gid_type;
+
+       return 0;
+}
+
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
+{
+       return cma_dev->device;
+}
+
 /*
  * Device removal can occur at anytime, so we need extra handling to
  * serialize notifying the user of device removal with other callbacks.
@@ -228,6 +293,7 @@ struct rdma_id_private {
        u8                      tos;
        u8                      reuseaddr;
        u8                      afonly;
+       enum ib_gid_type        gid_type;
 };
 
 struct cma_multicast {
@@ -239,6 +305,7 @@ struct cma_multicast {
        void                    *context;
        struct sockaddr_storage addr;
        struct kref             mcref;
+       bool                    igmp_joined;
 };
 
 struct cma_work {
@@ -335,18 +402,48 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 }
 
-static void cma_attach_to_dev(struct rdma_id_private *id_priv,
-                             struct cma_device *cma_dev)
+static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
 {
-       atomic_inc(&cma_dev->refcount);
+       struct in_device *in_dev = NULL;
+
+       if (ndev) {
+               rtnl_lock();
+               in_dev = __in_dev_get_rtnl(ndev);
+               if (in_dev) {
+                       if (join)
+                               ip_mc_inc_group(in_dev,
+                                               *(__be32 *)(mgid->raw + 12));
+                       else
+                               ip_mc_dec_group(in_dev,
+                                               *(__be32 *)(mgid->raw + 12));
+               }
+               rtnl_unlock();
+       }
+       return (in_dev) ? 0 : -ENODEV;
+}
+
+static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
+                              struct cma_device *cma_dev)
+{
+       cma_ref_dev(cma_dev);
        id_priv->cma_dev = cma_dev;
+       id_priv->gid_type = 0;
        id_priv->id.device = cma_dev->device;
        id_priv->id.route.addr.dev_addr.transport =
                rdma_node_get_transport(cma_dev->device->node_type);
        list_add_tail(&id_priv->list, &cma_dev->id_list);
 }
 
-static inline void cma_deref_dev(struct cma_device *cma_dev)
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+                             struct cma_device *cma_dev)
+{
+       _cma_attach_to_dev(id_priv, cma_dev);
+       id_priv->gid_type =
+               cma_dev->default_gid_type[id_priv->id.port_num -
+                                         rdma_start_port(cma_dev->device)];
+}
+
+void cma_deref_dev(struct cma_device *cma_dev)
 {
        if (atomic_dec_and_test(&cma_dev->refcount))
                complete(&cma_dev->comp);
@@ -441,6 +538,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 }
 
 static inline int cma_validate_port(struct ib_device *device, u8 port,
+                                   enum ib_gid_type gid_type,
                                      union ib_gid *gid, int dev_type,
                                      int bound_if_index)
 {
@@ -453,10 +551,25 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
        if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
                return ret;
 
-       if (dev_type == ARPHRD_ETHER)
+       if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
                ndev = dev_get_by_index(&init_net, bound_if_index);
+               if (ndev && ndev->flags & IFF_LOOPBACK) {
+                       pr_info("detected loopback device\n");
+                       dev_put(ndev);
 
-       ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+                       if (!device->get_netdev)
+                               return -EOPNOTSUPP;
+
+                       ndev = device->get_netdev(device, port);
+                       if (!ndev)
+                               return -ENODEV;
+               }
+       } else {
+               gid_type = IB_GID_TYPE_IB;
+       }
+
+       ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
+                                        ndev, NULL);
 
        if (ndev)
                dev_put(ndev);
@@ -490,7 +603,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                gidp = rdma_protocol_roce(cma_dev->device, port) ?
                       &iboe_gid : &gid;
 
-               ret = cma_validate_port(cma_dev->device, port, gidp,
+               ret = cma_validate_port(cma_dev->device, port,
+                                       rdma_protocol_ib(cma_dev->device, port) ?
+                                       IB_GID_TYPE_IB :
+                                       listen_id_priv->gid_type, gidp,
                                        dev_addr->dev_type,
                                        dev_addr->bound_dev_if);
                if (!ret) {
@@ -509,8 +625,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                        gidp = rdma_protocol_roce(cma_dev->device, port) ?
                               &iboe_gid : &gid;
 
-                       ret = cma_validate_port(cma_dev->device, port, gidp,
-                                               dev_addr->dev_type,
+                       ret = cma_validate_port(cma_dev->device, port,
+                                               rdma_protocol_ib(cma_dev->device, port) ?
+                                               IB_GID_TYPE_IB :
+                                               cma_dev->default_gid_type[port - 1],
+                                               gidp, dev_addr->dev_type,
                                                dev_addr->bound_dev_if);
                        if (!ret) {
                                id_priv->id.port_num = port;
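Both cma_validate_port() call sites encode the same rule: pure IB ports always match IB GIDs, while RoCE ports match against the GID type inherited from the listener or, failing that, the port's configured default. Spelled out as a sketch, with default_gid_type_for() standing in for the cma_dev->default_gid_type array lookup:

	enum ib_gid_type gid_type;

	if (rdma_protocol_ib(cma_dev->device, port))
		gid_type = IB_GID_TYPE_IB;		/* native IB */
	else
		gid_type = default_gid_type_for(port);	/* RoCE v1 or v2 */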
@@ -1437,8 +1556,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
                                      id_priv->id.port_num)) {
                        ib_sa_free_multicast(mc->multicast.ib);
                        kfree(mc);
-               } else
+               } else {
+                       if (mc->igmp_joined) {
+                               struct rdma_dev_addr *dev_addr =
+                                       &id_priv->id.route.addr.dev_addr;
+                               struct net_device *ndev = NULL;
+
+                               if (dev_addr->bound_dev_if)
+                                       ndev = dev_get_by_index(&init_net,
+                                                               dev_addr->bound_dev_if);
+                               if (ndev) {
+                                       cma_igmp_send(ndev,
+                                                     &mc->multicast.ib->rec.mgid,
+                                                     false);
+                                       dev_put(ndev);
+                               }
+                       }
                        kref_put(&mc->mcref, release_mc);
+               }
        }
 }
 
@@ -1896,7 +2031,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event;
        int ret;
-       struct ib_device_attr attr;
        struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
        struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
@@ -1938,13 +2072,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
        memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
 
-       ret = ib_query_device(conn_id->id.device, &attr);
-       if (ret) {
-               mutex_unlock(&conn_id->handler_mutex);
-               rdma_destroy_id(new_cm_id);
-               goto out;
-       }
-
        memset(&event, 0, sizeof event);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        event.param.conn.private_data = iw_event->private_data;
@@ -2051,7 +2178,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
        memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
               rdma_addr_size(cma_src_addr(id_priv)));
 
-       cma_attach_to_dev(dev_id_priv, cma_dev);
+       _cma_attach_to_dev(dev_id_priv, cma_dev);
        list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
        atomic_inc(&id_priv->refcount);
        dev_id_priv->internal_id = 1;
@@ -2321,8 +2448,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
        if (addr->dev_addr.bound_dev_if) {
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+               if (!ndev)
+                       return -ENODEV;
+
+               if (ndev->flags & IFF_LOOPBACK) {
+                       dev_put(ndev);
+                       if (!id_priv->id.device->get_netdev)
+                               return -EOPNOTSUPP;
+
+                       ndev = id_priv->id.device->get_netdev(id_priv->id.device,
+                                                             id_priv->id.port_num);
+                       if (!ndev)
+                               return -ENODEV;
+               }
+
                route->path_rec->net = &init_net;
-               route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+               route->path_rec->ifindex = ndev->ifindex;
+               route->path_rec->gid_type = id_priv->gid_type;
        }
        if (!ndev) {
                ret = -ENODEV;
@@ -2336,7 +2478,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
                    &route->path_rec->dgid);
 
-       route->path_rec->hop_limit = 1;
+       /* Use the hint from IP Stack to select GID Type */
+       if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+               route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+       if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
+               /* TODO: get the hoplimit from the inet/inet6 device */
+               route->path_rec->hop_limit = addr->dev_addr.hoplimit;
+       else
+               route->path_rec->hop_limit = 1;
        route->path_rec->reversible = 1;
        route->path_rec->pkey = cpu_to_be16(0xffff);
        route->path_rec->mtu_selector = IB_SA_EQ;
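The "hint from IP Stack" check is effectively a max(): enum ib_gid_type is ordered so that RoCE v2 sorts above v1, and a resolved address that crossed an IP router forces the routable v2 type even if the id was configured for v1 (v1 frames cannot traverse an IP router). An equivalent formulation, ours rather than the patch's:

	route->path_rec->gid_type =
		max(route->path_rec->gid_type,
		    ib_network_to_gid_type(addr->dev_addr.network));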
@@ -3534,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
        event.status = status;
        event.param.ud.private_data = mc->context;
        if (!status) {
+               struct rdma_dev_addr *dev_addr =
+                       &id_priv->id.route.addr.dev_addr;
+               struct net_device *ndev =
+                       dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+               enum ib_gid_type gid_type =
+                       id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+                       rdma_start_port(id_priv->cma_dev->device)];
+
                event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
                ib_init_ah_from_mcmember(id_priv->id.device,
                                         id_priv->id.port_num, &multicast->rec,
+                                        ndev, gid_type,
                                         &event.param.ud.ah_attr);
                event.param.ud.qp_num = 0xFFFFFF;
                event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+               if (ndev)
+                       dev_put(ndev);
        } else
                event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
 
@@ -3672,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 {
        struct iboe_mcast_work *work;
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
-       int err;
+       int err = 0;
        struct sockaddr *addr = (struct sockaddr *)&mc->addr;
        struct net_device *ndev = NULL;
+       enum ib_gid_type gid_type;
 
        if (cma_zero_addr((struct sockaddr *)&mc->addr))
                return -EINVAL;
@@ -3704,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
        mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
        mc->multicast.ib->rec.hop_limit = 1;
        mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+
+       gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+                  rdma_start_port(id_priv->cma_dev->device)];
+       if (addr->sa_family == AF_INET) {
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+                                           true);
+               if (!err) {
+                       mc->igmp_joined = true;
+                       mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+               }
+       } else {
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       err = -ENOTSUPP;
+       }
        dev_put(ndev);
-       if (!mc->multicast.ib->rec.mtu) {
-               err = -EINVAL;
+       if (err || !mc->multicast.ib->rec.mtu) {
+               if (!err)
+                       err = -EINVAL;
                goto out2;
        }
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
@@ -3745,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
        memcpy(&mc->addr, addr, rdma_addr_size(addr));
        mc->context = context;
        mc->id_priv = id_priv;
-
+       mc->igmp_joined = false;
        spin_lock(&id_priv->lock);
        list_add(&mc->list, &id_priv->mc_list);
        spin_unlock(&id_priv->lock);
@@ -3790,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
                        if (rdma_cap_ib_mcast(id->device, id->port_num)) {
                                ib_sa_free_multicast(mc->multicast.ib);
                                kfree(mc);
-                       } else if (rdma_protocol_roce(id->device, id->port_num))
+                       } else if (rdma_protocol_roce(id->device, id->port_num)) {
+                               if (mc->igmp_joined) {
+                                       struct rdma_dev_addr *dev_addr =
+                                               &id->route.addr.dev_addr;
+                                       struct net_device *ndev = NULL;
+
+                                       if (dev_addr->bound_dev_if)
+                                               ndev = dev_get_by_index(&init_net,
+                                                                       dev_addr->bound_dev_if);
+                                       if (ndev) {
+                                               cma_igmp_send(ndev,
+                                                             &mc->multicast.ib->rec.mgid,
+                                                             false);
+                                               dev_put(ndev);
+                                       }
+                                       mc->igmp_joined = false;
+                               }
                                kref_put(&mc->mcref, release_mc);
-
+                       }
                        return;
                }
        }
@@ -3861,12 +4054,27 @@ static void cma_add_one(struct ib_device *device)
 {
        struct cma_device *cma_dev;
        struct rdma_id_private *id_priv;
+       unsigned int i;
+       unsigned long supported_gids = 0;
 
        cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
        if (!cma_dev)
                return;
 
        cma_dev->device = device;
+       cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
+                                           sizeof(*cma_dev->default_gid_type),
+                                           GFP_KERNEL);
+       if (!cma_dev->default_gid_type) {
+               kfree(cma_dev);
+               return;
+       }
+       for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+               supported_gids = roce_gid_type_mask_support(device, i);
+               WARN_ON(!supported_gids);
+               cma_dev->default_gid_type[i - rdma_start_port(device)] =
+                       find_first_bit(&supported_gids, BITS_PER_LONG);
+       }
 
        init_completion(&cma_dev->comp);
        atomic_set(&cma_dev->refcount, 1);
@@ -3946,6 +4154,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
        mutex_unlock(&lock);
 
        cma_process_remove(cma_dev);
+       kfree(cma_dev->default_gid_type);
        kfree(cma_dev);
 }
 
@@ -4079,6 +4288,7 @@ static int __init cma_init(void)
 
        if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
                printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+       cma_configfs_init();
 
        return 0;
 
@@ -4093,6 +4303,7 @@ err_wq:
 
 static void __exit cma_cleanup(void)
 {
+       cma_configfs_exit();
        ibnl_remove_client(RDMA_NL_RDMA_CM);
        ib_unregister_client(&cma_client);
        unregister_netdevice_notifier(&cma_nb);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
new file mode 100644 (file)
index 0000000..18b112a
--- /dev/null
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/configfs.h>
+#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+struct cma_device;
+
+struct cma_dev_group;
+
+struct cma_dev_port_group {
+       unsigned int            port_num;
+       struct cma_dev_group    *cma_dev_group;
+       struct config_group     group;
+};
+
+struct cma_dev_group {
+       char                            name[IB_DEVICE_NAME_MAX];
+       struct config_group             device_group;
+       struct config_group             ports_group;
+       struct config_group             *default_dev_group[2];
+       struct config_group             **default_ports_group;
+       struct cma_dev_port_group       *ports;
+};
+
+static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
+{
+       struct config_group *group;
+
+       if (!item)
+               return NULL;
+
+       group = container_of(item, struct config_group, cg_item);
+       return container_of(group, struct cma_dev_port_group, group);
+}
+
+static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
+{
+       return !strcmp(ib_dev->name, cookie);
+}
+
+static int cma_configfs_params_get(struct config_item *item,
+                                  struct cma_device **pcma_dev,
+                                  struct cma_dev_port_group **pgroup)
+{
+       struct cma_dev_port_group *group = to_dev_port_group(item);
+       struct cma_device *cma_dev;
+
+       if (!group)
+               return -ENODEV;
+
+       cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+                                           group->cma_dev_group->name);
+       if (!cma_dev)
+               return -ENODEV;
+
+       *pcma_dev = cma_dev;
+       *pgroup = group;
+
+       return 0;
+}
+
+static void cma_configfs_params_put(struct cma_device *cma_dev)
+{
+       cma_deref_dev(cma_dev);
+}
+
+static ssize_t default_roce_mode_show(struct config_item *item,
+                                     char *buf)
+{
+       struct cma_device *cma_dev;
+       struct cma_dev_port_group *group;
+       int gid_type;
+       ssize_t ret;
+
+       ret = cma_configfs_params_get(item, &cma_dev, &group);
+       if (ret)
+               return ret;
+
+       gid_type = cma_get_default_gid_type(cma_dev, group->port_num);
+       cma_configfs_params_put(cma_dev);
+
+       if (gid_type < 0)
+               return gid_type;
+
+       return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+}
+
+static ssize_t default_roce_mode_store(struct config_item *item,
+                                      const char *buf, size_t count)
+{
+       struct cma_device *cma_dev;
+       struct cma_dev_port_group *group;
+       int gid_type = ib_cache_gid_parse_type_str(buf);
+       ssize_t ret;
+
+       if (gid_type < 0)
+               return -EINVAL;
+
+       ret = cma_configfs_params_get(item, &cma_dev, &group);
+       if (ret)
+               return ret;
+
+       ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
+
+       cma_configfs_params_put(cma_dev);
+
+       return !ret ? strnlen(buf, count) : ret;
+}
+
+CONFIGFS_ATTR(, default_roce_mode);
+
+static struct configfs_attribute *cma_configfs_attributes[] = {
+       &attr_default_roce_mode,
+       NULL,
+};
+
+static struct config_item_type cma_port_group_type = {
+       .ct_attrs       = cma_configfs_attributes,
+       .ct_owner       = THIS_MODULE
+};
+
+static int make_cma_ports(struct cma_dev_group *cma_dev_group,
+                         struct cma_device *cma_dev)
+{
+       struct ib_device *ibdev;
+       unsigned int i;
+       unsigned int ports_num;
+       struct cma_dev_port_group *ports;
+       struct config_group **ports_group;
+       int err;
+
+       ibdev = cma_get_ib_dev(cma_dev);
+
+       if (!ibdev)
+               return -ENODEV;
+
+       ports_num = ibdev->phys_port_cnt;
+       ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
+                       GFP_KERNEL);
+       ports_group = kcalloc(ports_num + 1, sizeof(*ports_group), GFP_KERNEL);
+
+       if (!ports || !ports_group) {
+               err = -ENOMEM;
+               goto free;
+       }
+
+       for (i = 0; i < ports_num; i++) {
+               char port_str[10];
+
+               ports[i].port_num = i + 1;
+               snprintf(port_str, sizeof(port_str), "%u", i + 1);
+               ports[i].cma_dev_group = cma_dev_group;
+               config_group_init_type_name(&ports[i].group,
+                                           port_str,
+                                           &cma_port_group_type);
+               ports_group[i] = &ports[i].group;
+       }
+       ports_group[i] = NULL;
+       cma_dev_group->default_ports_group = ports_group;
+       cma_dev_group->ports = ports;
+
+       return 0;
+free:
+       kfree(ports);
+       kfree(ports_group);
+       cma_dev_group->ports = NULL;
+       cma_dev_group->default_ports_group = NULL;
+       return err;
+}
+
+static void release_cma_dev(struct config_item  *item)
+{
+       struct config_group *group = container_of(item, struct config_group,
+                                                 cg_item);
+       struct cma_dev_group *cma_dev_group = container_of(group,
+                                                          struct cma_dev_group,
+                                                          device_group);
+
+       kfree(cma_dev_group);
+};
+
+static void release_cma_ports_group(struct config_item  *item)
+{
+       struct config_group *group = container_of(item, struct config_group,
+                                                 cg_item);
+       struct cma_dev_group *cma_dev_group = container_of(group,
+                                                          struct cma_dev_group,
+                                                          ports_group);
+
+       kfree(cma_dev_group->ports);
+       kfree(cma_dev_group->default_ports_group);
+       cma_dev_group->ports = NULL;
+       cma_dev_group->default_ports_group = NULL;
+};
+
+static struct configfs_item_operations cma_ports_item_ops = {
+       .release = release_cma_ports_group
+};
+
+static struct config_item_type cma_ports_group_type = {
+       .ct_item_ops    = &cma_ports_item_ops,
+       .ct_owner       = THIS_MODULE
+};
+
+static struct configfs_item_operations cma_device_item_ops = {
+       .release = release_cma_dev
+};
+
+static struct config_item_type cma_device_group_type = {
+       .ct_item_ops    = &cma_device_item_ops,
+       .ct_owner       = THIS_MODULE
+};
+
+static struct config_group *make_cma_dev(struct config_group *group,
+                                        const char *name)
+{
+       int err = -ENODEV;
+       struct cma_device *cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+                                                              (void *)name);
+       struct cma_dev_group *cma_dev_group = NULL;
+
+       if (!cma_dev)
+               goto fail;
+
+       cma_dev_group = kzalloc(sizeof(*cma_dev_group), GFP_KERNEL);
+
+       if (!cma_dev_group) {
+               err = -ENOMEM;
+               goto fail;
+       }
+
+       strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+
+       err = make_cma_ports(cma_dev_group, cma_dev);
+       if (err)
+               goto fail;
+
+       cma_dev_group->ports_group.default_groups =
+               cma_dev_group->default_ports_group;
+       config_group_init_type_name(&cma_dev_group->ports_group, "ports",
+                                   &cma_ports_group_type);
+
+       cma_dev_group->device_group.default_groups
+               = cma_dev_group->default_dev_group;
+       cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group;
+       cma_dev_group->default_dev_group[1] = NULL;
+
+       config_group_init_type_name(&cma_dev_group->device_group, name,
+                                   &cma_device_group_type);
+
+       cma_deref_dev(cma_dev);
+       return &cma_dev_group->device_group;
+
+fail:
+       if (cma_dev)
+               cma_deref_dev(cma_dev);
+       kfree(cma_dev_group);
+       return ERR_PTR(err);
+}
+
+static struct configfs_group_operations cma_subsys_group_ops = {
+       .make_group     = make_cma_dev,
+};
+
+static struct config_item_type cma_subsys_type = {
+       .ct_group_ops   = &cma_subsys_group_ops,
+       .ct_owner       = THIS_MODULE,
+};
+
+static struct configfs_subsystem cma_subsys = {
+       .su_group       = {
+               .cg_item        = {
+                       .ci_namebuf     = "rdma_cm",
+                       .ci_type        = &cma_subsys_type,
+               },
+       },
+};
+
+int __init cma_configfs_init(void)
+{
+       config_group_init(&cma_subsys.su_group);
+       mutex_init(&cma_subsys.su_mutex);
+       return configfs_register_subsystem(&cma_subsys);
+}
+
+void __exit cma_configfs_exit(void)
+{
+       configfs_unregister_subsystem(&cma_subsys);
+}
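With this subsystem registered, the default RoCE mode becomes runtime-configurable from userspace: mkdir a group named after the ib_device under /sys/kernel/config/rdma_cm (which triggers make_cma_dev()), then write the mode string into the per-port attribute. A userspace sketch; the device name "mlx4_0" and the "RoCE v2" string are assumptions here, since the accepted strings are whatever ib_cache_gid_parse_type_str() recognizes:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	const char *grp = "/sys/kernel/config/rdma_cm/mlx4_0";
	const char *attr =
		"/sys/kernel/config/rdma_cm/mlx4_0/ports/1/default_roce_mode";
	int fd;

	mkdir(grp, 0755);		/* triggers make_cma_dev() */
	fd = open(attr, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "RoCE v2", strlen("RoCE v2")) < 0)
		perror("write");	/* handled by default_roce_mode_store() */
	close(fd);
	return 0;
}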
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 5cf6eb716f000a7aa07233419a0dcbbce41e0b6d..eab32215756b935159355e22c8d9bf76f108f873 100644 (file)
 
 #include <rdma/ib_verbs.h>
 
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+       return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+struct cma_device;
+void cma_ref_dev(struct cma_device *cma_dev);
+void cma_deref_dev(struct cma_device *cma_dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+                                            void               *cookie);
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port);
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port,
+                            enum ib_gid_type default_gid_type);
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
+
 int  ib_device_register_sysfs(struct ib_device *device,
                              int (*port_callback)(struct ib_device *,
                                                   u8, struct kobject *));
@@ -70,8 +96,13 @@ enum ib_cache_gid_default_mode {
        IB_CACHE_GID_DEFAULT_MODE_DELETE
 };
 
+int ib_cache_gid_parse_type_str(const char *buf);
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
+
 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
+                                 unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode);
 
 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
@@ -87,9 +118,23 @@ int roce_gid_mgmt_init(void);
 void roce_gid_mgmt_cleanup(void);
 
 int roce_rescan_device(struct ib_device *ib_dev);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
 
 int ib_cache_setup_one(struct ib_device *device);
 void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);
 
+static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
+                                        struct net_device *upper)
+{
+       struct net_device *_upper = NULL;
+       struct list_head *iter;
+
+       netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
+               if (_upper == upper)
+                       break;
+
+       return _upper == upper;
+}
+
 #endif /* _CORE_PRIV_H */
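rdma_is_upper_dev_rcu() walks the netdev upper-device list with the _rcu iterator, so callers must be inside an RCU read-side critical section. A hypothetical use, checking whether a VLAN device stacks on the port's netdev (port_ndev and vlan_ndev are ours):

	bool stacked;

	rcu_read_lock();
	stacked = rdma_is_upper_dev_rcu(port_ndev, vlan_ndev);
	rcu_read_unlock();

	if (stacked)
		pr_debug("%s stacks on %s\n", vlan_ndev->name,
			 port_ndev->name);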
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
new file mode 100644 (file)
index 0000000..a754fc7
--- /dev/null
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+
+/* # of WCs to poll for with a single call to ib_poll_cq */
+#define IB_POLL_BATCH                  16
+
+/* # of WCs to iterate over before yielding */
+#define IB_POLL_BUDGET_IRQ             256
+#define IB_POLL_BUDGET_WORKQUEUE       65536
+
+#define IB_POLL_FLAGS \
+       (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
+
+static int __ib_process_cq(struct ib_cq *cq, int budget)
+{
+       int i, n, completed = 0;
+
+       while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+               for (i = 0; i < n; i++) {
+                       struct ib_wc *wc = &cq->wc[i];
+
+                       if (wc->wr_cqe)
+                               wc->wr_cqe->done(cq, wc);
+                       else
+                               WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
+               }
+
+               completed += n;
+
+               if (n != IB_POLL_BATCH ||
+                   (budget != -1 && completed >= budget))
+                       break;
+       }
+
+       return completed;
+}
+
+/**
+ * ib_process_cq_direct - process a CQ in caller context
+ * @cq:                CQ to process
+ * @budget:    number of CQEs to poll for
+ *
+ * This function is used to process all outstanding CQ entries on a
+ * %IB_POLL_DIRECT CQ.  It does not offload CQ processing to a different
+ * context and does not ask for completion interrupts from the HCA.
+ *
+ * Note: for compatibility reasons -1 can be passed in %budget for unlimited
+ * polling.  Do not use this feature in new code; it will be removed soon.
+ */
+int ib_process_cq_direct(struct ib_cq *cq, int budget)
+{
+       WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+
+       return __ib_process_cq(cq, budget);
+}
+EXPORT_SYMBOL(ib_process_cq_direct);
+
+static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
+{
+       WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
+}
+
+static int ib_poll_handler(struct irq_poll *iop, int budget)
+{
+       struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+       int completed;
+
+       completed = __ib_process_cq(cq, budget);
+       if (completed < budget) {
+               irq_poll_complete(&cq->iop);
+               if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+                       irq_poll_sched(&cq->iop);
+       }
+
+       return completed;
+}
+
+static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
+{
+       irq_poll_sched(&cq->iop);
+}
+
+static void ib_cq_poll_work(struct work_struct *work)
+{
+       struct ib_cq *cq = container_of(work, struct ib_cq, work);
+       int completed;
+
+       completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
+       if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
+           ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+               queue_work(ib_comp_wq, &cq->work);
+}
+
+static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
+{
+       queue_work(ib_comp_wq, &cq->work);
+}
+
+/**
+ * ib_alloc_cq - allocate a completion queue
+ * @dev:               device to allocate the CQ for
+ * @private:           driver private data, accessible from cq->cq_context
+ * @nr_cqe:            number of CQEs to allocate
+ * @comp_vector:       HCA completion vectors for this CQ
+ * @poll_ctx:          context to poll the CQ from.
+ *
+ * This is the proper interface to allocate a CQ for in-kernel users. A
+ * CQ allocated with this interface will automatically be polled from the
+ * specified context.  The ULP must use wr->wr_cqe instead of wr->wr_id
+ * to use this CQ abstraction.
+ */
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+               int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+{
+       struct ib_cq_init_attr cq_attr = {
+               .cqe            = nr_cqe,
+               .comp_vector    = comp_vector,
+       };
+       struct ib_cq *cq;
+       int ret = -ENOMEM;
+
+       cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
+       if (IS_ERR(cq))
+               return cq;
+
+       cq->device = dev;
+       cq->uobject = NULL;
+       cq->event_handler = NULL;
+       cq->cq_context = private;
+       cq->poll_ctx = poll_ctx;
+       atomic_set(&cq->usecnt, 0);
+
+       cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
+       if (!cq->wc)
+               goto out_destroy_cq;
+
+       switch (cq->poll_ctx) {
+       case IB_POLL_DIRECT:
+               cq->comp_handler = ib_cq_completion_direct;
+               break;
+       case IB_POLL_SOFTIRQ:
+               cq->comp_handler = ib_cq_completion_softirq;
+
+               irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
+               ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+               break;
+       case IB_POLL_WORKQUEUE:
+               cq->comp_handler = ib_cq_completion_workqueue;
+               INIT_WORK(&cq->work, ib_cq_poll_work);
+               ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+               break;
+       default:
+               ret = -EINVAL;
+               goto out_free_wc;
+       }
+
+       return cq;
+
+out_free_wc:
+       kfree(cq->wc);
+out_destroy_cq:
+       cq->device->destroy_cq(cq);
+       return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_cq);
+
+/**
+ * ib_free_cq - free a completion queue
+ * @cq:                completion queue to free.
+ */
+void ib_free_cq(struct ib_cq *cq)
+{
+       int ret;
+
+       if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
+               return;
+
+       switch (cq->poll_ctx) {
+       case IB_POLL_DIRECT:
+               break;
+       case IB_POLL_SOFTIRQ:
+               irq_poll_disable(&cq->iop);
+               break;
+       case IB_POLL_WORKQUEUE:
+               flush_work(&cq->work);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+       }
+
+       kfree(cq->wc);
+       ret = cq->device->destroy_cq(cq);
+       WARN_ON_ONCE(ret);
+}
+EXPORT_SYMBOL(ib_free_cq);
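The new cq.c abstraction pairs ib_alloc_cq() with per-WR ib_cqe completion callbacks: instead of stashing a cookie in wr_id and demultiplexing in a hand-rolled poll loop, a ULP embeds an ib_cqe in its request, sets its done handler, and recovers its context with container_of(). A hypothetical receive path (all example_* names are ours):

struct example_req {
	struct ib_cqe cqe;
	/* ... ULP per-request state ... */
};

static void example_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct example_req *req =
		container_of(wc->wr_cqe, struct example_req, cqe);

	if (wc->status != IB_WC_SUCCESS)
		pr_err("recv failed with status %d\n", wc->status);
	/* ... hand req off to the ULP ... */
}

static int example_post_recv(struct ib_qp *qp, struct example_req *req,
			     struct ib_sge *sge)
{
	struct ib_recv_wr wr = {}, *bad_wr;

	req->cqe.done = example_recv_done;
	wr.wr_cqe = &req->cqe;	/* replaces the old wr_id cookie */
	wr.sg_list = sge;
	wr.num_sge = 1;
	return ib_post_recv(qp, &wr, &bad_wr);
}

The CQ itself would come from ib_alloc_cq(dev, NULL, 128, 0, IB_POLL_WORKQUEUE) or one of the other poll contexts defined above.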
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 179e8134d57fc13b425254d5162006f9fc201879..00da80e02154205c428ca00702f4b7c10fa5a829 100644 (file)
@@ -58,6 +58,7 @@ struct ib_client_data {
        bool              going_down;
 };
 
+struct workqueue_struct *ib_comp_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
@@ -325,6 +326,7 @@ int ib_register_device(struct ib_device *device,
 {
        int ret;
        struct ib_client *client;
+       struct ib_udata uhw = {.outlen = 0, .inlen = 0};
 
        mutex_lock(&device_mutex);
 
@@ -352,6 +354,13 @@ int ib_register_device(struct ib_device *device,
                goto out;
        }
 
+       memset(&device->attrs, 0, sizeof(device->attrs));
+       ret = device->query_device(device, &device->attrs, &uhw);
+       if (ret) {
+               printk(KERN_WARNING "Couldn't query the device attributes\n");
+               goto out;
+       }
+
        ret = ib_device_register_sysfs(device, port_callback);
        if (ret) {
                printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -627,25 +636,6 @@ void ib_dispatch_event(struct ib_event *event)
 }
 EXPORT_SYMBOL(ib_dispatch_event);
 
-/**
- * ib_query_device - Query IB device attributes
- * @device:Device to query
- * @device_attr:Device attributes
- *
- * ib_query_device() returns the attributes of a device through the
- * @device_attr pointer.
- */
-int ib_query_device(struct ib_device *device,
-                   struct ib_device_attr *device_attr)
-{
-       struct ib_udata uhw = {.outlen = 0, .inlen = 0};
-
-       memset(device_attr, 0, sizeof(*device_attr));
-
-       return device->query_device(device, device_attr, &uhw);
-}
-EXPORT_SYMBOL(ib_query_device);
-
 /**
  * ib_query_port - Query IB port attributes
  * @device:Device to query
@@ -825,26 +815,31 @@ EXPORT_SYMBOL(ib_modify_port);
  *   a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: Type of GID.
  * @ndev: The ndev related to the GID to search for.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the GID table where the GID was found.  This
  *   parameter may be NULL.
  */
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               struct net_device *ndev, u8 *port_num, u16 *index)
+               enum ib_gid_type gid_type, struct net_device *ndev,
+               u8 *port_num, u16 *index)
 {
        union ib_gid tmp_gid;
        int ret, port, i;
 
        for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
                if (rdma_cap_roce_gid_table(device, port)) {
-                       if (!ib_find_cached_gid_by_port(device, gid, port,
+                       if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
                                                        ndev, index)) {
                                *port_num = port;
                                return 0;
                        }
                }
 
+               if (gid_type != IB_GID_TYPE_IB)
+                       continue;
+
                for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
                        ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
                        if (ret)
@@ -954,10 +949,18 @@ static int __init ib_core_init(void)
        if (!ib_wq)
                return -ENOMEM;
 
+       ib_comp_wq = alloc_workqueue("ib-comp-wq",
+                       WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+                       WQ_UNBOUND_MAX_ACTIVE);
+       if (!ib_comp_wq) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
        ret = class_register(&ib_class);
        if (ret) {
                printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
-               goto err;
+               goto err_comp;
        }
 
        ret = ibnl_init();
@@ -972,7 +975,8 @@ static int __init ib_core_init(void)
 
 err_sysfs:
        class_unregister(&ib_class);
-
+err_comp:
+       destroy_workqueue(ib_comp_wq);
 err:
        destroy_workqueue(ib_wq);
        return ret;
@@ -983,6 +987,7 @@ static void __exit ib_core_cleanup(void)
        ib_cache_cleanup();
        ibnl_cleanup();
        class_unregister(&ib_class);
+       destroy_workqueue(ib_comp_wq);
        /* Make sure that any pending umem accounting work is done. */
        destroy_workqueue(ib_wq);
 }
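ib_register_device() now queries the device attributes once and caches them in device->attrs, which is what lets this series delete ib_query_device() and the per-caller query-and-allocate dances (see the cm.c hunk above and the fmr_pool.c hunk below). Call sites shrink to a direct field read, for example:

	/* before: kmalloc + ib_query_device() + kfree
	 * after:  read the cached copy */
	max_remaps = device->attrs.max_map_per_fmr ?
		     device->attrs.max_map_per_fmr : IB_FMR_MAX_REMAPS;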
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 9f5ad7cc33c89985fb1c490c27f07766e38e29fa..6ac3683c144ba859ff34709cc292e2fc3208f621 100644 (file)
@@ -212,7 +212,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 {
        struct ib_device   *device;
        struct ib_fmr_pool *pool;
-       struct ib_device_attr *attr;
        int i;
        int ret;
        int max_remaps;
@@ -228,25 +227,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                return ERR_PTR(-ENOSYS);
        }
 
-       attr = kmalloc(sizeof *attr, GFP_KERNEL);
-       if (!attr) {
-               printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
-               return ERR_PTR(-ENOMEM);
-       }
-
-       ret = ib_query_device(device, attr);
-       if (ret) {
-               printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
-               kfree(attr);
-               return ERR_PTR(ret);
-       }
-
-       if (!attr->max_map_per_fmr)
+       if (!device->attrs.max_map_per_fmr)
                max_remaps = IB_FMR_MAX_REMAPS;
        else
-               max_remaps = attr->max_map_per_fmr;
-
-       kfree(attr);
+               max_remaps = device->attrs.max_map_per_fmr;
 
        pool = kmalloc(sizeof *pool, GFP_KERNEL);
        if (!pool) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 2281de122038e45a5c375f7d7669111ab2aee2de..9fa5bf33f5a34261b16a72c9800b55daab921c84 100644 (file)
@@ -84,6 +84,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
                              u8 mgmt_class);
 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
                           struct ib_mad_agent_private *agent_priv);
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+                             struct ib_wc *wc);
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
 
 /*
  * Returns a ib_mad_port_private structure or NULL for a device/port
@@ -681,7 +684,7 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
 
                atomic_inc(&mad_snoop_priv->refcount);
                spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-               mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
+               mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
                                                   mad_recv_wc);
                deref_snoop_agent(mad_snoop_priv);
                spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -689,12 +692,11 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
        spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 }
 
-static void build_smp_wc(struct ib_qp *qp,
-                        u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
-                        struct ib_wc *wc)
+static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
+               u16 pkey_index, u8 port_num, struct ib_wc *wc)
 {
        memset(wc, 0, sizeof *wc);
-       wc->wr_id = wr_id;
+       wc->wr_cqe = cqe;
        wc->status = IB_WC_SUCCESS;
        wc->opcode = IB_WC_RECV;
        wc->pkey_index = pkey_index;
@@ -832,7 +834,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
        }
 
        build_smp_wc(mad_agent_priv->agent.qp,
-                    send_wr->wr.wr_id, drslid,
+                    send_wr->wr.wr_cqe, drslid,
                     send_wr->pkey_index,
                     send_wr->port_num, &mad_wc);
 
@@ -1039,7 +1041,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 
        mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
 
-       mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+       mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+
+       mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
        mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
        mad_send_wr->send_wr.wr.num_sge = 2;
        mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
@@ -1151,8 +1155,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
        /* Set WR ID to find mad_send_wr upon completion */
        qp_info = mad_send_wr->mad_agent_priv->qp_info;
-       mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
        mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+       mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+       mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
 
        mad_agent = mad_send_wr->send_buf.mad_agent;
        sge = mad_send_wr->sg_list;
@@ -1982,9 +1987,9 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                                /* user rmpp is in effect
                                 * and this is an active RMPP MAD
                                 */
-                               mad_recv_wc->wc->wr_id = 0;
-                               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-                                                                  mad_recv_wc);
+                               mad_agent_priv->agent.recv_handler(
+                                               &mad_agent_priv->agent, NULL,
+                                               mad_recv_wc);
                                atomic_dec(&mad_agent_priv->refcount);
                        } else {
                                /* not user rmpp, revert to normal behavior and
@@ -1998,9 +2003,10 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
                        /* Defined behavior is to complete response before request */
-                       mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
-                       mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-                                                          mad_recv_wc);
+                       mad_agent_priv->agent.recv_handler(
+                                       &mad_agent_priv->agent,
+                                       &mad_send_wr->send_buf,
+                                       mad_recv_wc);
                        atomic_dec(&mad_agent_priv->refcount);
 
                        mad_send_wc.status = IB_WC_SUCCESS;
@@ -2009,7 +2015,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                        ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
                }
        } else {
-               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
                                                   mad_recv_wc);
                deref_mad_agent(mad_agent_priv);
        }
@@ -2172,13 +2178,14 @@ handle_smi(struct ib_mad_port_private *port_priv,
        return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
 }
 
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
-                                    struct ib_wc *wc)
+static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct ib_mad_port_private *port_priv = cq->cq_context;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
        struct ib_mad_qp_info *qp_info;
        struct ib_mad_private_header *mad_priv_hdr;
        struct ib_mad_private *recv, *response = NULL;
-       struct ib_mad_list_head *mad_list;
        struct ib_mad_agent_private *mad_agent;
        int port_num;
        int ret = IB_MAD_RESULT_SUCCESS;
@@ -2186,7 +2193,17 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        u16 resp_mad_pkey_index = 0;
        bool opa;
 
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+       if (list_empty_careful(&port_priv->port_list))
+               return;
+
+       if (wc->status != IB_WC_SUCCESS) {
+               /*
+                * Receive errors indicate that the QP has entered the error
+                * state - error handling/shutdown code will cleanup
+                */
+               return;
+       }
+
        qp_info = mad_list->mad_queue->qp_info;
        dequeue_mad(mad_list);
 
@@ -2227,7 +2244,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        response = alloc_mad_private(mad_size, GFP_KERNEL);
        if (!response) {
                dev_err(&port_priv->device->dev,
-                       "ib_mad_recv_done_handler no memory for response buffer\n");
+                       "%s: no memory for response buffer\n", __func__);
                goto out;
        }
 
@@ -2413,11 +2430,12 @@ done:
        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 }
 
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
-                                    struct ib_wc *wc)
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct ib_mad_port_private *port_priv = cq->cq_context;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
        struct ib_mad_send_wr_private   *mad_send_wr, *queued_send_wr;
-       struct ib_mad_list_head         *mad_list;
        struct ib_mad_qp_info           *qp_info;
        struct ib_mad_queue             *send_queue;
        struct ib_send_wr               *bad_send_wr;
@@ -2425,7 +2443,14 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
        unsigned long flags;
        int ret;
 
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+       if (list_empty_careful(&port_priv->port_list))
+               return;
+
+       if (wc->status != IB_WC_SUCCESS) {
+               if (!ib_mad_send_error(port_priv, wc))
+                       return;
+       }
+
        mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
                                   mad_list);
        send_queue = mad_list->mad_queue;
@@ -2490,24 +2515,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
        spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
 }
 
-static void mad_error_handler(struct ib_mad_port_private *port_priv,
-                             struct ib_wc *wc)
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+               struct ib_wc *wc)
 {
-       struct ib_mad_list_head *mad_list;
-       struct ib_mad_qp_info *qp_info;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
+       struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
        struct ib_mad_send_wr_private *mad_send_wr;
        int ret;
 
-       /* Determine if failure was a send or receive */
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
-       qp_info = mad_list->mad_queue->qp_info;
-       if (mad_list->mad_queue == &qp_info->recv_queue)
-               /*
-                * Receive errors indicate that the QP has entered the error
-                * state - error handling/shutdown code will cleanup
-                */
-               return;
-
        /*
         * Send errors will transition the QP to SQE - move
         * QP to RTS and repost flushed work requests
@@ -2522,10 +2538,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
                        mad_send_wr->retry = 0;
                        ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
                                        &bad_send_wr);
-                       if (ret)
-                               ib_mad_send_done_handler(port_priv, wc);
-               } else
-                       ib_mad_send_done_handler(port_priv, wc);
+                       if (!ret)
+                               return false;
+               }
        } else {
                struct ib_qp_attr *attr;
 
@@ -2539,42 +2554,14 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
                        kfree(attr);
                        if (ret)
                                dev_err(&port_priv->device->dev,
-                                       "mad_error_handler - ib_modify_qp to RTS : %d\n",
-                                       ret);
+                                       "%s - ib_modify_qp to RTS: %d\n",
+                                       __func__, ret);
                        else
                                mark_sends_for_retry(qp_info);
                }
-               ib_mad_send_done_handler(port_priv, wc);
        }
-}
 
-/*
- * IB MAD completion callback
- */
-static void ib_mad_completion_handler(struct work_struct *work)
-{
-       struct ib_mad_port_private *port_priv;
-       struct ib_wc wc;
-
-       port_priv = container_of(work, struct ib_mad_port_private, work);
-       ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-
-       while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
-               if (wc.status == IB_WC_SUCCESS) {
-                       switch (wc.opcode) {
-                       case IB_WC_SEND:
-                               ib_mad_send_done_handler(port_priv, &wc);
-                               break;
-                       case IB_WC_RECV:
-                               ib_mad_recv_done_handler(port_priv, &wc);
-                               break;
-                       default:
-                               BUG_ON(1);
-                               break;
-                       }
-               } else
-                       mad_error_handler(port_priv, &wc);
-       }
+       return true;
 }
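
The hunks above retire the hand-rolled poller (ib_mad_completion_handler) in favor of the ib_cqe model: each posted work request embeds a struct ib_cqe whose .done callback the core CQ machinery invokes, and the handler recovers its per-request state with container_of(). Stripped of the MAD specifics, the pattern looks like this (a sketch; the example_* names are illustrative, the verbs calls are the real API of this kernel):

    struct example_req {
            struct ib_cqe   cqe;
            void            *state;         /* per-request payload */
    };

    static void example_done(struct ib_cq *cq, struct ib_wc *wc)
    {
            struct example_req *req =
                    container_of(wc->wr_cqe, struct example_req, cqe);

            /* wc->status and req->state are both in scope here */
    }

    static int example_post(struct ib_qp *qp, struct example_req *req,
                            struct ib_sge *sge)
    {
            struct ib_send_wr wr = {}, *bad_wr;

            req->cqe.done = example_done;   /* dispatched by the CQ core */
            wr.wr_cqe     = &req->cqe;      /* replaces the old wr_id cast */
            wr.sg_list    = sge;
            wr.num_sge    = 1;
            wr.opcode     = IB_WR_SEND;
            return ib_post_send(qp, &wr, &bad_wr);
    }
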
 
 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2716,7 +2703,7 @@ static void local_completions(struct work_struct *work)
                         * before request
                         */
                        build_smp_wc(recv_mad_agent->agent.qp,
-                                    (unsigned long) local->mad_send_wr,
+                                    local->mad_send_wr->send_wr.wr.wr_cqe,
                                     be16_to_cpu(IB_LID_PERMISSIVE),
                                     local->mad_send_wr->send_wr.pkey_index,
                                     recv_mad_agent->agent.port_num, &wc);
@@ -2744,6 +2731,7 @@ static void local_completions(struct work_struct *work)
                                           IB_MAD_SNOOP_RECVS);
                        recv_mad_agent->agent.recv_handler(
                                                &recv_mad_agent->agent,
+                                               &local->mad_send_wr->send_buf,
                                                &local->mad_priv->header.recv_wc);
                        spin_lock_irqsave(&recv_mad_agent->lock, flags);
                        atomic_dec(&recv_mad_agent->refcount);
@@ -2855,17 +2843,6 @@ static void timeout_sends(struct work_struct *work)
        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 }
 
-static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
-{
-       struct ib_mad_port_private *port_priv = cq->cq_context;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ib_mad_port_list_lock, flags);
-       if (!list_empty(&port_priv->port_list))
-               queue_work(port_priv->wq, &port_priv->work);
-       spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-}
-
 /*
  * Allocate receive MADs and post receive WRs for them
  */
@@ -2913,8 +2890,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                        break;
                }
                mad_priv->header.mapping = sg_list.addr;
-               recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
                mad_priv->header.mad_list.mad_queue = recv_queue;
+               mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
+               recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
 
                /* Post receive WR */
                spin_lock_irqsave(&recv_queue->lock, flags);
@@ -3151,7 +3129,6 @@ static int ib_mad_port_open(struct ib_device *device,
        unsigned long flags;
        char name[sizeof "ib_mad123"];
        int has_smi;
-       struct ib_cq_init_attr cq_attr = {};
 
        if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
                return -EFAULT;
@@ -3179,10 +3156,8 @@ static int ib_mad_port_open(struct ib_device *device,
        if (has_smi)
                cq_size *= 2;
 
-       cq_attr.cqe = cq_size;
-       port_priv->cq = ib_create_cq(port_priv->device,
-                                    ib_mad_thread_completion_handler,
-                                    NULL, port_priv, &cq_attr);
+       port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
+                       IB_POLL_WORKQUEUE);
        if (IS_ERR(port_priv->cq)) {
                dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
                ret = PTR_ERR(port_priv->cq);
@@ -3211,7 +3186,6 @@ static int ib_mad_port_open(struct ib_device *device,
                ret = -ENOMEM;
                goto error8;
        }
-       INIT_WORK(&port_priv->work, ib_mad_completion_handler);
 
        spin_lock_irqsave(&ib_mad_port_list_lock, flags);
        list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -3238,7 +3212,7 @@ error7:
 error6:
        ib_dealloc_pd(port_priv->pd);
 error4:
-       ib_destroy_cq(port_priv->cq);
+       ib_free_cq(port_priv->cq);
        cleanup_recv_queue(&port_priv->qp_info[1]);
        cleanup_recv_queue(&port_priv->qp_info[0]);
 error3:
@@ -3271,7 +3245,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
        destroy_mad_qp(&port_priv->qp_info[1]);
        destroy_mad_qp(&port_priv->qp_info[0]);
        ib_dealloc_pd(port_priv->pd);
-       ib_destroy_cq(port_priv->cq);
+       ib_free_cq(port_priv->cq);
        cleanup_recv_queue(&port_priv->qp_info[1]);
        cleanup_recv_queue(&port_priv->qp_info[0]);
        /* XXX: Handle deallocation of MAD registration tables */
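
The port open/close hunks complete the conversion: ib_alloc_cq() subsumes ib_create_cq() plus the private notify/re-arm/workqueue plumbing that was just deleted. The caller only picks a polling context; the CQ core polls and dispatches each completion to wc->wr_cqe->done, and teardown is a single ib_free_cq(). In isolation (a sketch, error handling trimmed):

    struct ib_cq *cq;

    cq = ib_alloc_cq(device, priv, cq_size, 0 /* comp_vector */,
                     IB_POLL_WORKQUEUE);
    if (IS_ERR(cq))
            return PTR_ERR(cq);

    /* ... post WRs; their cqe->done handlers run from a workqueue ... */

    ib_free_cq(cq);
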
index 990698a6ab4b7024116ae50c9417d00ce179b41c..28669f6419e1fba02b65a0d776c55b08d3cc4fbc 100644 (file)
@@ -64,6 +64,7 @@
 
 struct ib_mad_list_head {
        struct list_head list;
+       struct ib_cqe cqe;
        struct ib_mad_queue *mad_queue;
 };
 
@@ -204,7 +205,6 @@ struct ib_mad_port_private {
        struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
        struct list_head agent_list;
        struct workqueue_struct *wq;
-       struct work_struct work;
        struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };
 
index bb6685fb08c61483546505f99037b88af6202ad0..250937cb9a1a5071f054a24e74414a8c90bd88a0 100644 (file)
@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
 
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             struct ib_sa_mcmember_rec *rec,
+                            struct net_device *ndev,
+                            enum ib_gid_type gid_type,
                             struct ib_ah_attr *ah_attr)
 {
        int ret;
        u16 gid_index;
        u8 p;
 
-       ret = ib_find_cached_gid(device, &rec->port_gid,
-                                NULL, &p, &gid_index);
+       if (rdma_protocol_roce(device, port_num)) {
+               ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+                                                gid_type, port_num,
+                                                ndev,
+                                                &gid_index);
+       } else if (rdma_protocol_ib(device, port_num)) {
+               ret = ib_find_cached_gid(device, &rec->port_gid,
+                                        IB_GID_TYPE_IB, NULL, &p,
+                                        &gid_index);
+       } else {
+               ret = -EINVAL;
+       }
+
        if (ret)
                return ret;
 
index 178f98482e13e217c16d447978a855a900808bc0..06556c34606deb38e8906d08327a7d024f38079e 100644 (file)
@@ -67,17 +67,53 @@ struct netdev_event_work {
        struct netdev_event_work_cmd    cmds[ROCE_NETDEV_CALLBACK_SZ];
 };
 
+static const struct {
+       bool (*is_supported)(const struct ib_device *device, u8 port_num);
+       enum ib_gid_type gid_type;
+} PORT_CAP_TO_GID_TYPE[] = {
+       {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
+       {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
+};
+
+#define CAP_TO_GID_TABLE_SIZE  ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
+
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+{
+       int i;
+       unsigned int ret_flags = 0;
+
+       if (!rdma_protocol_roce(ib_dev, port))
+               return 1UL << IB_GID_TYPE_IB;
+
+       for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
+               if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
+                       ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
+
+       return ret_flags;
+}
+EXPORT_SYMBOL(roce_gid_type_mask_support);
+
 static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
                       u8 port, union ib_gid *gid,
                       struct ib_gid_attr *gid_attr)
 {
-       switch (gid_op) {
-       case GID_ADD:
-               ib_cache_gid_add(ib_dev, port, gid, gid_attr);
-               break;
-       case GID_DEL:
-               ib_cache_gid_del(ib_dev, port, gid, gid_attr);
-               break;
+       int i;
+       unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+       for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
+               if ((1UL << i) & gid_type_mask) {
+                       gid_attr->gid_type = i;
+                       switch (gid_op) {
+                       case GID_ADD:
+                               ib_cache_gid_add(ib_dev, port,
+                                                gid, gid_attr);
+                               break;
+                       case GID_DEL:
+                               ib_cache_gid_del(ib_dev, port,
+                                                gid, gid_attr);
+                               break;
+                       }
+               }
        }
 }
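
roce_gid_type_mask_support() reports a port's usable GID types as a bitmask, bit n set meaning ib_gid_type n is supported, and update_gid() above fans a single netdev event out to one cache operation per supported type. Other consumers walk the mask the same way (a sketch; ib_dev and port as in the surrounding code):

    unsigned long mask = roce_gid_type_mask_support(ib_dev, port);
    unsigned int i;

    for (i = 0; i < IB_GID_TYPE_SIZE; i++)
            if (mask & (1UL << i))
                    pr_debug("port %u: gid type %u supported\n", port, i);
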
 
@@ -103,18 +139,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
        return BONDING_SLAVE_STATE_NA;
 }
 
-static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper)
-{
-       struct net_device *_upper = NULL;
-       struct list_head *iter;
-
-       netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
-               if (_upper == upper)
-                       break;
-
-       return _upper == upper;
-}
-
 #define REQUIRED_BOND_STATES           (BONDING_SLAVE_STATE_ACTIVE |   \
                                         BONDING_SLAVE_STATE_NA)
 static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
@@ -132,7 +156,7 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
        if (!real_dev)
                real_dev = event_ndev;
 
-       res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+       res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
               (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
                REQUIRED_BOND_STATES)) ||
               real_dev == rdma_ndev);
@@ -178,7 +202,7 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
                return 1;
 
        rcu_read_lock();
-       res = is_upper_dev_rcu(rdma_ndev, event_ndev);
+       res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
        rcu_read_unlock();
 
        return res;
@@ -203,10 +227,12 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
                                     u8 port, struct net_device *event_ndev,
                                     struct net_device *rdma_ndev)
 {
+       unsigned long gid_type_mask;
+
        rcu_read_lock();
        if (!rdma_ndev ||
            ((rdma_ndev != event_ndev &&
-             !is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+             !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
             is_eth_active_slave_of_bonding_rcu(rdma_ndev,
                                                netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
             BONDING_SLAVE_STATE_INACTIVE)) {
@@ -215,7 +241,9 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
        }
        rcu_read_unlock();
 
-       ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+       gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+       ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
                                     IB_CACHE_GID_DEFAULT_MODE_SET);
 }
 
@@ -234,12 +262,17 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
 
        rcu_read_lock();
 
-       if (is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+       if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
            is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
            BONDING_SLAVE_STATE_INACTIVE) {
+               unsigned long gid_type_mask;
+
                rcu_read_unlock();
 
+               gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
                ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+                                            gid_type_mask,
                                             IB_CACHE_GID_DEFAULT_MODE_DELETE);
        } else {
                rcu_read_unlock();
index a95a32ba596edc03728cf7790a6752c951cde951..f334090bb6129bf7f3cfe788e5cff7bc762752c3 100644 (file)
@@ -49,7 +49,9 @@
 #include <net/netlink.h>
 #include <uapi/rdma/ib_user_sa.h>
 #include <rdma/ib_marshall.h>
+#include <rdma/ib_addr.h>
 #include "sa.h"
+#include "core_priv.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -715,7 +717,9 @@ static int ib_nl_handle_set_timeout(struct sk_buff *skb,
        struct nlattr *tb[LS_NLA_TYPE_MAX];
        int ret;
 
-       if (!netlink_capable(skb, CAP_NET_ADMIN))
+       if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
+           !(NETLINK_CB(skb).sk) ||
+           !netlink_capable(skb, CAP_NET_ADMIN))
                return -EPERM;
 
        ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
@@ -789,7 +793,9 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
        int found = 0;
        int ret;
 
-       if (!netlink_capable(skb, CAP_NET_ADMIN))
+       if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
+           !(NETLINK_CB(skb).sk) ||
+           !netlink_capable(skb, CAP_NET_ADMIN))
                return -EPERM;
 
        spin_lock_irqsave(&ib_nl_request_lock, flags);
@@ -996,7 +1002,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 {
        int ret;
        u16 gid_index;
-       int force_grh;
+       int use_roce;
+       struct net_device *ndev = NULL;
 
        memset(ah_attr, 0, sizeof *ah_attr);
        ah_attr->dlid = be16_to_cpu(rec->dlid);
@@ -1006,16 +1013,71 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
        ah_attr->port_num = port_num;
        ah_attr->static_rate = rec->rate;
 
-       force_grh = rdma_cap_eth_ah(device, port_num);
+       use_roce = rdma_cap_eth_ah(device, port_num);
+
+       if (use_roce) {
+               struct net_device *idev;
+               struct net_device *resolved_dev;
+               struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
+                                                .net = rec->net ? rec->net :
+                                                        &init_net};
+               union {
+                       struct sockaddr     _sockaddr;
+                       struct sockaddr_in  _sockaddr_in;
+                       struct sockaddr_in6 _sockaddr_in6;
+               } sgid_addr, dgid_addr;
+
+               if (!device->get_netdev)
+                       return -EOPNOTSUPP;
+
+               rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+               rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
+
+               /* validate the route */
+               ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+                                           &dgid_addr._sockaddr, &dev_addr);
+               if (ret)
+                       return ret;
 
-       if (rec->hop_limit > 1 || force_grh) {
-               struct net_device *ndev = ib_get_ndev_from_path(rec);
+               if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+                    dev_addr.network == RDMA_NETWORK_IPV6) &&
+                   rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       return -EINVAL;
+
+               idev = device->get_netdev(device, port_num);
+               if (!idev)
+                       return -ENODEV;
+
+               resolved_dev = dev_get_by_index(dev_addr.net,
+                                               dev_addr.bound_dev_if);
+               if (resolved_dev->flags & IFF_LOOPBACK) {
+                       dev_put(resolved_dev);
+                       resolved_dev = idev;
+                       dev_hold(resolved_dev);
+               }
+               ndev = ib_get_ndev_from_path(rec);
+               rcu_read_lock();
+               if ((ndev && ndev != resolved_dev) ||
+                   (resolved_dev != idev &&
+                    !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+                       ret = -EHOSTUNREACH;
+               rcu_read_unlock();
+               dev_put(idev);
+               dev_put(resolved_dev);
+               if (ret) {
+                       if (ndev)
+                               dev_put(ndev);
+                       return ret;
+               }
+       }
 
+       if (rec->hop_limit > 1 || use_roce) {
                ah_attr->ah_flags = IB_AH_GRH;
                ah_attr->grh.dgid = rec->dgid;
 
-               ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
-                                        &gid_index);
+               ret = ib_find_cached_gid_by_port(device, &rec->sgid,
+                                                rec->gid_type, port_num, ndev,
+                                                &gid_index);
                if (ret) {
                        if (ndev)
                                dev_put(ndev);
@@ -1029,9 +1091,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
                if (ndev)
                        dev_put(ndev);
        }
-       if (force_grh) {
+
+       if (use_roce)
                memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
-       }
+
        return 0;
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);
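
For RoCE, ib_init_ah_from_path() now validates the L3 route before building the address handle: both GIDs are mapped to socket addresses, the route is resolved, and the egress netdev must be the port's own netdev or an upper device of it (a bond or VLAN). The sockaddr-union plus rdma_gid2ip() idiom it introduces is reusable on its own, one stack slot serving either address family (a sketch; rec stands for a path record as above):

    union {
            struct sockaddr     _sockaddr;
            struct sockaddr_in  _sockaddr_in;
            struct sockaddr_in6 _sockaddr_in6;
    } addr;

    /* writes AF_INET or AF_INET6 depending on how the GID maps */
    rdma_gid2ip(&addr._sockaddr, &rec->dgid);

    if (addr._sockaddr.sa_family == AF_INET)
            pr_debug("IPv4-mapped GID (RoCE v2 over IPv4)\n");
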
@@ -1157,6 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
                          mad->data, &rec);
                rec.net = NULL;
                rec.ifindex = 0;
+               rec.gid_type = IB_GID_TYPE_IB;
                memset(rec.dmac, 0, ETH_ALEN);
                query->callback(status, &rec, query->context);
        } else
@@ -1609,14 +1673,15 @@ static void send_handler(struct ib_mad_agent *agent,
 }
 
 static void recv_handler(struct ib_mad_agent *mad_agent,
+                        struct ib_mad_send_buf *send_buf,
                         struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct ib_sa_query *query;
-       struct ib_mad_send_buf *mad_buf;
 
-       mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
-       query = mad_buf->context[0];
+       if (!send_buf)
+               return;
 
+       query = send_buf->context[0];
        if (query->callback) {
                if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
                        query->callback(query,
index b1f37d4095fa1e15f7402c35a367b00e1f24b6b5..14606afbfaa8d6c865d28a545c292146ffc1b4bc 100644 (file)
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/string.h>
+#include <linux/netdevice.h>
 
 #include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
 
+struct ib_port;
+
+struct gid_attr_group {
+       struct ib_port          *port;
+       struct kobject          kobj;
+       struct attribute_group  ndev;
+       struct attribute_group  type;
+};
 struct ib_port {
        struct kobject         kobj;
        struct ib_device      *ibdev;
+       struct gid_attr_group *gid_attr_group;
        struct attribute_group gid_group;
        struct attribute_group pkey_group;
        u8                     port_num;
+       struct attribute_group *pma_table;
 };
 
 struct port_attribute {
@@ -65,6 +77,7 @@ struct port_table_attribute {
        struct port_attribute   attr;
        char                    name[8];
        int                     index;
+       __be16                  attr_id;
 };
 
 static ssize_t port_attr_show(struct kobject *kobj,
@@ -84,6 +97,24 @@ static const struct sysfs_ops port_sysfs_ops = {
        .show = port_attr_show
 };
 
+static ssize_t gid_attr_show(struct kobject *kobj,
+                            struct attribute *attr, char *buf)
+{
+       struct port_attribute *port_attr =
+               container_of(attr, struct port_attribute, attr);
+       struct ib_port *p = container_of(kobj, struct gid_attr_group,
+                                        kobj)->port;
+
+       if (!port_attr->show)
+               return -EIO;
+
+       return port_attr->show(p, port_attr, buf);
+}
+
+static const struct sysfs_ops gid_attr_sysfs_ops = {
+       .show = gid_attr_show
+};
+
 static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
                          char *buf)
 {
@@ -281,6 +312,44 @@ static struct attribute *port_default_attrs[] = {
        NULL
 };
 
+static ssize_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+{
+       if (!gid_attr->ndev)
+               return -EINVAL;
+
+       return sprintf(buf, "%s\n", gid_attr->ndev->name);
+}
+
+static ssize_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+{
+       return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+}
+
+static ssize_t _show_port_gid_attr(struct ib_port *p,
+                                  struct port_attribute *attr,
+                                  char *buf,
+                                  ssize_t (*print)(struct ib_gid_attr *gid_attr,
+                                                  char *buf))
+{
+       struct port_table_attribute *tab_attr =
+               container_of(attr, struct port_table_attribute, attr);
+       union ib_gid gid;
+       struct ib_gid_attr gid_attr = {};
+       ssize_t ret;
+
+       ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
+                          &gid_attr);
+       if (ret)
+               goto err;
+
+       ret = print(&gid_attr, buf);
+
+err:
+       if (gid_attr.ndev)
+               dev_put(gid_attr.ndev);
+       return ret;
+}
+
 static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
                             char *buf)
 {
@@ -296,6 +365,19 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
        return sprintf(buf, "%pI6\n", gid.raw);
 }
 
+static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
+                                      struct port_attribute *attr, char *buf)
+{
+       return _show_port_gid_attr(p, attr, buf, print_ndev);
+}
+
+static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
+                                          struct port_attribute *attr,
+                                          char *buf)
+{
+       return _show_port_gid_attr(p, attr, buf, print_gid_type);
+}
+
 static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
                              char *buf)
 {
@@ -314,24 +396,32 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
 #define PORT_PMA_ATTR(_name, _counter, _width, _offset)                        \
 struct port_table_attribute port_pma_attr_##_name = {                  \
        .attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),        \
-       .index = (_offset) | ((_width) << 16) | ((_counter) << 24)      \
+       .index = (_offset) | ((_width) << 16) | ((_counter) << 24),     \
+       .attr_id = IB_PMA_PORT_COUNTERS,                                \
 }
 
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
-                               char *buf)
+#define PORT_PMA_ATTR_EXT(_name, _width, _offset)                      \
+struct port_table_attribute port_pma_attr_ext_##_name = {              \
+       .attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),        \
+       .index = (_offset) | ((_width) << 16),                          \
+       .attr_id = IB_PMA_PORT_COUNTERS_EXT,                            \
+}
+
+/*
+ * Get a Perfmgmt MAD block of data.
+ * Returns error code or the number of bytes retrieved.
+ */
+static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
+               void *data, int offset, size_t size)
 {
-       struct port_table_attribute *tab_attr =
-               container_of(attr, struct port_table_attribute, attr);
-       int offset = tab_attr->index & 0xffff;
-       int width  = (tab_attr->index >> 16) & 0xff;
-       struct ib_mad *in_mad  = NULL;
-       struct ib_mad *out_mad = NULL;
+       struct ib_mad *in_mad;
+       struct ib_mad *out_mad;
        size_t mad_size = sizeof(*out_mad);
        u16 out_mad_pkey_index = 0;
        ssize_t ret;
 
-       if (!p->ibdev->process_mad)
-               return sprintf(buf, "N/A (no PMA)\n");
+       if (!dev->process_mad)
+               return -ENOSYS;
 
        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -344,12 +434,13 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
        in_mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_PERF_MGMT;
        in_mad->mad_hdr.class_version = 1;
        in_mad->mad_hdr.method        = IB_MGMT_METHOD_GET;
-       in_mad->mad_hdr.attr_id       = cpu_to_be16(0x12); /* PortCounters */
+       in_mad->mad_hdr.attr_id       = attr;
 
-       in_mad->data[41] = p->port_num; /* PortSelect field */
+       if (attr != IB_PMA_CLASS_PORT_INFO)
+               in_mad->data[41] = port_num;    /* PortSelect field */
 
-       if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
-                p->port_num, NULL, NULL,
+       if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
+                port_num, NULL, NULL,
                 (const struct ib_mad_hdr *)in_mad, mad_size,
                 (struct ib_mad_hdr *)out_mad, &mad_size,
                 &out_mad_pkey_index) &
@@ -358,31 +449,54 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
                ret = -EINVAL;
                goto out;
        }
+       memcpy(data, out_mad->data + offset, size);
+       ret = size;
+out:
+       kfree(in_mad);
+       kfree(out_mad);
+       return ret;
+}
+
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+                               char *buf)
+{
+       struct port_table_attribute *tab_attr =
+               container_of(attr, struct port_table_attribute, attr);
+       int offset = tab_attr->index & 0xffff;
+       int width  = (tab_attr->index >> 16) & 0xff;
+       ssize_t ret;
+       u8 data[8];
+
+       ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+                       40 + offset / 8, sizeof(data));
+       if (ret < 0)
+               return sprintf(buf, "N/A (no PMA)\n");
 
        switch (width) {
        case 4:
-               ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+               ret = sprintf(buf, "%u\n", (*data >>
                                            (4 - (offset % 8))) & 0xf);
                break;
        case 8:
-               ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+               ret = sprintf(buf, "%u\n", *data);
                break;
        case 16:
                ret = sprintf(buf, "%u\n",
-                             be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+                             be16_to_cpup((__be16 *)data));
                break;
        case 32:
                ret = sprintf(buf, "%u\n",
-                             be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+                             be32_to_cpup((__be32 *)data));
+               break;
+       case 64:
+               ret = sprintf(buf, "%llu\n",
+                               be64_to_cpup((__be64 *)data));
                break;
+
        default:
                ret = 0;
        }
 
-out:
-       kfree(in_mad);
-       kfree(out_mad);
-
        return ret;
 }
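
get_perf_mad() factors the PMA query out of show_pma_counter() so one helper can fetch PortCounters, PortCountersExt and ClassPortInfo alike; callers pass the attribute id and a byte offset into the response MAD (the counter block starts at byte 40). Reading one 64-bit extended counter from inside this file would look like (a sketch; PortXmitData sits at bit offset 64 of the extended set, hence byte 40 + 64/8):

    __be64 raw;
    int n;

    n = get_perf_mad(dev, port_num, IB_PMA_PORT_COUNTERS_EXT,
                     &raw, 40 + 64 / 8, sizeof(raw));
    if (n < 0)
            return n;               /* no PMA, or the query failed */
    pr_info("port_xmit_data: %llu\n", be64_to_cpup(&raw));
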
 
@@ -403,6 +517,18 @@ static PORT_PMA_ATTR(port_rcv_data             , 13, 32, 224);
 static PORT_PMA_ATTR(port_xmit_packets             , 14, 32, 256);
 static PORT_PMA_ATTR(port_rcv_packets              , 15, 32, 288);
 
+/*
+ * Counters added by extended set
+ */
+static PORT_PMA_ATTR_EXT(port_xmit_data                    , 64,  64);
+static PORT_PMA_ATTR_EXT(port_rcv_data             , 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets         , 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets          , 64, 256);
+static PORT_PMA_ATTR_EXT(unicast_xmit_packets      , 64, 320);
+static PORT_PMA_ATTR_EXT(unicast_rcv_packets       , 64, 384);
+static PORT_PMA_ATTR_EXT(multicast_xmit_packets            , 64, 448);
+static PORT_PMA_ATTR_EXT(multicast_rcv_packets     , 64, 512);
+
 static struct attribute *pma_attrs[] = {
        &port_pma_attr_symbol_error.attr.attr,
        &port_pma_attr_link_error_recovery.attr.attr,
@@ -423,11 +549,65 @@ static struct attribute *pma_attrs[] = {
        NULL
 };
 
+static struct attribute *pma_attrs_ext[] = {
+       &port_pma_attr_symbol_error.attr.attr,
+       &port_pma_attr_link_error_recovery.attr.attr,
+       &port_pma_attr_link_downed.attr.attr,
+       &port_pma_attr_port_rcv_errors.attr.attr,
+       &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+       &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+       &port_pma_attr_port_xmit_discards.attr.attr,
+       &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+       &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+       &port_pma_attr_local_link_integrity_errors.attr.attr,
+       &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+       &port_pma_attr_VL15_dropped.attr.attr,
+       &port_pma_attr_ext_port_xmit_data.attr.attr,
+       &port_pma_attr_ext_port_rcv_data.attr.attr,
+       &port_pma_attr_ext_port_xmit_packets.attr.attr,
+       &port_pma_attr_ext_port_rcv_packets.attr.attr,
+       &port_pma_attr_ext_unicast_rcv_packets.attr.attr,
+       &port_pma_attr_ext_unicast_xmit_packets.attr.attr,
+       &port_pma_attr_ext_multicast_rcv_packets.attr.attr,
+       &port_pma_attr_ext_multicast_xmit_packets.attr.attr,
+       NULL
+};
+
+static struct attribute *pma_attrs_noietf[] = {
+       &port_pma_attr_symbol_error.attr.attr,
+       &port_pma_attr_link_error_recovery.attr.attr,
+       &port_pma_attr_link_downed.attr.attr,
+       &port_pma_attr_port_rcv_errors.attr.attr,
+       &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+       &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+       &port_pma_attr_port_xmit_discards.attr.attr,
+       &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+       &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+       &port_pma_attr_local_link_integrity_errors.attr.attr,
+       &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+       &port_pma_attr_VL15_dropped.attr.attr,
+       &port_pma_attr_ext_port_xmit_data.attr.attr,
+       &port_pma_attr_ext_port_rcv_data.attr.attr,
+       &port_pma_attr_ext_port_xmit_packets.attr.attr,
+       &port_pma_attr_ext_port_rcv_packets.attr.attr,
+       NULL
+};
+
 static struct attribute_group pma_group = {
        .name  = "counters",
        .attrs  = pma_attrs
 };
 
+static struct attribute_group pma_group_ext = {
+       .name  = "counters",
+       .attrs  = pma_attrs_ext
+};
+
+static struct attribute_group pma_group_noietf = {
+       .name  = "counters",
+       .attrs  = pma_attrs_noietf
+};
+
 static void ib_port_release(struct kobject *kobj)
 {
        struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@@ -451,12 +631,41 @@ static void ib_port_release(struct kobject *kobj)
        kfree(p);
 }
 
+static void ib_port_gid_attr_release(struct kobject *kobj)
+{
+       struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
+                                               kobj);
+       struct attribute *a;
+       int i;
+
+       if (g->ndev.attrs) {
+               for (i = 0; (a = g->ndev.attrs[i]); ++i)
+                       kfree(a);
+
+               kfree(g->ndev.attrs);
+       }
+
+       if (g->type.attrs) {
+               for (i = 0; (a = g->type.attrs[i]); ++i)
+                       kfree(a);
+
+               kfree(g->type.attrs);
+       }
+
+       kfree(g);
+}
+
 static struct kobj_type port_type = {
        .release       = ib_port_release,
        .sysfs_ops     = &port_sysfs_ops,
        .default_attrs = port_default_attrs
 };
 
+static struct kobj_type gid_attr_type = {
+       .sysfs_ops      = &gid_attr_sysfs_ops,
+       .release        = ib_port_gid_attr_release
+};
+
 static struct attribute **
 alloc_group_attrs(ssize_t (*show)(struct ib_port *,
                                  struct port_attribute *, char *buf),
@@ -500,6 +709,30 @@ err:
        return NULL;
 }
 
+/*
+ * Figure out which counter table to use depending on
+ * the device capabilities.
+ */
+static struct attribute_group *get_counter_table(struct ib_device *dev,
+                                                int port_num)
+{
+       struct ib_class_port_info cpi;
+
+       if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
+                               &cpi, 40, sizeof(cpi)) >= 0) {
+               if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH)
+                       /* We have extended counters */
+                       return &pma_group_ext;
+
+               if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
+                       /* But not the IETF ones */
+                       return &pma_group_noietf;
+       }
+
+       /* Fall back to normal counters */
+       return &pma_group;
+}
+
 static int add_port(struct ib_device *device, int port_num,
                    int (*port_callback)(struct ib_device *,
                                         u8, struct kobject *))
@@ -528,9 +761,24 @@ static int add_port(struct ib_device *device, int port_num,
                return ret;
        }
 
-       ret = sysfs_create_group(&p->kobj, &pma_group);
-       if (ret)
+       p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
+       if (!p->gid_attr_group) {
+               ret = -ENOMEM;
                goto err_put;
+       }
+
+       p->gid_attr_group->port = p;
+       ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
+                                  &p->kobj, "gid_attrs");
+       if (ret) {
+               kfree(p->gid_attr_group);
+               goto err_put;
+       }
+
+       p->pma_table = get_counter_table(device, port_num);
+       ret = sysfs_create_group(&p->kobj, p->pma_table);
+       if (ret)
+               goto err_put_gid_attrs;
 
        p->gid_group.name  = "gids";
        p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -543,12 +791,38 @@ static int add_port(struct ib_device *device, int port_num,
        if (ret)
                goto err_free_gid;
 
+       p->gid_attr_group->ndev.name = "ndevs";
+       p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
+                                                         attr.gid_tbl_len);
+       if (!p->gid_attr_group->ndev.attrs) {
+               ret = -ENOMEM;
+               goto err_remove_gid;
+       }
+
+       ret = sysfs_create_group(&p->gid_attr_group->kobj,
+                                &p->gid_attr_group->ndev);
+       if (ret)
+               goto err_free_gid_ndev;
+
+       p->gid_attr_group->type.name = "types";
+       p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
+                                                         attr.gid_tbl_len);
+       if (!p->gid_attr_group->type.attrs) {
+               ret = -ENOMEM;
+               goto err_remove_gid_ndev;
+       }
+
+       ret = sysfs_create_group(&p->gid_attr_group->kobj,
+                                &p->gid_attr_group->type);
+       if (ret)
+               goto err_free_gid_type;
+
        p->pkey_group.name  = "pkeys";
        p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
                                                attr.pkey_tbl_len);
        if (!p->pkey_group.attrs) {
                ret = -ENOMEM;
-               goto err_remove_gid;
+               goto err_remove_gid_type;
        }
 
        ret = sysfs_create_group(&p->kobj, &p->pkey_group);
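
Once add_port() succeeds, each port directory carries the per-GID attribute tables next to the existing ones; roughly (a sketch, placeholders in angle brackets, type strings per ib_cache_gid_type_str()):

    /sys/class/infiniband/<device>/ports/<port>/
        counters/...          (pma_table: plain, extended or no-IETF set)
        gids/<index>
        pkeys/<index>
        gid_attrs/
            ndevs/<index>     (netdev the GID is associated with)
            types/<index>     ("IB/RoCE v1" or "RoCE v2")
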
@@ -576,6 +850,28 @@ err_free_pkey:
        kfree(p->pkey_group.attrs);
        p->pkey_group.attrs = NULL;
 
+err_remove_gid_type:
+       sysfs_remove_group(&p->gid_attr_group->kobj,
+                          &p->gid_attr_group->type);
+
+err_free_gid_type:
+       for (i = 0; i < attr.gid_tbl_len; ++i)
+               kfree(p->gid_attr_group->type.attrs[i]);
+
+       kfree(p->gid_attr_group->type.attrs);
+       p->gid_attr_group->type.attrs = NULL;
+
+err_remove_gid_ndev:
+       sysfs_remove_group(&p->gid_attr_group->kobj,
+                          &p->gid_attr_group->ndev);
+
+err_free_gid_ndev:
+       for (i = 0; i < attr.gid_tbl_len; ++i)
+               kfree(p->gid_attr_group->ndev.attrs[i]);
+
+       kfree(p->gid_attr_group->ndev.attrs);
+       p->gid_attr_group->ndev.attrs = NULL;
+
 err_remove_gid:
        sysfs_remove_group(&p->kobj, &p->gid_group);
 
@@ -587,7 +883,10 @@ err_free_gid:
        p->gid_group.attrs = NULL;
 
 err_remove_pma:
-       sysfs_remove_group(&p->kobj, &pma_group);
+       sysfs_remove_group(&p->kobj, p->pma_table);
+
+err_put_gid_attrs:
+       kobject_put(&p->gid_attr_group->kobj);
 
 err_put:
        kobject_put(&p->kobj);
@@ -614,18 +913,12 @@ static ssize_t show_sys_image_guid(struct device *device,
                                   struct device_attribute *dev_attr, char *buf)
 {
        struct ib_device *dev = container_of(device, struct ib_device, dev);
-       struct ib_device_attr attr;
-       ssize_t ret;
-
-       ret = ib_query_device(dev, &attr);
-       if (ret)
-               return ret;
 
        return sprintf(buf, "%04x:%04x:%04x:%04x\n",
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
 }
 
 static ssize_t show_node_guid(struct device *device,
@@ -800,9 +1093,14 @@ static void free_port_list_attributes(struct ib_device *device)
        list_for_each_entry_safe(p, t, &device->port_list, entry) {
                struct ib_port *port = container_of(p, struct ib_port, kobj);
                list_del(&p->entry);
-               sysfs_remove_group(p, &pma_group);
+               sysfs_remove_group(p, port->pma_table);
                sysfs_remove_group(p, &port->pkey_group);
                sysfs_remove_group(p, &port->gid_group);
+               sysfs_remove_group(&port->gid_attr_group->kobj,
+                                  &port->gid_attr_group->ndev);
+               sysfs_remove_group(&port->gid_attr_group->kobj,
+                                  &port->gid_attr_group->type);
+               kobject_put(&port->gid_attr_group->kobj);
                kobject_put(p);
        }
 
index 72feee620ebfb33300a2121a2a22d0fc1d1eafb3..2116132568e70e61c8465ffc9d922d732aca28ed 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/string.h>
 #include <linux/export.h>
 #include <linux/if_ether.h>
+#include <linux/ip.h>
 
 #include <rdma/ib_pack.h>
 
@@ -116,6 +117,72 @@ static const struct ib_field vlan_table[]  = {
          .size_bits    = 16 }
 };
 
+static const struct ib_field ip4_table[]  = {
+       { STRUCT_FIELD(ip4, ver),
+         .offset_words = 0,
+         .offset_bits  = 0,
+         .size_bits    = 4 },
+       { STRUCT_FIELD(ip4, hdr_len),
+         .offset_words = 0,
+         .offset_bits  = 4,
+         .size_bits    = 4 },
+       { STRUCT_FIELD(ip4, tos),
+         .offset_words = 0,
+         .offset_bits  = 8,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, tot_len),
+         .offset_words = 0,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, id),
+         .offset_words = 1,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, frag_off),
+         .offset_words = 1,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, ttl),
+         .offset_words = 2,
+         .offset_bits  = 0,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, protocol),
+         .offset_words = 2,
+         .offset_bits  = 8,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, check),
+         .offset_words = 2,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, saddr),
+         .offset_words = 3,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+       { STRUCT_FIELD(ip4, daddr),
+         .offset_words = 4,
+         .offset_bits  = 0,
+         .size_bits    = 32 }
+};
+
+static const struct ib_field udp_table[]  = {
+       { STRUCT_FIELD(udp, sport),
+         .offset_words = 0,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, dport),
+         .offset_words = 0,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, length),
+         .offset_words = 1,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, csum),
+         .offset_words = 1,
+         .offset_bits  = 16,
+         .size_bits    = 16 }
+};
+
 static const struct ib_field grh_table[]  = {
        { STRUCT_FIELD(grh, ip_version),
          .offset_words = 0,
@@ -213,26 +280,59 @@ static const struct ib_field deth_table[] = {
          .size_bits    = 24 }
 };
 
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header)
+{
+       struct iphdr iph;
+
+       iph.ihl         = 5;
+       iph.version     = 4;
+       iph.tos         = header->ip4.tos;
+       iph.tot_len     = header->ip4.tot_len;
+       iph.id          = header->ip4.id;
+       iph.frag_off    = header->ip4.frag_off;
+       iph.ttl         = header->ip4.ttl;
+       iph.protocol    = header->ip4.protocol;
+       iph.check       = 0;
+       iph.saddr       = header->ip4.saddr;
+       iph.daddr       = header->ip4.daddr;
+
+       return ip_fast_csum((u8 *)&iph, iph.ihl);
+}
+EXPORT_SYMBOL(ib_ud_ip4_csum);
+
 /**
  * ib_ud_header_init - Initialize UD header structure
  * @payload_bytes:Length of packet payload
  * @lrh_present: specify if LRH is present
  * @eth_present: specify if Eth header is present
  * @vlan_present: packet is tagged vlan
- * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @grh_present: GRH flag (if non-zero, GRH will be included)
+ * @ip_version: if non-zero, IP header, V4 or V6, will be included
+ * @udp_present: if non-zero, UDP header will be included
  * @immediate_present: specify if immediate data is present
  * @header:Structure to initialize
  */
-void ib_ud_header_init(int                         payload_bytes,
-                      int                  lrh_present,
-                      int                  eth_present,
-                      int                  vlan_present,
-                      int                  grh_present,
-                      int                  immediate_present,
-                      struct ib_ud_header *header)
+int ib_ud_header_init(int     payload_bytes,
+                     int    lrh_present,
+                     int    eth_present,
+                     int    vlan_present,
+                     int    grh_present,
+                     int    ip_version,
+                     int    udp_present,
+                     int    immediate_present,
+                     struct ib_ud_header *header)
 {
+       size_t udp_bytes = udp_present ? IB_UDP_BYTES : 0;
+
+       grh_present = grh_present && !ip_version;
        memset(header, 0, sizeof *header);
 
+       /*
+        * UDP header without IP header doesn't make sense
+        */
+       if (udp_present && ip_version != 4 && ip_version != 6)
+               return -EINVAL;
+
        if (lrh_present) {
                u16 packet_length;
 
@@ -252,16 +352,37 @@ void ib_ud_header_init(int                    payload_bytes,
        if (vlan_present)
                header->eth.type = cpu_to_be16(ETH_P_8021Q);
 
-       if (grh_present) {
+       if (ip_version == 6 || grh_present) {
                header->grh.ip_version      = 6;
                header->grh.payload_length  =
-                       cpu_to_be16((IB_BTH_BYTES     +
+                       cpu_to_be16((udp_bytes        +
+                                    IB_BTH_BYTES     +
                                     IB_DETH_BYTES    +
                                     payload_bytes    +
                                     4                + /* ICRC     */
                                     3) & ~3);          /* round up */
-               header->grh.next_header     = 0x1b;
+               header->grh.next_header     = udp_present ? IPPROTO_UDP : 0x1b;
+       }
+
+       if (ip_version == 4) {
+               header->ip4.ver = 4; /* version 4 */
+               header->ip4.hdr_len = 5; /* 5 words */
+               header->ip4.tot_len =
+                       cpu_to_be16(IB_IP4_BYTES   +
+                                    udp_bytes     +
+                                    IB_BTH_BYTES  +
+                                    IB_DETH_BYTES +
+                                    payload_bytes +
+                                    4);     /* ICRC     */
+               header->ip4.protocol = IPPROTO_UDP;
        }
+       if (udp_present && ip_version)
+               header->udp.length =
+                       cpu_to_be16(IB_UDP_BYTES   +
+                                    IB_BTH_BYTES  +
+                                    IB_DETH_BYTES +
+                                    payload_bytes +
+                                    4);     /* ICRC     */
 
        if (immediate_present)
                header->bth.opcode           = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
@@ -273,8 +394,11 @@ void ib_ud_header_init(int                     payload_bytes,
        header->lrh_present = lrh_present;
        header->eth_present = eth_present;
        header->vlan_present = vlan_present;
-       header->grh_present = grh_present;
+       header->grh_present = grh_present || (ip_version == 6);
+       header->ipv4_present = ip_version == 4;
+       header->udp_present = udp_present;
        header->immediate_present = immediate_present;
+       return 0;
 }
 EXPORT_SYMBOL(ib_ud_header_init);
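
ib_ud_header_init() now returns an error and accepts ip_version/udp_present, which is what a driver building a RoCE v2 (UDP over IPv4) UD header needs; the IPv4 checksum is filled in with ib_ud_ip4_csum() once the mutable fields are set. A sketch (sgid_addr/dgid_addr are hypothetical __be32 values from the GID mapping; ROCE_V2_UDP_DPORT is assumed to be the IANA-assigned UDP port 4791):

    struct ib_ud_header hdr;
    int ret;

    ret = ib_ud_header_init(payload_len, 0 /* lrh */, 1 /* eth */,
                            0 /* vlan */, 0 /* grh */,
                            4 /* ip_version */, 1 /* udp */,
                            0 /* immediate */, &hdr);
    if (ret)
            return ret;

    hdr.ip4.ttl   = 64;
    hdr.ip4.saddr = sgid_addr;
    hdr.ip4.daddr = dgid_addr;
    hdr.ip4.check = ib_ud_ip4_csum(&hdr);   /* after the fields above */
    hdr.udp.dport = cpu_to_be16(ROCE_V2_UDP_DPORT);
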
 
@@ -311,6 +435,16 @@ int ib_ud_header_pack(struct ib_ud_header *header,
                        &header->grh, buf + len);
                len += IB_GRH_BYTES;
        }
+       if (header->ipv4_present) {
+               ib_pack(ip4_table, ARRAY_SIZE(ip4_table),
+                       &header->ip4, buf + len);
+               len += IB_IP4_BYTES;
+       }
+       if (header->udp_present) {
+               ib_pack(udp_table, ARRAY_SIZE(udp_table),
+                       &header->udp, buf + len);
+               len += IB_UDP_BYTES;
+       }
 
        ib_pack(bth_table, ARRAY_SIZE(bth_table),
                &header->bth, buf + len);
index 40becdb3196e07b97c94ade818af5755bfaae4db..e69bf266049d0117830577167c2987c83cd8ac62 100644 (file)
@@ -232,7 +232,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
        ib_ucontext_notifier_end_account(context);
 }
 
-static struct mmu_notifier_ops ib_umem_notifiers = {
+static const struct mmu_notifier_ops ib_umem_notifiers = {
        .release                    = ib_umem_notifier_release,
        .invalidate_page            = ib_umem_notifier_invalidate_page,
        .invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
index 57f281f8d686224b7a40488330b78ac38943270c..415a3185cde7f45dfc583180ef1a4ef81fec8699 100644 (file)
@@ -210,6 +210,7 @@ static void send_handler(struct ib_mad_agent *agent,
 }
 
 static void recv_handler(struct ib_mad_agent *agent,
+                        struct ib_mad_send_buf *send_buf,
                         struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct ib_umad_file *file = agent->context;
index 94bbd8c155fcca1daf5f2e9ee0fc65552821ff3f..612ccfd39bf98181693f07c5dfcc661dc8191e39 100644 (file)
@@ -204,6 +204,8 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
                             struct ib_event *event);
 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
 
+int uverbs_dealloc_mw(struct ib_mw *mw);
+
 struct ib_uverbs_flow_spec {
        union {
                union {
index 1c02deab068fbf1031d5b21db960feb9b06865c5..6ffc9c4e93afb4efa27fe6f3c17fc3cdf41d8c21 100644 (file)
@@ -291,9 +291,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        struct ib_uverbs_get_context      cmd;
        struct ib_uverbs_get_context_resp resp;
        struct ib_udata                   udata;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       struct ib_device_attr             dev_attr;
-#endif
        struct ib_ucontext               *ucontext;
        struct file                      *filp;
        int ret;
@@ -342,10 +339,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        ucontext->odp_mrs_count = 0;
        INIT_LIST_HEAD(&ucontext->no_private_counters);
 
-       ret = ib_query_device(ib_dev, &dev_attr);
-       if (ret)
-               goto err_free;
-       if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+       if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
                ucontext->invalidate_range = NULL;
 
 #endif
@@ -447,8 +441,6 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 {
        struct ib_uverbs_query_device      cmd;
        struct ib_uverbs_query_device_resp resp;
-       struct ib_device_attr              attr;
-       int                                ret;
 
        if (out_len < sizeof resp)
                return -ENOSPC;
@@ -456,12 +448,8 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       ret = ib_query_device(ib_dev, &attr);
-       if (ret)
-               return ret;
-
        memset(&resp, 0, sizeof resp);
-       copy_query_dev_fields(file, ib_dev, &resp, &attr);
+       copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
 
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp))
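
The uverbs hunks in this file all apply one conversion: struct ib_device caches the device attributes in dev->attrs at registration, so paths that used to issue a synchronous ib_query_device() become a plain field read (a sketch of the before/after shape):

    bool odp;
    int ret;

    /* before: query on every call */
    struct ib_device_attr attr;
    ret = ib_query_device(ib_dev, &attr);
    if (ret)
            return ret;
    odp = attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING;

    /* after: read the copy cached at registration time */
    odp = ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING;
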
@@ -986,11 +974,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        }
 
        if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
-               struct ib_device_attr attr;
-
-               ret = ib_query_device(pd->device, &attr);
-               if (ret || !(attr.device_cap_flags &
-                               IB_DEVICE_ON_DEMAND_PAGING)) {
+               if (!(pd->device->attrs.device_cap_flags &
+                     IB_DEVICE_ON_DEMAND_PAGING)) {
                        pr_debug("ODP support not available\n");
                        ret = -EINVAL;
                        goto err_put;
@@ -1008,7 +993,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        mr->pd      = pd;
        mr->uobject = uobj;
        atomic_inc(&pd->usecnt);
-       atomic_set(&mr->usecnt, 0);
 
        uobj->object = mr;
        ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
@@ -1106,11 +1090,6 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
                }
        }
 
-       if (atomic_read(&mr->usecnt)) {
-               ret = -EBUSY;
-               goto put_uobj_pd;
-       }
-
        old_pd = mr->pd;
        ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
                                        cmd.length, cmd.hca_va,
@@ -1258,7 +1237,7 @@ err_copy:
        idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
 
 err_unalloc:
-       ib_dealloc_mw(mw);
+       uverbs_dealloc_mw(mw);
 
 err_put:
        put_pd_read(pd);
@@ -1287,7 +1266,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 
        mw = uobj->object;
 
-       ret = ib_dealloc_mw(mw);
+       ret = uverbs_dealloc_mw(mw);
        if (!ret)
                uobj->live = 0;
 
@@ -1845,7 +1824,10 @@ static int create_qp(struct ib_uverbs_file *file,
                      sizeof(cmd->create_flags))
                attr.create_flags = cmd->create_flags;
 
-       if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+       if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+                               IB_QP_CREATE_CROSS_CHANNEL |
+                               IB_QP_CREATE_MANAGED_SEND |
+                               IB_QP_CREATE_MANAGED_RECV)) {
                ret = -EINVAL;
                goto err_put;
        }
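
create_qp()'s whitelist widens to admit the cross-channel and managed send/receive creation flags, so userspace may now request such QPs through uverbs. In kernel verbs terms the request amounts to (a sketch; the device must still advertise support for these flags):

    struct ib_qp_init_attr init_attr = {
            .qp_type      = IB_QPT_RC,
            .create_flags = IB_QP_CREATE_CROSS_CHANNEL |
                            IB_QP_CREATE_MANAGED_SEND,
            /* send_cq, recv_cq, cap, ... as usual */
    };
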
index e3ef28861be63dd453d0fd1b55ec21f939919624..39680aed99dd0d4593f91317e666393c562d33d3 100644 (file)
@@ -133,6 +133,17 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
 
+int uverbs_dealloc_mw(struct ib_mw *mw)
+{
+       struct ib_pd *pd = mw->pd;
+       int ret;
+
+       ret = mw->device->dealloc_mw(mw);
+       if (!ret)
+               atomic_dec(&pd->usecnt);
+       return ret;
+}
+
 static void ib_uverbs_release_dev(struct kobject *kobj)
 {
        struct ib_uverbs_device *dev =
@@ -224,7 +235,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                struct ib_mw *mw = uobj->object;
 
                idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-               ib_dealloc_mw(mw);
+               uverbs_dealloc_mw(mw);
                kfree(uobj);
        }
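
All three uverbs paths that destroy a memory window (the error path of the alloc command, the explicit dealloc command, and the ucontext cleanup above) now funnel through the new uverbs_dealloc_mw() helper, so the PD reference taken at allocation is dropped exactly once, and only when the device-level destroy succeeds. A toy model of that invariant (hypothetical code, not part of the patch):

    #include <assert.h>

    struct pd { int usecnt; };
    struct mw { struct pd *pd; };

    static void alloc_mw(struct pd *pd, struct mw *mw)
    {
            mw->pd = pd;
            pd->usecnt++;                   /* reference taken at allocation */
    }

    static int dealloc_mw(struct mw *mw)
    {
            int ret = 0;                    /* stands in for device->dealloc_mw() */

            if (!ret)
                    mw->pd->usecnt--;       /* dropped only on success */
            return ret;
    }

    int main(void)
    {
            struct pd pd = { 0 };
            struct mw mw;

            alloc_mw(&pd, &mw);
            assert(pd.usecnt == 1);
            dealloc_mw(&mw);
            assert(pd.usecnt == 0);         /* balanced on every destroy path */
            return 0;
    }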
 
index 7d2f14c9bbefaf1f3c28eac11b7bcefbc2802927..af020f80d50f4888436db49f935f29a0f5f5b22e 100644 (file)
@@ -144,5 +144,6 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
        memset(dst->dmac, 0, sizeof(dst->dmac));
        dst->net = NULL;
        dst->ifindex = 0;
+       dst->gid_type = IB_GID_TYPE_IB;
 }
 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
index 545906dec26dc5e936df37c635c68b94bb27e226..5af6d024e0538a9eb63049ef7009ad7dab09a967 100644 (file)
@@ -229,12 +229,6 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
 {
        struct ib_pd *pd;
-       struct ib_device_attr devattr;
-       int rc;
-
-       rc = ib_query_device(device, &devattr);
-       if (rc)
-               return ERR_PTR(rc);
 
        pd = device->alloc_pd(device, NULL, NULL);
        if (IS_ERR(pd))
@@ -245,7 +239,7 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
        pd->local_mr = NULL;
        atomic_set(&pd->usecnt, 0);
 
-       if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+       if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
                pd->local_dma_lkey = device->local_dma_lkey;
        else {
                struct ib_mr *mr;
@@ -311,8 +305,61 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
+static int ib_get_header_version(const union rdma_network_hdr *hdr)
+{
+       const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
+       struct iphdr ip4h_checked;
+       const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
+
+       /* If it's IPv6, the version must be 6; otherwise the first
+        * 20 bytes (before the IPv4 header) are garbled.
+        */
+       if (ip6h->version != 6)
+               return (ip4h->version == 4) ? 4 : 0;
+       /* version reads as 6 here, but may really be 4 if the first 20 bytes are garbled */
+
+       /* RoCE v2 requires no IP options, so the header length
+        * must be 5 words.
+        */
+       if (ip4h->ihl != 5)
+               return 6;
+
+       /* Verify the checksum.
+        * We can't write on scattered buffers, so copy the header to a
+        * temporary buffer first.
+        */
+       memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
+       ip4h_checked.check = 0;
+       ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
+       /* if the IPv4 header checksum is valid, trust it */
+       if (ip4h->check == ip4h_checked.check)
+               return 4;
+       return 6;
+}
+
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+                                                    u8 port_num,
+                                                    const struct ib_grh *grh)
+{
+       int grh_version;
+
+       if (rdma_protocol_ib(device, port_num))
+               return RDMA_NETWORK_IB;
+
+       grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+
+       if (grh_version == 4)
+               return RDMA_NETWORK_IPV4;
+
+       if (grh->next_hdr == IPPROTO_UDP)
+               return RDMA_NETWORK_IPV6;
+
+       return RDMA_NETWORK_ROCE_V1;
+}
+
 struct find_gid_index_context {
        u16 vlan_id;
+       enum ib_gid_type gid_type;
 };
 
 static bool find_gid_index(const union ib_gid *gid,
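
ib_get_header_version() above disambiguates what sits in the 40-byte GRH slot: either a genuine IPv6 header, or an IPv4 header in the last 20 bytes preceded by garbage that may happen to read as version 6. A hypothetical userspace sketch of the same decision tree (the checksum helper is a plain RFC 1071 sum, not the kernel's ip_fast_csum()):

    #include <stdint.h>
    #include <string.h>
    #include <netinet/ip.h>
    #include <netinet/ip6.h>

    /* Plain RFC 1071 checksum over 16-bit words. */
    static uint16_t csum16(const void *buf, int words)
    {
            const uint16_t *p = buf;
            uint32_t sum = 0;

            while (words--)
                    sum += *p++;
            while (sum >> 16)
                    sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }

    /* Returns 4, 6, or 0 (neither), mirroring the kernel's decision tree. */
    static int header_version(const uint8_t grh[40])
    {
            const struct ip6_hdr *ip6h = (const struct ip6_hdr *)grh;
            const struct iphdr *ip4h = (const struct iphdr *)(grh + 20);
            struct iphdr tmp;

            if ((ip6h->ip6_vfc >> 4) != 6)
                    return ip4h->version == 4 ? 4 : 0;
            /* Looks like v6, but may be garbage-prefixed v4: RoCE v2
             * allows no IP options, so check IHL, then the checksum. */
            if (ip4h->ihl != 5)
                    return 6;
            memcpy(&tmp, ip4h, sizeof(tmp));
            tmp.check = 0;
            tmp.check = csum16(&tmp, sizeof(tmp) / 2);
            return tmp.check == ip4h->check ? 4 : 6;
    }

    int main(void)
    {
            uint8_t grh[40] = { 0x60 };  /* garbage byte that reads as v6 */
            struct iphdr *ip4h = (struct iphdr *)(grh + 20);

            ip4h->version = 4;
            ip4h->ihl = 5;
            ip4h->check = csum16(ip4h, sizeof(*ip4h) / 2);
            return header_version(grh) == 4 ? 0 : 1;  /* exits 0 */
    }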
@@ -322,6 +369,9 @@ static bool find_gid_index(const union ib_gid *gid,
        struct find_gid_index_context *ctx =
                (struct find_gid_index_context *)context;
 
+       if (ctx->gid_type != gid_attr->gid_type)
+               return false;
+
        if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
            (is_vlan_dev(gid_attr->ndev) &&
             vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
@@ -332,14 +382,49 @@ static bool find_gid_index(const union ib_gid *gid,
 
 static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
                                   u16 vlan_id, const union ib_gid *sgid,
+                                  enum ib_gid_type gid_type,
                                   u16 *gid_index)
 {
-       struct find_gid_index_context context = {.vlan_id = vlan_id};
+       struct find_gid_index_context context = {.vlan_id = vlan_id,
+                                                .gid_type = gid_type};
 
        return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
                                     &context, gid_index);
 }
 
+static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
+                                 enum rdma_network_type net_type,
+                                 union ib_gid *sgid, union ib_gid *dgid)
+{
+       struct sockaddr_in  src_in;
+       struct sockaddr_in  dst_in;
+       __be32 src_saddr, dst_saddr;
+
+       if (!sgid || !dgid)
+               return -EINVAL;
+
+       if (net_type == RDMA_NETWORK_IPV4) {
+               memcpy(&src_in.sin_addr.s_addr,
+                      &hdr->roce4grh.saddr, 4);
+               memcpy(&dst_in.sin_addr.s_addr,
+                      &hdr->roce4grh.daddr, 4);
+               src_saddr = src_in.sin_addr.s_addr;
+               dst_saddr = dst_in.sin_addr.s_addr;
+               ipv6_addr_set_v4mapped(src_saddr,
+                                      (struct in6_addr *)sgid);
+               ipv6_addr_set_v4mapped(dst_saddr,
+                                      (struct in6_addr *)dgid);
+               return 0;
+       } else if (net_type == RDMA_NETWORK_IPV6 ||
+                  net_type == RDMA_NETWORK_IB) {
+               *dgid = hdr->ibgrh.dgid;
+               *sgid = hdr->ibgrh.sgid;
+               return 0;
+       } else {
+               return -EINVAL;
+       }
+}
+
 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                       const struct ib_wc *wc, const struct ib_grh *grh,
                       struct ib_ah_attr *ah_attr)
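
get_gids_from_rdma_hdr() above turns RoCE v2 IPv4 addresses into IPv4-mapped IPv6 GIDs, so the rest of the AH setup can treat every network type uniformly. A small stand-alone example of the mapping (hypothetical, for illustration):

    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>

    int main(void)
    {
            struct in_addr v4;
            unsigned char gid[16] = { 0 };
            char buf[INET6_ADDRSTRLEN];

            inet_pton(AF_INET, "192.0.2.1", &v4);
            gid[10] = 0xff;                  /* ::ffff:0:0/96 mapped prefix */
            gid[11] = 0xff;
            memcpy(&gid[12], &v4, 4);        /* low 32 bits = IPv4 address */
            inet_ntop(AF_INET6, gid, buf, sizeof(buf));
            printf("%s\n", buf);             /* prints ::ffff:192.0.2.1 */
            return 0;
    }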
@@ -347,33 +432,72 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
        u32 flow_class;
        u16 gid_index;
        int ret;
+       enum rdma_network_type net_type = RDMA_NETWORK_IB;
+       enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+       int hoplimit = 0xff;
+       union ib_gid dgid;
+       union ib_gid sgid;
 
        memset(ah_attr, 0, sizeof *ah_attr);
        if (rdma_cap_eth_ah(device, port_num)) {
+               if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
+                       net_type = wc->network_hdr_type;
+               else
+                       net_type = ib_get_net_type_by_grh(device, port_num, grh);
+               gid_type = ib_network_to_gid_type(net_type);
+       }
+       ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+                                    &sgid, &dgid);
+       if (ret)
+               return ret;
+
+       if (rdma_protocol_roce(device, port_num)) {
+               int if_index = 0;
                u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
                                wc->vlan_id : 0xffff;
+               struct net_device *idev;
+               struct net_device *resolved_dev;
 
                if (!(wc->wc_flags & IB_WC_GRH))
                        return -EPROTOTYPE;
 
-               if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
-                   !(wc->wc_flags & IB_WC_WITH_VLAN)) {
-                       ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
-                                                        ah_attr->dmac,
-                                                        wc->wc_flags & IB_WC_WITH_VLAN ?
-                                                        NULL : &vlan_id,
-                                                        0);
-                       if (ret)
-                               return ret;
+               if (!device->get_netdev)
+                       return -EOPNOTSUPP;
+
+               idev = device->get_netdev(device, port_num);
+               if (!idev)
+                       return -ENODEV;
+
+               ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
+                                                  ah_attr->dmac,
+                                                  wc->wc_flags & IB_WC_WITH_VLAN ?
+                                                  NULL : &vlan_id,
+                                                  &if_index, &hoplimit);
+               if (ret) {
+                       dev_put(idev);
+                       return ret;
                }
 
-               ret = get_sgid_index_from_eth(device, port_num, vlan_id,
-                                             &grh->dgid, &gid_index);
+               resolved_dev = dev_get_by_index(&init_net, if_index);
+               if (resolved_dev->flags & IFF_LOOPBACK) {
+                       dev_put(resolved_dev);
+                       resolved_dev = idev;
+                       dev_hold(resolved_dev);
+               }
+               rcu_read_lock();
+               if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
+                                                                  resolved_dev))
+                       ret = -EHOSTUNREACH;
+               rcu_read_unlock();
+               dev_put(idev);
+               dev_put(resolved_dev);
                if (ret)
                        return ret;
 
-               if (wc->wc_flags & IB_WC_WITH_SMAC)
-                       memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+               ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+                                             &dgid, gid_type, &gid_index);
+               if (ret)
+                       return ret;
        }
 
        ah_attr->dlid = wc->slid;
@@ -383,10 +507,11 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 
        if (wc->wc_flags & IB_WC_GRH) {
                ah_attr->ah_flags = IB_AH_GRH;
-               ah_attr->grh.dgid = grh->sgid;
+               ah_attr->grh.dgid = sgid;
 
                if (!rdma_cap_eth_ah(device, port_num)) {
-                       ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+                       ret = ib_find_cached_gid_by_port(device, &dgid,
+                                                        IB_GID_TYPE_IB,
                                                         port_num, NULL,
                                                         &gid_index);
                        if (ret)
@@ -396,7 +521,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                ah_attr->grh.sgid_index = (u8) gid_index;
                flow_class = be32_to_cpu(grh->version_tclass_flow);
                ah_attr->grh.flow_label = flow_class & 0xFFFFF;
-               ah_attr->grh.hop_limit = 0xFF;
+               ah_attr->grh.hop_limit = hoplimit;
                ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
        }
        return 0;
@@ -1014,6 +1139,7 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
                        union ib_gid            sgid;
                        struct ib_gid_attr      sgid_attr;
                        int                     ifindex;
+                       int                     hop_limit;
 
                        ret = ib_query_gid(qp->device,
                                           qp_attr->ah_attr.port_num,
@@ -1028,12 +1154,14 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
 
                        ifindex = sgid_attr.ndev->ifindex;
 
-                       ret = rdma_addr_find_dmac_by_grh(&sgid,
-                                                        &qp_attr->ah_attr.grh.dgid,
-                                                        qp_attr->ah_attr.dmac,
-                                                        NULL, ifindex);
+                       ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+                                                          &qp_attr->ah_attr.grh.dgid,
+                                                          qp_attr->ah_attr.dmac,
+                                                          NULL, &ifindex, &hop_limit);
 
                        dev_put(sgid_attr.ndev);
+
+                       qp_attr->ah_attr.grh.hop_limit = hop_limit;
                }
        }
 out:
@@ -1215,29 +1343,17 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
                mr->pd      = pd;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
-               atomic_set(&mr->usecnt, 0);
        }
 
        return mr;
 }
 EXPORT_SYMBOL(ib_get_dma_mr);
 
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
-       return mr->device->query_mr ?
-               mr->device->query_mr(mr, mr_attr) : -ENOSYS;
-}
-EXPORT_SYMBOL(ib_query_mr);
-
 int ib_dereg_mr(struct ib_mr *mr)
 {
-       struct ib_pd *pd;
+       struct ib_pd *pd = mr->pd;
        int ret;
 
-       if (atomic_read(&mr->usecnt))
-               return -EBUSY;
-
-       pd = mr->pd;
        ret = mr->device->dereg_mr(mr);
        if (!ret)
                atomic_dec(&pd->usecnt);
@@ -1273,49 +1389,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
                mr->pd      = pd;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
-               atomic_set(&mr->usecnt, 0);
        }
 
        return mr;
 }
 EXPORT_SYMBOL(ib_alloc_mr);
 
-/* Memory windows */
-
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
-       struct ib_mw *mw;
-
-       if (!pd->device->alloc_mw)
-               return ERR_PTR(-ENOSYS);
-
-       mw = pd->device->alloc_mw(pd, type);
-       if (!IS_ERR(mw)) {
-               mw->device  = pd->device;
-               mw->pd      = pd;
-               mw->uobject = NULL;
-               mw->type    = type;
-               atomic_inc(&pd->usecnt);
-       }
-
-       return mw;
-}
-EXPORT_SYMBOL(ib_alloc_mw);
-
-int ib_dealloc_mw(struct ib_mw *mw)
-{
-       struct ib_pd *pd;
-       int ret;
-
-       pd = mw->pd;
-       ret = mw->device->dealloc_mw(mw);
-       if (!ret)
-               atomic_dec(&pd->usecnt);
-
-       return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_mw);
-
 /* "Fast" memory regions */
 
 struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
@@ -1530,7 +1609,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
                   int (*set_page)(struct ib_mr *, u64))
 {
        struct scatterlist *sg;
-       u64 last_end_dma_addr = 0, last_page_addr = 0;
+       u64 last_end_dma_addr = 0;
        unsigned int last_page_off = 0;
        u64 page_mask = ~((u64)mr->page_size - 1);
        int i, ret;
@@ -1572,7 +1651,6 @@ next_page:
 
                mr->length += dma_len;
                last_end_dma_addr = end_dma_addr;
-               last_page_addr = end_dma_addr & page_mask;
                last_page_off = end_dma_addr & ~page_mask;
        }
 
index cb78b1e9bcd9ec3035884e3c0c6504bf317dbfc8..f504ba73e5dc27200c988f342ee3d6423072ee9e 100644 (file)
@@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en
        error = l2t_send(tdev, skb, l2e);
        if (error < 0)
                kfree_skb(skb);
-       return error;
+       return error < 0 ? error : 0;
 }
 
 int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
@@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
        error = cxgb3_ofld_send(tdev, skb);
        if (error < 0)
                kfree_skb(skb);
-       return error;
+       return error < 0 ? error : 0;
 }
 
 static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
index cfe404925a399f2be0cd1bdbc23c262ad1b24ca9..97fbfd2c298ecc431ba7049e28f13ea33ba8a520 100644 (file)
@@ -115,10 +115,6 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
                case T3_SEND_WITH_SE_INV:
                        wc->opcode = IB_WC_SEND;
                        break;
-               case T3_BIND_MW:
-                       wc->opcode = IB_WC_BIND_MW;
-                       break;
-
                case T3_LOCAL_INV:
                        wc->opcode = IB_WC_LOCAL_INV;
                        break;
index 5c36ee2809acf52f8f212483dfb81e5649357baf..1d04c872c9d58c69a47dbb1b1c38f2ae69d80cde 100644 (file)
@@ -75,37 +75,6 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
        return ret;
 }
 
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-                                       struct iwch_mr *mhp,
-                                       int shift,
-                                       int npages)
-{
-       u32 stag;
-       int ret;
-
-       /* We could support this... */
-       if (npages > mhp->attr.pbl_size)
-               return -ENOMEM;
-
-       stag = mhp->attr.stag;
-       if (cxio_reregister_phys_mem(&rhp->rdev,
-                                  &stag, mhp->attr.pdid,
-                                  mhp->attr.perms,
-                                  mhp->attr.zbva,
-                                  mhp->attr.va_fbo,
-                                  mhp->attr.len,
-                                  shift - 12,
-                                  mhp->attr.pbl_size, mhp->attr.pbl_addr))
-               return -ENOMEM;
-
-       ret = iwch_finish_mem_reg(mhp, stag);
-       if (ret)
-               cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
-                      mhp->attr.pbl_addr);
-
-       return ret;
-}
-
 int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
 {
        mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
@@ -130,74 +99,3 @@ int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
        return cxio_write_pbl(&mhp->rhp->rdev, pages,
                              mhp->attr.pbl_addr + (offset << 3), npages);
 }
-
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       u64 *iova_start,
-                                       u64 *total_size,
-                                       int *npages,
-                                       int *shift,
-                                       __be64 **page_list)
-{
-       u64 mask;
-       int i, j, n;
-
-       mask = 0;
-       *total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
-                       return -EINVAL;
-               if (i != 0 && i != num_phys_buf - 1 &&
-                   (buffer_list[i].size & ~PAGE_MASK))
-                       return -EINVAL;
-               *total_size += buffer_list[i].size;
-               if (i > 0)
-                       mask |= buffer_list[i].addr;
-               else
-                       mask |= buffer_list[i].addr & PAGE_MASK;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-               else
-                       mask |= (buffer_list[i].addr + buffer_list[i].size +
-                               PAGE_SIZE - 1) & PAGE_MASK;
-       }
-
-       if (*total_size > 0xFFFFFFFFULL)
-               return -ENOMEM;
-
-       /* Find largest page shift we can use to cover buffers */
-       for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
-               if ((1ULL << *shift) & mask)
-                       break;
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
-       buffer_list[0].addr &= ~0ull << *shift;
-
-       *npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               *npages += (buffer_list[i].size +
-                       (1ULL << *shift) - 1) >> *shift;
-
-       if (!*npages)
-               return -EINVAL;
-
-       *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
-       if (!*page_list)
-               return -ENOMEM;
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
-                    ++j)
-                       (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
-                           ((u64) j << *shift));
-
-       PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
-            __func__, (unsigned long long) *iova_start,
-            (unsigned long long) mask, *shift, (unsigned long long) *total_size,
-            *npages);
-
-       return 0;
-
-}
index c34725ca0bb426606e16708fafab2995490d7187..2734820d291b02387b8e925bb5f09ab6e23418dd 100644 (file)
@@ -458,9 +458,6 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
        u32 mmid;
 
        PDBG("%s ib_mr %p\n", __func__, ib_mr);
-       /* There can be no memory windows */
-       if (atomic_read(&ib_mr->usecnt))
-               return -EINVAL;
 
        mhp = to_iwch_mr(ib_mr);
        kfree(mhp->pages);
@@ -479,24 +476,25 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
        return 0;
 }
 
-static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
-                                       struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       int acc,
-                                       u64 *iova_start)
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
 {
-       __be64 *page_list;
-       int shift;
-       u64 total_size;
-       int npages;
-       struct iwch_dev *rhp;
-       struct iwch_pd *php;
+       const u64 total_size = 0xffffffff;
+       const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
+       struct iwch_pd *php = to_iwch_pd(pd);
+       struct iwch_dev *rhp = php->rhp;
        struct iwch_mr *mhp;
-       int ret;
+       __be64 *page_list;
+       int shift = 26, npages, ret, i;
 
        PDBG("%s ib_pd %p\n", __func__, pd);
-       php = to_iwch_pd(pd);
-       rhp = php->rhp;
+
+       /*
+        * T3 only supports 32-bit region sizes.
+        */
+       if (sizeof(phys_addr_t) > 4) {
+               pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+               return ERR_PTR(-ENOTSUPP);
+       }
 
        mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
        if (!mhp)
@@ -504,22 +502,23 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 
        mhp->rhp = rhp;
 
-       /* First check that we have enough alignment */
-       if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+       npages = (total_size + (1ULL << shift) - 1) >> shift;
+       if (!npages) {
                ret = -EINVAL;
                goto err;
        }
 
-       if (num_phys_buf > 1 &&
-           ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
-               ret = -EINVAL;
+       page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
+       if (!page_list) {
+               ret = -ENOMEM;
                goto err;
        }
 
-       ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
-                                  &total_size, &npages, &shift, &page_list);
-       if (ret)
-               goto err;
+       for (i = 0; i < npages; i++)
+               page_list[i] = cpu_to_be64((u64)i << shift);
+
+       PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+               __func__, mask, shift, total_size, npages);
 
        ret = iwch_alloc_pbl(mhp, npages);
        if (ret) {
@@ -536,7 +535,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
        mhp->attr.zbva = 0;
 
        mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-       mhp->attr.va_fbo = *iova_start;
+       mhp->attr.va_fbo = 0;
        mhp->attr.page_size = shift - 12;
 
        mhp->attr.len = (u32) total_size;
@@ -553,76 +552,8 @@ err_pbl:
 err:
        kfree(mhp);
        return ERR_PTR(ret);
-
-}
-
-static int iwch_reregister_phys_mem(struct ib_mr *mr,
-                                    int mr_rereg_mask,
-                                    struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf,
-                                    int acc, u64 * iova_start)
-{
-
-       struct iwch_mr mh, *mhp;
-       struct iwch_pd *php;
-       struct iwch_dev *rhp;
-       __be64 *page_list = NULL;
-       int shift = 0;
-       u64 total_size;
-       int npages = 0;
-       int ret;
-
-       PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
-       /* There can be no memory windows */
-       if (atomic_read(&mr->usecnt))
-               return -EINVAL;
-
-       mhp = to_iwch_mr(mr);
-       rhp = mhp->rhp;
-       php = to_iwch_pd(mr->pd);
-
-       /* make sure we are on the same adapter */
-       if (rhp != php->rhp)
-               return -EINVAL;
-
-       memcpy(&mh, mhp, sizeof *mhp);
-
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               php = to_iwch_pd(pd);
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mh.attr.perms = iwch_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               ret = build_phys_page_list(buffer_list, num_phys_buf,
-                                          iova_start,
-                                          &total_size, &npages,
-                                          &shift, &page_list);
-               if (ret)
-                       return ret;
-       }
-
-       ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
-       kfree(page_list);
-       if (ret) {
-               return ret;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mhp->attr.pdid = php->pdid;
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               mhp->attr.zbva = 0;
-               mhp->attr.va_fbo = *iova_start;
-               mhp->attr.page_size = shift - 12;
-               mhp->attr.len = (u32) total_size;
-               mhp->attr.pbl_size = npages;
-       }
-
-       return 0;
 }
 
-
 static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                      u64 virt, int acc, struct ib_udata *udata)
 {
@@ -726,28 +657,6 @@ err:
        return ERR_PTR(err);
 }
 
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ib_phys_buf bl;
-       u64 kva;
-       struct ib_mr *ibmr;
-
-       PDBG("%s ib_pd %p\n", __func__, pd);
-
-       /*
-        * T3 only supports 32 bits of size.
-        */
-       if (sizeof(phys_addr_t) > 4) {
-               pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
-               return ERR_PTR(-ENOTSUPP);
-       }
-       bl.size = 0xffffffff;
-       bl.addr = 0;
-       kva = 0;
-       ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
-       return ibmr;
-}
-
 static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
        struct iwch_dev *rhp;
@@ -1452,12 +1361,9 @@ int iwch_register_device(struct iwch_dev *dev)
        dev->ibdev.resize_cq = iwch_resize_cq;
        dev->ibdev.poll_cq = iwch_poll_cq;
        dev->ibdev.get_dma_mr = iwch_get_dma_mr;
-       dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
-       dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
        dev->ibdev.reg_user_mr = iwch_reg_user_mr;
        dev->ibdev.dereg_mr = iwch_dereg_mr;
        dev->ibdev.alloc_mw = iwch_alloc_mw;
-       dev->ibdev.bind_mw = iwch_bind_mw;
        dev->ibdev.dealloc_mw = iwch_dealloc_mw;
        dev->ibdev.alloc_mr = iwch_alloc_mr;
        dev->ibdev.map_mr_sg = iwch_map_mr_sg;
index 2ac85b86a680dea89becaf76c75956cda42a1644..252c464a09f6a7ef6c4360a7449ba973e36369c0 100644 (file)
@@ -330,9 +330,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr);
 int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
-int iwch_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind);
 int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
 int iwch_post_zb_read(struct iwch_ep *ep);
@@ -341,21 +338,9 @@ void iwch_unregister_device(struct iwch_dev *dev);
 void stop_read_rep_timer(struct iwch_qp *qhp);
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
                      struct iwch_mr *mhp, int shift);
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-                                       struct iwch_mr *mhp,
-                                       int shift,
-                                       int npages);
 int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
 void iwch_free_pbl(struct iwch_mr *mhp);
 int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       u64 *iova_start,
-                                       u64 *total_size,
-                                       int *npages,
-                                       int *shift,
-                                       __be64 **page_list);
-
 
 #define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
 
index d0548fc6395eac2343775ea4a4b85d3c83bb98b0..d939980a708fdcf1daa6a7a53686cb6509ef2bb5 100644 (file)
@@ -526,88 +526,6 @@ out:
        return err;
 }
 
-int iwch_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind)
-{
-       struct iwch_dev *rhp;
-       struct iwch_mw *mhp;
-       struct iwch_qp *qhp;
-       union t3_wr *wqe;
-       u32 pbl_addr;
-       u8 page_size;
-       u32 num_wrs;
-       unsigned long flag;
-       struct ib_sge sgl;
-       int err=0;
-       enum t3_wr_flags t3_wr_flags;
-       u32 idx;
-       struct t3_swsq *sqp;
-
-       qhp = to_iwch_qp(qp);
-       mhp = to_iwch_mw(mw);
-       rhp = qhp->rhp;
-
-       spin_lock_irqsave(&qhp->lock, flag);
-       if (qhp->attr.state > IWCH_QP_STATE_RTS) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return -EINVAL;
-       }
-       num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
-                           qhp->wq.sq_size_log2);
-       if (num_wrs == 0) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return -ENOMEM;
-       }
-       idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
-       PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
-            mw, mw_bind);
-       wqe = (union t3_wr *) (qhp->wq.queue + idx);
-
-       t3_wr_flags = 0;
-       if (mw_bind->send_flags & IB_SEND_SIGNALED)
-               t3_wr_flags = T3_COMPLETION_FLAG;
-
-       sgl.addr = mw_bind->bind_info.addr;
-       sgl.lkey = mw_bind->bind_info.mr->lkey;
-       sgl.length = mw_bind->bind_info.length;
-       wqe->bind.reserved = 0;
-       wqe->bind.type = TPT_VATO;
-
-       /* TBD: check perms */
-       wqe->bind.perms = iwch_ib_to_tpt_bind_access(
-               mw_bind->bind_info.mw_access_flags);
-       wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
-       wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
-       wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
-       wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
-       err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
-       if (err) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return err;
-       }
-       wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
-       sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
-       sqp->wr_id = mw_bind->wr_id;
-       sqp->opcode = T3_BIND_MW;
-       sqp->sq_wptr = qhp->wq.sq_wptr;
-       sqp->complete = 0;
-       sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
-       wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
-       wqe->bind.mr_pagesz = page_size;
-       build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
-                      Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
-                      sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
-       ++(qhp->wq.wptr);
-       ++(qhp->wq.sq_wptr);
-       spin_unlock_irqrestore(&qhp->lock, flag);
-
-       if (cxio_wq_db_enabled(&qhp->wq))
-               ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-       return err;
-}
-
 static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
                                    u8 *layer_type, u8 *ecode)
 {
index 326d07d823a5e62f0ee206b8c5f5865f7f97c08d..cd2ff5f9518a2b7143d53821672a5d98b9773b17 100644 (file)
@@ -3271,6 +3271,12 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
                                    &ep->com.mapped_local_addr;
 
+       if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
+               err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
+                                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+               if (err)
+                       return err;
+       }
        c4iw_init_wr_wait(&ep->com.wr_wait);
        err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
                                   ep->stid, &sin6->sin6_addr,
@@ -3282,13 +3288,13 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
                                          0, 0, __func__);
        else if (err > 0)
                err = net_xmit_errno(err);
-       if (err)
+       if (err) {
+               cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
+                                  (const u32 *)&sin6->sin6_addr.s6_addr, 1);
                pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
                       err, ep->stid,
                       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
-       else
-               cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
-                              (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+       }
        return err;
 }
 
index de9cd6901752fc1e3da38d64f62bfce7cb853501..cf21df4a8bf5b80da00f6f82003980650df9c522 100644 (file)
@@ -744,9 +744,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
                case FW_RI_SEND_WITH_SE:
                        wc->opcode = IB_WC_SEND;
                        break;
-               case FW_RI_BIND_MW:
-                       wc->opcode = IB_WC_BIND_MW;
-                       break;
 
                case FW_RI_LOCAL_INV:
                        wc->opcode = IB_WC_LOCAL_INV;
index 58fce1742b8d8c33b91dfa89075617a9c5cf34f7..8024ea4417b8735b93e77dc9310314db44b8833e 100644 (file)
@@ -315,14 +315,12 @@ static int qp_release(struct inode *inode, struct file *file)
 static int qp_open(struct inode *inode, struct file *file)
 {
        struct c4iw_debugfs_data *qpd;
-       int ret = 0;
        int count = 1;
 
        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
-       if (!qpd) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       if (!qpd)
+               return -ENOMEM;
+
        qpd->devp = inode->i_private;
        qpd->pos = 0;
 
@@ -333,8 +331,8 @@ static int qp_open(struct inode *inode, struct file *file)
        qpd->bufsize = count * 128;
        qpd->buf = vmalloc(qpd->bufsize);
        if (!qpd->buf) {
-               ret = -ENOMEM;
-               goto err1;
+               kfree(qpd);
+               return -ENOMEM;
        }
 
        spin_lock_irq(&qpd->devp->lock);
@@ -343,11 +341,7 @@ static int qp_open(struct inode *inode, struct file *file)
 
        qpd->buf[qpd->pos++] = 0;
        file->private_data = qpd;
-       goto out;
-err1:
-       kfree(qpd);
-out:
-       return ret;
+       return 0;
 }
 
 static const struct file_operations qp_debugfs_fops = {
@@ -781,8 +775,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
                pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
                       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
                       rdev->lldi.ucq_density);
-               err = -EINVAL;
-               goto err1;
+               return -EINVAL;
        }
        if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
            rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
@@ -791,8 +784,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
                       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
                       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
                       rdev->lldi.vr->cq.size);
-               err = -EINVAL;
-               goto err1;
+               return -EINVAL;
        }
 
        rdev->qpmask = rdev->lldi.udb_density - 1;
@@ -816,10 +808,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
             rdev->lldi.db_reg, rdev->lldi.gts_reg,
             rdev->qpmask, rdev->cqmask);
 
-       if (c4iw_num_stags(rdev) == 0) {
-               err = -EINVAL;
-               goto err1;
-       }
+       if (c4iw_num_stags(rdev) == 0)
+               return -EINVAL;
 
        rdev->stats.pd.total = T4_MAX_NUM_PD;
        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
@@ -831,29 +821,31 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing resources\n", err);
-               goto err1;
+               return err;
        }
        err = c4iw_pblpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
-               goto err2;
+               goto destroy_resource;
        }
        err = c4iw_rqtpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
-               goto err3;
+               goto destroy_pblpool;
        }
        err = c4iw_ocqp_pool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
-               goto err4;
+               goto destroy_rqtpool;
        }
        rdev->status_page = (struct t4_dev_status_page *)
                            __get_free_page(GFP_KERNEL);
-       if (!rdev->status_page) {
-               pr_err(MOD "error allocating status page\n");
-               goto err4;
-       }
+       if (!rdev->status_page)
+               goto destroy_ocqp_pool;
+       rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
+       rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
+       rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
+       rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
 
        if (c4iw_wr_log) {
                rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
@@ -869,13 +861,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        rdev->status_page->db_off = 0;
 
        return 0;
-err4:
+destroy_ocqp_pool:
+       c4iw_ocqp_pool_destroy(rdev);
+destroy_rqtpool:
        c4iw_rqtpool_destroy(rdev);
-err3:
+destroy_pblpool:
        c4iw_pblpool_destroy(rdev);
-err2:
+destroy_resource:
        c4iw_destroy_resource(&rdev->resource);
-err1:
        return err;
 }
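
The unwind labels in c4iw_rdev_open() are renamed from err1..err4 to names describing the teardown they perform, the style generally preferred in the kernel because inserting a new setup step can no longer misroute earlier failures. A generic sketch of the idiom (function names are hypothetical):

    #include <stdio.h>

    static int create_a(void) { return 0; }
    static int create_b(void) { return 0; }
    static int create_c(void) { return -1; }   /* force the unwind path */
    static void destroy_a(void) { puts("undo a"); }
    static void destroy_b(void) { puts("undo b"); }

    static int setup(void)
    {
            int err;

            err = create_a();
            if (err)
                    return err;              /* nothing to undo yet */
            err = create_b();
            if (err)
                    goto undo_a;
            err = create_c();
            if (err)
                    goto undo_b;
            return 0;

    undo_b:
            destroy_b();
    undo_a:
            destroy_a();
            return err;
    }

    int main(void) { return setup() ? 1 : 0; }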
 
index 00e55faa086aa2a30259c5c8b96cc0ae129a4ef6..fb2de75a039216e2f732a8f61ba387de121f1ce7 100644 (file)
@@ -947,8 +947,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr);
 int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind);
 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
 int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -968,17 +966,6 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
                                           u64 length, u64 virt, int acc,
                                           struct ib_udata *udata);
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
-                                       struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       int acc,
-                                       u64 *iova_start);
-int c4iw_reregister_phys_mem(struct ib_mr *mr,
-                                    int mr_rereg_mask,
-                                    struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf,
-                                    int acc, u64 *iova_start);
 int c4iw_dereg_mr(struct ib_mr *ib_mr);
 int c4iw_destroy_cq(struct ib_cq *ib_cq);
 struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
index e1629ab58db7873a3d9a6c044ba7bc65eb4512c6..7849890c478141e57136127c84f4cad9fd46227d 100644 (file)
@@ -392,32 +392,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
        return ret;
 }
 
-static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
-                         struct c4iw_mr *mhp, int shift, int npages)
-{
-       u32 stag;
-       int ret;
-
-       if (npages > mhp->attr.pbl_size)
-               return -ENOMEM;
-
-       stag = mhp->attr.stag;
-       ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
-                             FW_RI_STAG_NSMR, mhp->attr.perms,
-                             mhp->attr.mw_bind_enable, mhp->attr.zbva,
-                             mhp->attr.va_fbo, mhp->attr.len, shift - 12,
-                             mhp->attr.pbl_size, mhp->attr.pbl_addr);
-       if (ret)
-               return ret;
-
-       ret = finish_mem_reg(mhp, stag);
-       if (ret)
-               dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
-                      mhp->attr.pbl_addr);
-
-       return ret;
-}
-
 static int alloc_pbl(struct c4iw_mr *mhp, int npages)
 {
        mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
@@ -431,228 +405,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages)
        return 0;
 }
 
-static int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, u64 *iova_start,
-                               u64 *total_size, int *npages,
-                               int *shift, __be64 **page_list)
-{
-       u64 mask;
-       int i, j, n;
-
-       mask = 0;
-       *total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
-                       return -EINVAL;
-               if (i != 0 && i != num_phys_buf - 1 &&
-                   (buffer_list[i].size & ~PAGE_MASK))
-                       return -EINVAL;
-               *total_size += buffer_list[i].size;
-               if (i > 0)
-                       mask |= buffer_list[i].addr;
-               else
-                       mask |= buffer_list[i].addr & PAGE_MASK;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-               else
-                       mask |= (buffer_list[i].addr + buffer_list[i].size +
-                               PAGE_SIZE - 1) & PAGE_MASK;
-       }
-
-       if (*total_size > 0xFFFFFFFFULL)
-               return -ENOMEM;
-
-       /* Find largest page shift we can use to cover buffers */
-       for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
-               if ((1ULL << *shift) & mask)
-                       break;
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
-       buffer_list[0].addr &= ~0ull << *shift;
-
-       *npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               *npages += (buffer_list[i].size +
-                       (1ULL << *shift) - 1) >> *shift;
-
-       if (!*npages)
-               return -EINVAL;
-
-       *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
-       if (!*page_list)
-               return -ENOMEM;
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
-                    ++j)
-                       (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
-                           ((u64) j << *shift));
-
-       PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
-            __func__, (unsigned long long)*iova_start,
-            (unsigned long long)mask, *shift, (unsigned long long)*total_size,
-            *npages);
-
-       return 0;
-
-}
-
-int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
-                            struct ib_pd *pd, struct ib_phys_buf *buffer_list,
-                            int num_phys_buf, int acc, u64 *iova_start)
-{
-
-       struct c4iw_mr mh, *mhp;
-       struct c4iw_pd *php;
-       struct c4iw_dev *rhp;
-       __be64 *page_list = NULL;
-       int shift = 0;
-       u64 total_size;
-       int npages;
-       int ret;
-
-       PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
-       /* There can be no memory windows */
-       if (atomic_read(&mr->usecnt))
-               return -EINVAL;
-
-       mhp = to_c4iw_mr(mr);
-       rhp = mhp->rhp;
-       php = to_c4iw_pd(mr->pd);
-
-       /* make sure we are on the same adapter */
-       if (rhp != php->rhp)
-               return -EINVAL;
-
-       memcpy(&mh, mhp, sizeof *mhp);
-
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               php = to_c4iw_pd(pd);
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
-               mh.attr.perms = c4iw_ib_to_tpt_access(acc);
-               mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
-                                        IB_ACCESS_MW_BIND;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               ret = build_phys_page_list(buffer_list, num_phys_buf,
-                                               iova_start,
-                                               &total_size, &npages,
-                                               &shift, &page_list);
-               if (ret)
-                       return ret;
-       }
-
-       if (mr_exceeds_hw_limits(rhp, total_size)) {
-               kfree(page_list);
-               return -EINVAL;
-       }
-
-       ret = reregister_mem(rhp, php, &mh, shift, npages);
-       kfree(page_list);
-       if (ret)
-               return ret;
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mhp->attr.pdid = php->pdid;
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               mhp->attr.zbva = 0;
-               mhp->attr.va_fbo = *iova_start;
-               mhp->attr.page_size = shift - 12;
-               mhp->attr.len = (u32) total_size;
-               mhp->attr.pbl_size = npages;
-       }
-
-       return 0;
-}
-
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf, int acc, u64 *iova_start)
-{
-       __be64 *page_list;
-       int shift;
-       u64 total_size;
-       int npages;
-       struct c4iw_dev *rhp;
-       struct c4iw_pd *php;
-       struct c4iw_mr *mhp;
-       int ret;
-
-       PDBG("%s ib_pd %p\n", __func__, pd);
-       php = to_c4iw_pd(pd);
-       rhp = php->rhp;
-
-       mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-       if (!mhp)
-               return ERR_PTR(-ENOMEM);
-
-       mhp->rhp = rhp;
-
-       /* First check that we have enough alignment */
-       if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
-               ret = -EINVAL;
-               goto err;
-       }
-
-       if (num_phys_buf > 1 &&
-           ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
-               ret = -EINVAL;
-               goto err;
-       }
-
-       ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
-                                       &total_size, &npages, &shift,
-                                       &page_list);
-       if (ret)
-               goto err;
-
-       if (mr_exceeds_hw_limits(rhp, total_size)) {
-               kfree(page_list);
-               ret = -EINVAL;
-               goto err;
-       }
-
-       ret = alloc_pbl(mhp, npages);
-       if (ret) {
-               kfree(page_list);
-               goto err;
-       }
-
-       ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
-                            npages);
-       kfree(page_list);
-       if (ret)
-               goto err_pbl;
-
-       mhp->attr.pdid = php->pdid;
-       mhp->attr.zbva = 0;
-
-       mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
-       mhp->attr.va_fbo = *iova_start;
-       mhp->attr.page_size = shift - 12;
-
-       mhp->attr.len = (u32) total_size;
-       mhp->attr.pbl_size = npages;
-       ret = register_mem(rhp, php, mhp, shift);
-       if (ret)
-               goto err_pbl;
-
-       return &mhp->ibmr;
-
-err_pbl:
-       c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
-                             mhp->attr.pbl_size << 3);
-
-err:
-       kfree(mhp);
-       return ERR_PTR(ret);
-
-}
-
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
 {
        struct c4iw_dev *rhp;
@@ -952,9 +704,6 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
        u32 mmid;
 
        PDBG("%s ib_mr %p\n", __func__, ib_mr);
-       /* There can be no memory windows */
-       if (atomic_read(&ib_mr->usecnt))
-               return -EINVAL;
 
        mhp = to_c4iw_mr(ib_mr);
        rhp = mhp->rhp;
index 0a7d99818b17d13384e32cb81446bd250647f85f..ec04272fbdc2ffbf882318678332a506c1f1d4ec 100644 (file)
@@ -549,12 +549,9 @@ int c4iw_register_device(struct c4iw_dev *dev)
        dev->ibdev.resize_cq = c4iw_resize_cq;
        dev->ibdev.poll_cq = c4iw_poll_cq;
        dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
-       dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
-       dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
        dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
        dev->ibdev.dereg_mr = c4iw_dereg_mr;
        dev->ibdev.alloc_mw = c4iw_alloc_mw;
-       dev->ibdev.bind_mw = c4iw_bind_mw;
        dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
        dev->ibdev.alloc_mr = c4iw_alloc_mr;
        dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
index aa515afee7248823428cb2b725bf10c70f4fd82a..e99345eb875aa286a8b962696eba9742189e660c 100644 (file)
@@ -933,11 +933,6 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        return err;
 }
 
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
-{
-       return -ENOSYS;
-}
-
 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
                                    u8 *ecode)
 {
index 1092a2d1f607464152fbd2d7e836e180d2131312..6126bbe36095c21021f9cc5cdca3b75da299dc85 100644 (file)
@@ -699,4 +699,11 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
 
 struct t4_dev_status_page {
        u8 db_off;
+       u8 pad1;
+       u16 pad2;
+       u32 pad3;
+       u64 qp_start;
+       u64 qp_size;
+       u64 cq_start;
+       u64 cq_size;
 };
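
The status page is mapped into userspace, so the explicit pad fields matter: they keep the new u64 ranges naturally aligned and make the layout identical on 32- and 64-bit builds, which is also why the ABI version bump follows in the next file. A stand-alone mirror of the layout with compile-time checks (a sketch; the authoritative definition is the struct above):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct t4_dev_status_page {
            uint8_t  db_off;
            uint8_t  pad1;
            uint16_t pad2;
            uint32_t pad3;
            uint64_t qp_start;
            uint64_t qp_size;
            uint64_t cq_start;
            uint64_t cq_size;
    };

    static_assert(offsetof(struct t4_dev_status_page, qp_start) == 8,
                  "pads must push qp_start to an 8-byte boundary");
    static_assert(sizeof(struct t4_dev_status_page) == 40,
                  "layout must match on 32- and 64-bit builds");

    int main(void) { return 0; }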
index cbd0ce1707282a63fe1acb10c223c4af22e28ad1..295f422b9a3ab4caa829182712dcab6bc19a504d 100644 (file)
@@ -32,7 +32,7 @@
 #ifndef __C4IW_USER_H__
 #define __C4IW_USER_H__
 
-#define C4IW_UVERBS_ABI_VERSION        2
+#define C4IW_UVERBS_ABI_VERSION        3
 
 /*
  * Make sure that all structs defined in this file remain laid out so
index 86af71351d9a5be6cbdd87f3d22ccb7078c82b23..105246fba2e7c381958c2e5e74b3a4af7b1441a8 100644 (file)
@@ -92,7 +92,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
                                ah_attr->grh.sgid_index, &sgid, &gid_attr);
        if (ret)
                return ERR_PTR(ret);
-       memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+       eth_zero_addr(ah->av.eth.s_mac);
        if (gid_attr.ndev) {
                if (is_vlan_dev(gid_attr.ndev))
                        vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
@@ -104,6 +104,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
        ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
        ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
        ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+       ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
        if (ah_attr->static_rate) {
                ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
                while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
index b88fc8f5ab180c717e827f582187b817f92a0e33..9f8b516eb2b0f2412452685d21df360075245f0c 100644 (file)
@@ -811,9 +811,6 @@ repoll:
                        wc->opcode    = IB_WC_MASKED_FETCH_ADD;
                        wc->byte_len  = 8;
                        break;
-               case MLX4_OPCODE_BIND_MW:
-                       wc->opcode    = IB_WC_BIND_MW;
-                       break;
                case MLX4_OPCODE_LSO:
                        wc->opcode    = IB_WC_LSO;
                        break;
index 26833bfa639bb61fb9658187bfa5cd45497340ed..d68f506c1922e8d18af269a4fcde33fd0e3ae5d5 100644 (file)
@@ -817,17 +817,48 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
        return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
-static void edit_counter(struct mlx4_counter *cnt,
-                                       struct ib_pma_portcounters *pma_cnt)
+static void edit_counter(struct mlx4_counter *cnt, void *counters,
+                        __be16 attr_id)
 {
-       ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
-                            (be64_to_cpu(cnt->tx_bytes) >> 2));
-       ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
-                            (be64_to_cpu(cnt->rx_bytes) >> 2));
-       ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
-                            be64_to_cpu(cnt->tx_frames));
-       ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
-                            be64_to_cpu(cnt->rx_frames));
+       switch (attr_id) {
+       case IB_PMA_PORT_COUNTERS:
+       {
+               struct ib_pma_portcounters *pma_cnt =
+                       (struct ib_pma_portcounters *)counters;
+
+               ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
+                                    (be64_to_cpu(cnt->tx_bytes) >> 2));
+               ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
+                                    (be64_to_cpu(cnt->rx_bytes) >> 2));
+               ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
+                                    be64_to_cpu(cnt->tx_frames));
+               ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
+                                    be64_to_cpu(cnt->rx_frames));
+               break;
+       }
+       case IB_PMA_PORT_COUNTERS_EXT:
+       {
+               struct ib_pma_portcounters_ext *pma_cnt_ext =
+                       (struct ib_pma_portcounters_ext *)counters;
+
+               pma_cnt_ext->port_xmit_data =
+                       cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2);
+               pma_cnt_ext->port_rcv_data =
+                       cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2);
+               pma_cnt_ext->port_xmit_packets = cnt->tx_frames;
+               pma_cnt_ext->port_rcv_packets = cnt->rx_frames;
+               break;
+       }
+       }
+}
+
+static int iboe_process_mad_port_info(void *out_mad)
+{
+       struct ib_class_port_info cpi = {};
+
+       cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+       memcpy(out_mad, &cpi, sizeof(cpi));
+       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
 static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
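
edit_counter() now handles both counter formats: classic PortCounters are 32-bit fields that must saturate, while PortCountersExt carries full 64-bit values; both express data counters in 4-byte units, hence the '>> 2'. A small sketch of the difference, assuming ASSIGN_32BIT_COUNTER saturates at UINT32_MAX rather than truncating:

    #include <stdint.h>
    #include <stdio.h>

    /* Saturating 32-bit assignment, as assumed for ASSIGN_32BIT_COUNTER. */
    static uint32_t clamp32(uint64_t v)
    {
            return v > UINT32_MAX ? UINT32_MAX : (uint32_t)v;
    }

    int main(void)
    {
            uint64_t tx_bytes = 0x1234567890ULL;   /* large byte count */
            uint64_t dwords = tx_bytes >> 2;       /* PMA counts 4-byte units */

            printf("PortCounters:    %u (saturated)\n", clamp32(dwords));
            printf("PortCountersExt: %llu\n", (unsigned long long)dwords);
            return 0;
    }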
@@ -842,6 +873,9 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
        if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
                return -EINVAL;
 
+       if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)
+               return iboe_process_mad_port_info((void *)(out_mad->data + 40));
+
        memset(&counter_stats, 0, sizeof(counter_stats));
        mutex_lock(&dev->counters_table[port_num - 1].mutex);
        list_for_each_entry(tmp_counter,
@@ -863,7 +897,8 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
                switch (counter_stats.counter_mode & 0xf) {
                case 0:
                        edit_counter(&counter_stats,
-                                    (void *)(out_mad->data + 40));
+                                    (void *)(out_mad->data + 40),
+                                    in_mad->mad_hdr.attr_id);
                        err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
                        break;
                default:
@@ -894,8 +929,10 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
         */
        if (link == IB_LINK_LAYER_INFINIBAND) {
                if (mlx4_is_slave(dev->dev) &&
-                   in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
-                   in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS)
+                   (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+                    (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
+                     in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
+                     in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
                        return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
                                                in_grh, in_mad, out_mad);
 
index 97d6878f993828085a0f26f941572674ce68b967..1c7ab6cabbb86989fba8952b8f5e6be645e313dc 100644 (file)
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
        return dev;
 }
 
-static int mlx4_ib_update_gids(struct gid_entry *gids,
-                              struct mlx4_ib_dev *ibdev,
-                              u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+                                 struct mlx4_ib_dev *ibdev,
+                                 u8 port_num)
 {
        struct mlx4_cmd_mailbox *mailbox;
        int err;
@@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
        return err;
 }
 
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+                                    struct mlx4_ib_dev *ibdev,
+                                    u8 port_num)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+       int err;
+       struct mlx4_dev *dev = ibdev->dev;
+       int i;
+       struct {
+               union ib_gid    gid;
+               __be32          rsrvd1[2];
+               __be16          rsrvd2;
+               u8              type;
+               u8              version;
+               __be32          rsrvd3;
+       } *gid_tbl;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox))
+               return -ENOMEM;
+
+       gid_tbl = mailbox->buf;
+       for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+               memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
+               if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+                       gid_tbl[i].version = 2;
+                       if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
+                               gid_tbl[i].type = 1;
+                       else
+                               memset(&gid_tbl[i].gid, 0, 12);
+               }
+       }
+
+       err = mlx4_cmd(dev, mailbox->dma,
+                      MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+                      1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+                      MLX4_CMD_WRAPPED);
+       if (mlx4_is_bonded(dev))
+               err += mlx4_cmd(dev, mailbox->dma,
+                               MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+                               1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+                               MLX4_CMD_WRAPPED);
+
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       return err;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+                              struct mlx4_ib_dev *ibdev,
+                              u8 port_num)
+{
+       if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+               return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+
+       return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+}
+
 static int mlx4_ib_add_gid(struct ib_device *device,
                           u8 port_num,
                           unsigned int index,
@@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
        port_gid_table = &iboe->gids[port_num - 1];
        spin_lock_bh(&iboe->lock);
        for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
-               if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+               if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+                   (port_gid_table->gids[i].gid_type == attr->gid_type))  {
                        found = i;
                        break;
                }
@@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
                        } else {
                                *context = port_gid_table->gids[free].ctx;
                                memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+                               port_gid_table->gids[free].gid_type = attr->gid_type;
                                port_gid_table->gids[free].ctx->real_index = free;
                                port_gid_table->gids[free].ctx->refcount = 1;
                                hw_update = 1;
@@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
                if (!gids) {
                        ret = -ENOMEM;
                } else {
-                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
                                memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+                               gids[i].gid_type = port_gid_table->gids[i].gid_type;
+                       }
                }
        }
        spin_unlock_bh(&iboe->lock);
@@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        int i;
        int ret;
        unsigned long flags;
+       struct ib_gid_attr attr;
 
        if (port_num > MLX4_MAX_PORTS)
                return -EINVAL;
@@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
                return index;
 
-       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
        if (ret)
                return ret;
 
+       if (attr.ndev)
+               dev_put(attr.ndev);
+
        if (!memcmp(&gid, &zgid, sizeof(gid)))
                return -EINVAL;
 
@@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        port_gid_table = &iboe->gids[port_num - 1];
 
        for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
-               if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+               if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
+                   attr.gid_type == port_gid_table->gids[i].gid_type) {
                        ctx = port_gid_table->gids[i].ctx;
                        break;
                }
@@ -2119,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
                               struct ib_port_immutable *immutable)
 {
        struct ib_port_attr attr;
+       struct mlx4_ib_dev *mdev = to_mdev(ibdev);
        int err;
 
        err = mlx4_ib_query_port(ibdev, port_num, &attr);
@@ -2128,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 
-       if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+       if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
                immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       else
-               immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+       } else {
+               if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+                       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+               if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+                       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+                               RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+       }
 
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
@@ -2283,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
            dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
                ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
-               ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
                ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
 
                ibdev->ib_dev.uverbs_cmd_mask |=
@@ -2423,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (mlx4_ib_init_sriov(ibdev))
                goto err_mad;
 
-       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+           dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
                if (!iboe->nb.notifier_call) {
                        iboe->nb.notifier_call = mlx4_ib_netdev_event;
                        err = register_netdevice_notifier(&iboe->nb);
@@ -2432,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                                goto err_notif;
                        }
                }
+               if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+                       err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+                       if (err) {
+                               goto err_notif;
+                       }
+               }
        }
 
        for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
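
The v1/v2 GID table writer above tags each entry for the firmware: a RoCE v2 GID gets version = 2, and if it is not an IPv4-mapped address it is also marked type = 1 (IPv6); for an IPv4-mapped GID the first 12 bytes of the entry are cleared so only the IPv4 address at bytes 12..15 remains. A small sketch of that classification, assuming only ipv6_addr_v4mapped() from <net/ipv6.h>:

#include <net/ipv6.h>
#include <rdma/ib_verbs.h>

/* An IPv4 RoCE GID is the v4-mapped form ::ffff:a.b.c.d:
 * bytes 0..9 are zero, bytes 10..11 are 0xff, and bytes
 * 12..15 hold the IPv4 address.
 */
static bool roce_gid_is_ipv4(const union ib_gid *gid)
{
	return ipv6_addr_v4mapped((const struct in6_addr *)gid->raw);
}
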
index 1caa11edac03347a246467436daa72dd18d5c505..52ce7b000044f6e0814b59b08e1ea27a696e77d8 100644 (file)
@@ -177,11 +177,18 @@ struct mlx4_ib_wq {
        unsigned                tail;
 };
 
+enum {
+       MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
 enum mlx4_ib_qp_flags {
        MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
        MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
        MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
        MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+       /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+       MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
        MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
        MLX4_IB_SRIOV_SQP = 1 << 31,
 };
@@ -478,6 +485,7 @@ struct gid_cache_context {
 
 struct gid_entry {
        union ib_gid    gid;
+       enum ib_gid_type gid_type;
        struct gid_cache_context *ctx;
 };
 
@@ -704,8 +712,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
 struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                   struct ib_mw_bind *mw_bind);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
index 4d1e1c632603a7b81e6685cd5ad6020f98a02a47..242b94ec105babe092ba3fee42c931ee24518adc 100644 (file)
@@ -366,28 +366,6 @@ err_free:
        return ERR_PTR(err);
 }
 
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                   struct ib_mw_bind *mw_bind)
-{
-       struct ib_bind_mw_wr  wr;
-       struct ib_send_wr *bad_wr;
-       int ret;
-
-       memset(&wr, 0, sizeof(wr));
-       wr.wr.opcode            = IB_WR_BIND_MW;
-       wr.wr.wr_id             = mw_bind->wr_id;
-       wr.wr.send_flags        = mw_bind->send_flags;
-       wr.mw                   = mw;
-       wr.bind_info            = mw_bind->bind_info;
-       wr.rkey                 = ib_inc_rkey(mw->rkey);
-
-       ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
-       if (!ret)
-               mw->rkey = wr.rkey;
-
-       return ret;
-}
-
 int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
 {
        struct mlx4_ib_mw *mw = to_mmw(ibmw);
index 13eaaf45288f80d4bb6658d5a18985a978e8c98d..fd97534762b8dc7f83651a52e9398a3aa6e4944e 100644 (file)
@@ -32,6 +32,8 @@
  */
 
 #include <linux/log2.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
@@ -85,6 +87,7 @@ struct mlx4_ib_sqp {
        u32                     send_psn;
        struct ib_ud_header     ud_header;
        u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
+       struct ib_qp            *roce_v2_gsi;
 };
 
 enum {
@@ -115,7 +118,6 @@ static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_REG_MR]                          = cpu_to_be32(MLX4_OPCODE_FMR),
        [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
        [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
-       [IB_WR_BIND_MW]                         = cpu_to_be32(MLX4_OPCODE_BIND_MW),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -154,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
                        }
                }
        }
-       return proxy_sqp;
+       if (proxy_sqp)
+               return 1;
+
+       return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
 }
 
 /* used for INIT/CLOSE port logic */
@@ -796,11 +801,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        goto err_mtt;
 
-               qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+               qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
+                                       gfp | __GFP_NOWARN);
                if (!qp->sq.wrid)
                        qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
                                                gfp, PAGE_KERNEL);
-               qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+               qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
+                                       gfp | __GFP_NOWARN);
                if (!qp->rq.wrid)
                        qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
                                                gfp, PAGE_KERNEL);
@@ -1099,9 +1106,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
                return dev->dev->caps.qp1_proxy[attr->port_num - 1];
 }
 
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
-                               struct ib_qp_init_attr *init_attr,
-                               struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+                                       struct ib_qp_init_attr *init_attr,
+                                       struct ib_udata *udata)
 {
        struct mlx4_ib_qp *qp = NULL;
        int err;
@@ -1120,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                        MLX4_IB_SRIOV_TUNNEL_QP |
                                        MLX4_IB_SRIOV_SQP |
                                        MLX4_IB_QP_NETIF |
+                                       MLX4_IB_QP_CREATE_ROCE_V2_GSI |
                                        MLX4_IB_QP_CREATE_USE_GFP_NOIO))
                return ERR_PTR(-EINVAL);
 
@@ -1128,15 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                        return ERR_PTR(-EINVAL);
        }
 
-       if (init_attr->create_flags &&
-           ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
-            ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
-                                          MLX4_IB_QP_CREATE_USE_GFP_NOIO |
-                                          MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
-             init_attr->qp_type != IB_QPT_UD) ||
-            ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
-             init_attr->qp_type > IB_QPT_GSI)))
-               return ERR_PTR(-EINVAL);
+       if (init_attr->create_flags) {
+               if (udata && init_attr->create_flags & ~(sup_u_create_flags))
+                       return ERR_PTR(-EINVAL);
+
+               if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+                                                MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+                                                MLX4_IB_QP_CREATE_ROCE_V2_GSI  |
+                                                MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
+                    init_attr->qp_type != IB_QPT_UD) ||
+                   (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
+                    init_attr->qp_type > IB_QPT_GSI) ||
+                   (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
+                    init_attr->qp_type != IB_QPT_GSI))
+                       return ERR_PTR(-EINVAL);
+       }
 
        switch (init_attr->qp_type) {
        case IB_QPT_XRC_TGT:
@@ -1173,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
+               int sqpn;
+
                /* Userspace is not allowed to create special QPs: */
                if (udata)
                        return ERR_PTR(-EINVAL);
+               if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+                       int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+
+                       if (res)
+                               return ERR_PTR(res);
+               } else {
+                       sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
+               }
 
                err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
-                                      get_sqp_num(to_mdev(pd->device), init_attr),
+                                      sqpn,
                                       &qp, gfp);
                if (err)
                        return ERR_PTR(err);
 
                qp->port        = init_attr->port_num;
-               qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
+               qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
+                       init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
                break;
        }
        default:
@@ -1196,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        return &qp->ibqp;
 }
 
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+                               struct ib_qp_init_attr *init_attr,
+                               struct ib_udata *udata) {
+       struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+       struct ib_qp *ibqp;
+       struct mlx4_ib_dev *dev = to_mdev(device);
+
+       ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+
+       if (!IS_ERR(ibqp) &&
+           (init_attr->qp_type == IB_QPT_GSI) &&
+           !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
+               struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+               int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
+
+               if (is_eth &&
+                   dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+                       init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+                       sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
+
+                       if (IS_ERR(sqp->roce_v2_gsi)) {
+                               pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+                               sqp->roce_v2_gsi = NULL;
+                       } else {
+                               sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
+                               sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+                       }
+
+                       init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+               }
+       }
+       return ibqp;
+}
+
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
 {
        struct mlx4_ib_dev *dev = to_mdev(qp->device);
        struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1225,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
        return 0;
 }
 
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+       struct mlx4_ib_qp *mqp = to_mqp(qp);
+
+       if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+               struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+
+               if (sqp->roce_v2_gsi)
+                       ib_destroy_qp(sqp->roce_v2_gsi);
+       }
+
+       return _mlx4_ib_destroy_qp(qp);
+}
+
 static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
 {
        switch (type) {
@@ -1507,6 +1579,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
        return 0;
 }
 
+enum {
+       MLX4_QPC_ROCE_MODE_1 = 0,
+       MLX4_QPC_ROCE_MODE_2 = 2,
+       MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
+};
+
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+       switch (gid_type) {
+       case IB_GID_TYPE_ROCE:
+               return MLX4_QPC_ROCE_MODE_1;
+       case IB_GID_TYPE_ROCE_UDP_ENCAP:
+               return MLX4_QPC_ROCE_MODE_2;
+       default:
+               return MLX4_QPC_ROCE_MODE_UNDEFINED;
+       }
+}
+
 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1591,9 +1681,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        }
 
        if (qp->ibqp.uobject)
-               context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
+               context->usr_page = cpu_to_be32(
+                       mlx4_to_hw_uar_index(dev->dev,
+                                            to_mucontext(ibqp->uobject->context)->uar.index));
        else
-               context->usr_page = cpu_to_be32(dev->priv_uar.index);
+               context->usr_page = cpu_to_be32(
+                       mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
 
        if (attr_mask & IB_QP_DEST_QPN)
                context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -1633,6 +1726,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                        mlx4_ib_steer_qp_reg(dev, qp, 1);
                        steer_qp = 1;
                }
+
+               if (ibqp->qp_type == IB_QPT_GSI) {
+                       enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
+                               IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
+                       u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
+
+                       context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+               }
        }
 
        if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1650,9 +1751,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                u16 vlan = 0xffff;
                u8 smac[ETH_ALEN];
                int status = 0;
+               int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+                       attr->ah_attr.ah_flags & IB_AH_GRH;
 
-               if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
-                   attr->ah_attr.ah_flags & IB_AH_GRH) {
+               if (is_eth) {
                        int index = attr->ah_attr.grh.sgid_index;
 
                        status = ib_get_cached_gid(ibqp->device, port_num,
@@ -1674,6 +1776,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 
                optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
                           MLX4_QP_OPTPAR_SCHED_QUEUE);
+
+               if (is_eth &&
+                   (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
+                       u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+
+                       if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
+                               err = -EINVAL;
+                               goto out;
+                       }
+                       context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+               }
+
        }
 
        if (attr_mask & IB_QP_TIMEOUT) {
@@ -1845,7 +1959,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                sqd_event = 0;
 
        if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-               context->rlkey |= (1 << 4);
+               context->rlkey_roce_mode |= (1 << 4);
 
        /*
         * Before passing a kernel QP to the HW, make sure that the
@@ -2022,8 +2136,8 @@ out:
        return err;
 }
 
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                     int attr_mask, struct ib_udata *udata)
+static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                             int attr_mask, struct ib_udata *udata)
 {
        struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *qp = to_mqp(ibqp);
@@ -2126,6 +2240,27 @@ out:
        return err;
 }
 
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata)
+{
+       struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+       int ret;
+
+       ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
+
+       if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+               struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+               int err = 0;
+
+               if (sqp->roce_v2_gsi)
+                       err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
+               if (err)
+                       pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n",
+                              err);
+       }
+       return ret;
+}
+
 static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
 {
        int i;
@@ -2168,7 +2303,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
                send_size += sizeof (struct mlx4_ib_tunnel_header);
 
-       ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+       ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
 
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
                sqp->ud_header.lrh.service_level =
@@ -2252,16 +2387,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        return 0;
 }
 
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
-       int i;
-
-       for (i = ETH_ALEN; i; i--) {
-               dst_mac[i - 1] = src_mac & 0xff;
-               src_mac >>= 8;
-       }
-}
-
+#define MLX4_ROCEV2_QP1_SPORT 0xC000
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                            void *wqe, unsigned *mlx_seg_len)
 {
@@ -2281,6 +2407,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
        bool is_eth;
        bool is_vlan = false;
        bool is_grh;
+       bool is_udp = false;
+       int ip_version = 0;
 
        send_size = 0;
        for (i = 0; i < wr->wr.num_sge; ++i)
@@ -2289,6 +2417,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
        is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
        is_grh = mlx4_ib_ah_grh_present(ah);
        if (is_eth) {
+               struct ib_gid_attr gid_attr;
+
                if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
                        /* When multi-function is enabled, the ib_core gid
                         * indexes don't necessarily match the hw ones, so
@@ -2302,19 +2432,35 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                        err = ib_get_cached_gid(ib_dev,
                                                be32_to_cpu(ah->av.ib.port_pd) >> 24,
                                                ah->av.ib.gid_index, &sgid,
-                                               NULL);
-                       if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
-                               err = -ENOENT;
-                       if (err)
+                                               &gid_attr);
+                       if (!err) {
+                               if (gid_attr.ndev)
+                                       dev_put(gid_attr.ndev);
+                               if (!memcmp(&sgid, &zgid, sizeof(sgid)))
+                                       err = -ENOENT;
+                       }
+                       if (!err) {
+                               is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+                               if (is_udp) {
+                                       if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
+                                               ip_version = 4;
+                                       else
+                                               ip_version = 6;
+                                       is_grh = false;
+                               }
+                       } else {
                                return err;
+                       }
                }
-
                if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
                        vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
                        is_vlan = 1;
                }
        }
-       ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+       err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
+                         ip_version, is_udp, 0, &sqp->ud_header);
+       if (err)
+               return err;
 
        if (!is_eth) {
                sqp->ud_header.lrh.service_level =
@@ -2323,7 +2469,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
        }
 
-       if (is_grh) {
+       if (is_grh || (ip_version == 6)) {
                sqp->ud_header.grh.traffic_class =
                        (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
                sqp->ud_header.grh.flow_label    =
@@ -2352,6 +2498,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                       ah->av.ib.dgid, 16);
        }
 
+       if (ip_version == 4) {
+               sqp->ud_header.ip4.tos =
+                       (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
+               sqp->ud_header.ip4.id = 0;
+               sqp->ud_header.ip4.frag_off = htons(IP_DF);
+               sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
+
+               memcpy(&sqp->ud_header.ip4.saddr,
+                      sgid.raw + 12, 4);
+               memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
+               sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
+       }
+
+       if (is_udp) {
+               sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
+               sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
+               sqp->ud_header.udp.csum = 0;
+       }
+
        mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 
        if (!is_eth) {
@@ -2380,34 +2545,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 
        if (is_eth) {
                struct in6_addr in6;
-
+               u16 ether_type;
                u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
 
+               ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+                       (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
+
                mlx->sched_prio = cpu_to_be16(pcp);
 
+               ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
                memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
-               /* FIXME: cache smac value? */
                memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
                memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
                memcpy(&in6, sgid.raw, sizeof(in6));
 
-               if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
-                       u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
-                       u8 smac[ETH_ALEN];
-
-                       mlx4_u64_to_smac(smac, mac);
-                       memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
-               } else {
-                       /* use the src mac of the tunnel */
-                       memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
-               }
 
                if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
                        mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
                if (!is_vlan) {
-                       sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+                       sqp->ud_header.eth.type = cpu_to_be16(ether_type);
                } else {
-                       sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+                       sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
                        sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
                }
        } else {
@@ -2528,25 +2686,6 @@ static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
        fseg->reserved[1]       = 0;
 }
 
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
-               struct ib_bind_mw_wr *wr)
-{
-       bseg->flags1 =
-               convert_access(wr->bind_info.mw_access_flags) &
-               cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ  |
-                           MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
-                           MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
-       bseg->flags2 = 0;
-       if (wr->mw->type == IB_MW_TYPE_2)
-               bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
-       if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
-               bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
-       bseg->new_rkey = cpu_to_be32(wr->rkey);
-       bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
-       bseg->addr = cpu_to_be64(wr->bind_info.addr);
-       bseg->length = cpu_to_be64(wr->bind_info.length);
-}
-
 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
 {
        memset(iseg, 0, sizeof(*iseg));
@@ -2766,6 +2905,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        int i;
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 
+       if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+               struct mlx4_ib_sqp *sqp = to_msqp(qp);
+
+               if (sqp->roce_v2_gsi) {
+                       struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
+                       struct ib_gid_attr gid_attr;
+                       union ib_gid gid;
+
+                       if (!ib_get_cached_gid(ibqp->device,
+                                              be32_to_cpu(ah->av.ib.port_pd) >> 24,
+                                              ah->av.ib.gid_index, &gid,
+                                              &gid_attr)) {
+                               if (gid_attr.ndev)
+                                       dev_put(gid_attr.ndev);
+                               qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+                                       to_mqp(sqp->roce_v2_gsi) : qp;
+                       } else {
+                               pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
+                                      ah->av.ib.gid_index);
+                       }
+               }
+       }
+
        spin_lock_irqsave(&qp->sq.lock, flags);
        if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
                err = -EIO;
@@ -2867,13 +3029,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
                                break;
 
-                       case IB_WR_BIND_MW:
-                               ctrl->srcrb_flags |=
-                                       cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
-                               set_bind_seg(wqe, bind_mw_wr(wr));
-                               wqe  += sizeof(struct mlx4_wqe_bind_seg);
-                               size += sizeof(struct mlx4_wqe_bind_seg) / 16;
-                               break;
                        default:
                                /* No extra segments required for sends */
                                break;
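
The QP1 path above can now emit two wire formats on Ethernet ports. Roughly, per this series:

    RoCE v1:  ETH [VLAN] GRH BTH ...                (ethertype 0x8915)
    RoCE v2:  ETH [VLAN] IPv4|IPv6 UDP BTH ...      (UDP dport 4791)

The patch pins the QP1 UDP source port to MLX4_ROCEV2_QP1_SPORT (0xC000). As an illustration only (not what this driver does for QP1), a flow-entropy-derived source port would stay in the same ephemeral range like this:

/* Illustrative sketch: fold 14 bits of flow entropy into the
 * RoCE v2 ephemeral source-port range 0xC000..0xFFFF.
 */
static u16 roce_v2_sport_from_entropy(u32 entropy)
{
	return 0xC000 | (entropy & 0x3fff);
}
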
index c394376ebe06f159aebca07ef899ef5933386713..0597f3eef5d03ce4729c0fc0f5f0ffd076ae1176 100644 (file)
@@ -171,7 +171,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
                if (err)
                        goto err_mtt;
 
-               srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+               srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+                                       GFP_KERNEL | __GFP_NOWARN);
                if (!srq->wrid) {
                        srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
                                              GFP_KERNEL, PAGE_KERNEL);
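
The allocation change above is the contiguous-first pattern: attempt kmalloc_array() quietly (__GFP_NOWARN, since failure is expected for large work-request ID tables) and fall back to vmalloc. Later kernels wrap exactly this pattern as kvmalloc_array(); a self-contained sketch, with kvfree() as the matching release:

#include <linux/slab.h>
#include <linux/vmalloc.h>

static u64 *alloc_wrid_table(unsigned int nent)
{
	u64 *wrid;

	/* Quiet attempt at physically contiguous memory first. */
	wrid = kmalloc_array(nent, sizeof(u64),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!wrid)
		wrid = __vmalloc(nent * sizeof(u64),
				 GFP_KERNEL, PAGE_KERNEL);
	return wrid;	/* free with kvfree() */
}
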
index 66080580e24db3e90c887bcea7132e6687bc3d33..745efa4cfc718eec9b749508f54b091ed13332d3 100644 (file)
@@ -32,8 +32,10 @@
 
 #include "mlx5_ib.h"
 
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
-                          struct mlx5_ib_ah *ah)
+static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+                                 struct mlx5_ib_ah *ah,
+                                 struct ib_ah_attr *ah_attr,
+                                 enum rdma_link_layer ll)
 {
        if (ah_attr->ah_flags & IB_AH_GRH) {
                memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
@@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
                ah->av.tclass = ah_attr->grh.traffic_class;
        }
 
-       ah->av.rlid = cpu_to_be16(ah_attr->dlid);
-       ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
-       ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+       ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+               ah->av.udp_sport =
+                       mlx5_get_roce_udp_sport(dev,
+                                               ah_attr->port_num,
+                                               ah_attr->grh.sgid_index);
+               ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+       } else {
+               ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+               ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+               ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+       }
 
        return &ah->ibah;
 }
@@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
 struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 {
        struct mlx5_ib_ah *ah;
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       enum rdma_link_layer ll;
+
+       ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+
+       if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+               return ERR_PTR(-EINVAL);
 
        ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);
 
-       return create_ib_ah(ah_attr, ah); /* never fails */
+       return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
 }
 
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
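
In the Ethernet branch above, the stat_rate_sl byte of the address vector is packed differently per link layer. The layout below is inferred from the code, not quoted from hardware documentation:

#include <linux/types.h>

/* Inferred packing of av.stat_rate_sl:
 *   bits 7..4          static rate (both link layers)
 *   IB port:  bits 3..0 = 4-bit SL
 *   Eth port: bits 3..1 = 3-bit SL, bit 0 left clear
 */
static u8 pack_stat_rate_sl(u8 static_rate, u8 sl, bool is_eth)
{
	u8 v = static_rate << 4;

	if (is_eth)
		v |= (sl & 0x7) << 1;
	else
		v |= sl & 0xf;
	return v;
}
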
index 92ddae101ecc7dc6032705e56a2c75d46fd0bf71..fd1de31e0611cbbd3d8691e595677d8e182f3198 100644 (file)
@@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                wc->opcode    = IB_WC_MASKED_FETCH_ADD;
                wc->byte_len  = 8;
                break;
-       case MLX5_OPCODE_BIND_MW:
-               wc->opcode    = IB_WC_BIND_MW;
-               break;
        case MLX5_OPCODE_UMR:
                wc->opcode = get_umr_comp(wq, idx);
                break;
@@ -171,6 +168,7 @@ enum {
 static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                             struct mlx5_ib_qp *qp)
 {
+       enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
        struct mlx5_ib_srq *srq;
        struct mlx5_ib_wq *wq;
@@ -236,6 +234,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
        } else {
                wc->pkey_index = 0;
        }
+
+       if (ll != IB_LINK_LAYER_ETHERNET)
+               return;
+
+       switch (wc->sl & 0x3) {
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
+               wc->network_hdr_type = RDMA_NETWORK_IB;
+               break;
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
+               wc->network_hdr_type = RDMA_NETWORK_IPV6;
+               break;
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
+               wc->network_hdr_type = RDMA_NETWORK_IPV4;
+               break;
+       }
+       wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
 }
 
 static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
@@ -760,12 +774,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
        int eqn;
        int err;
 
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
        if (entries < 0)
                return ERR_PTR(-EINVAL);
 
+       if (check_cq_create_flags(attr->flags))
+               return ERR_PTR(-EOPNOTSUPP);
+
        entries = roundup_pow_of_two(entries + 1);
        if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
                return ERR_PTR(-EINVAL);
@@ -779,6 +793,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
        spin_lock_init(&cq->lock);
        cq->resize_buf = NULL;
        cq->resize_umem = NULL;
+       cq->create_flags = attr->flags;
 
        if (context) {
                err = create_cq_user(dev, udata, context, cq, entries,
@@ -796,6 +811,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 
        cq->cqe_size = cqe_size;
        cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+       if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+               cqb->ctx.cqe_sz_flags |= (1 << 1);
+
        cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
        err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
        if (err)
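
handle_responder() above starts reporting which L3 header a RoCE packet arrived with, decoded from two bits of the CQE's sl field. A sketch of how a completion consumer might read the new field (all names are from this series' ib_verbs.h, visible in the hunk above):

#include <rdma/ib_verbs.h>

static const char *wc_network_hdr(const struct ib_wc *wc)
{
	/* Only valid when the provider set the flag. */
	if (!(wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE))
		return "unreported";

	switch (wc->network_hdr_type) {
	case RDMA_NETWORK_IB:   return "GRH (RoCE v1)";
	case RDMA_NETWORK_IPV4: return "IPv4 (RoCE v2)";
	case RDMA_NETWORK_IPV6: return "IPv6 (RoCE v2)";
	default:                return "unknown";
	}
}
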
index b0ec175cc6ba3b4c63ec8c5e22082a3603b5b473..03c418ccbc982e4114681044798c075a65357b28 100644 (file)
@@ -40,6 +40,8 @@
 #include <linux/io-mapping.h>
 #include <linux/sched.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
 #include <linux/mlx5/vport.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
@@ -66,12 +68,14 @@ static char mlx5_version[] =
        DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
        DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
 
+enum {
+       MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
+};
+
 static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device)
+mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
 {
-       struct mlx5_ib_dev *dev = to_mdev(device);
-
-       switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
+       switch (port_type_cap) {
        case MLX5_CAP_PORT_TYPE_IB:
                return IB_LINK_LAYER_INFINIBAND;
        case MLX5_CAP_PORT_TYPE_ETH:
@@ -81,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device)
        }
 }
 
+static enum rdma_link_layer
+mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
+
+       return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+}
+
+static int mlx5_netdev_event(struct notifier_block *this,
+                            unsigned long event, void *ptr)
+{
+       struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+       struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
+                                                roce.nb);
+
+       if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
+               return NOTIFY_DONE;
+
+       write_lock(&ibdev->roce.netdev_lock);
+       if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+               ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
+       write_unlock(&ibdev->roce.netdev_lock);
+
+       return NOTIFY_DONE;
+}
+
+static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
+                                            u8 port_num)
+{
+       struct mlx5_ib_dev *ibdev = to_mdev(device);
+       struct net_device *ndev;
+
+       /* Ensure ndev does not disappear before we invoke dev_hold()
+        */
+       read_lock(&ibdev->roce.netdev_lock);
+       ndev = ibdev->roce.netdev;
+       if (ndev)
+               dev_hold(ndev);
+       read_unlock(&ibdev->roce.netdev_lock);
+
+       return ndev;
+}
+
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+                               struct ib_port_attr *props)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       struct net_device *ndev;
+       enum ib_mtu ndev_ib_mtu;
+       u16 qkey_viol_cntr;
+
+       memset(props, 0, sizeof(*props));
+
+       props->port_cap_flags  |= IB_PORT_CM_SUP;
+       props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
+
+       props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
+                                               roce_address_table_size);
+       props->max_mtu          = IB_MTU_4096;
+       props->max_msg_sz       = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
+       props->pkey_tbl_len     = 1;
+       props->state            = IB_PORT_DOWN;
+       props->phys_state       = 3;
+
+       mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+       props->qkey_viol_cntr = qkey_viol_cntr;
+
+       ndev = mlx5_ib_get_netdev(device, port_num);
+       if (!ndev)
+               return 0;
+
+       if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+               props->state      = IB_PORT_ACTIVE;
+               props->phys_state = 5;
+       }
+
+       ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
+
+       dev_put(ndev);
+
+       props->active_mtu       = min(props->max_mtu, ndev_ib_mtu);
+
+       props->active_width     = IB_WIDTH_4X;  /* TODO */
+       props->active_speed     = IB_SPEED_QDR; /* TODO */
+
+       return 0;
+}
+
+static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
+                                    const struct ib_gid_attr *attr,
+                                    void *mlx5_addr)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+       char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+                                              source_l3_address);
+       void *mlx5_addr_mac     = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+                                              source_mac_47_32);
+
+       if (!gid)
+               return;
+
+       ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+
+       if (is_vlan_dev(attr->ndev)) {
+               MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
+               MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+       }
+
+       switch (attr->gid_type) {
+       case IB_GID_TYPE_IB:
+               MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+               break;
+       case IB_GID_TYPE_ROCE_UDP_ENCAP:
+               MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+               break;
+
+       default:
+               WARN_ON(true);
+       }
+
+       if (attr->gid_type != IB_GID_TYPE_IB) {
+               if (ipv6_addr_v4mapped((void *)gid))
+                       MLX5_SET_RA(mlx5_addr, roce_l3_type,
+                                   MLX5_ROCE_L3_TYPE_IPV4);
+               else
+                       MLX5_SET_RA(mlx5_addr, roce_l3_type,
+                                   MLX5_ROCE_L3_TYPE_IPV6);
+       }
+
+       if ((attr->gid_type == IB_GID_TYPE_IB) ||
+           !ipv6_addr_v4mapped((void *)gid))
+               memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
+       else
+               memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
+}
+
+static int set_roce_addr(struct ib_device *device, u8 port_num,
+                        unsigned int index,
+                        const union ib_gid *gid,
+                        const struct ib_gid_attr *attr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
+       u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+       void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+       enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
+
+       if (ll != IB_LINK_LAYER_ETHERNET)
+               return -EINVAL;
+
+       memset(in, 0, sizeof(in));
+
+       ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
+
+       MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+       MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+
+       memset(out, 0, sizeof(out));
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
+                          unsigned int index, const union ib_gid *gid,
+                          const struct ib_gid_attr *attr,
+                          __always_unused void **context)
+{
+       return set_roce_addr(device, port_num, index, gid, attr);
+}
+
+static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
+                          unsigned int index, __always_unused void **context)
+{
+       return set_roce_addr(device, port_num, index, NULL, NULL);
+}
+
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+                              int index)
+{
+       struct ib_gid_attr attr;
+       union ib_gid gid;
+
+       if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
+               return 0;
+
+       if (!attr.ndev)
+               return 0;
+
+       dev_put(attr.ndev);
+
+       if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+               return 0;
+
+       return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
+}
+
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
        return !dev->mdev->issi;
@@ -97,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
        if (mlx5_use_mad_ifc(to_mdev(ibdev)))
                return MLX5_VPORT_ACCESS_METHOD_MAD;
 
-       if (mlx5_ib_port_link_layer(ibdev) ==
+       if (mlx5_ib_port_link_layer(ibdev, 1) ==
            IB_LINK_LAYER_ETHERNET)
                return MLX5_VPORT_ACCESS_METHOD_NIC;
 
        return MLX5_VPORT_ACCESS_METHOD_HCA;
 }
 
+static void get_atomic_caps(struct mlx5_ib_dev *dev,
+                           struct ib_device_attr *props)
+{
+       u8 tmp;
+       u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+       u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+       u8 atomic_req_8B_endianness_mode =
+               MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+
+       /* Check if HW supports 8 bytes standard atomic operations and capable
+        * of host endianness respond
+        */
+       tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
+       if (((atomic_operations & tmp) == tmp) &&
+           (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
+           (atomic_req_8B_endianness_mode)) {
+               props->atomic_cap = IB_ATOMIC_HCA;
+       } else {
+               props->atomic_cap = IB_ATOMIC_NONE;
+       }
+}
+
 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
                                        __be64 *sys_image_guid)
 {
@@ -119,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev,
 
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
-               if (!err)
-                       *sys_image_guid = cpu_to_be64(tmp);
-               return err;
+               break;
+
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+               break;
 
        default:
                return -EINVAL;
        }
+
+       if (!err)
+               *sys_image_guid = cpu_to_be64(tmp);
+
+       return err;
+
 }
 
 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
@@ -179,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
 
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
-               if (!err)
-                       *node_guid = cpu_to_be64(tmp);
-               return err;
+               break;
+
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+               break;
 
        default:
                return -EINVAL;
        }
+
+       if (!err)
+               *node_guid = cpu_to_be64(tmp);
+
+       return err;
 }
 
 struct mlx5_reg_node_desc {
@@ -263,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        if (MLX5_CAP_GEN(mdev, block_lb_mc))
                props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
 
+       if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+           (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+                       props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
        props->vendor_part_id      = mdev->pdev->device;
        props->hw_ver              = mdev->pdev->revision;
 
@@ -278,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->max_sge = min(max_rq_sg, max_sq_sg);
        props->max_sge_rd = props->max_sge;
        props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
-       props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
+       props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
        props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
        props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
        props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
@@ -289,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq_sge         = max_rq_sg - 1;
        props->max_fast_reg_page_list_len = (unsigned int)-1;
-       props->atomic_cap          = IB_ATOMIC_NONE;
+       get_atomic_caps(dev, props);
        props->masked_atomic_cap   = IB_ATOMIC_NONE;
        props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
        props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+       props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+       props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (MLX5_CAP_GEN(mdev, pg))
@@ -303,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->odp_caps = dev->odp_caps;
 #endif
 
+       if (MLX5_CAP_GEN(mdev, cd))
+               props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
        return 0;
 }
 
@@ -483,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                return mlx5_query_hca_port(ibdev, port, props);
 
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               return mlx5_query_port_roce(ibdev, port, props);
+
        default:
                return -EINVAL;
        }
@@ -583,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                                                  struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
-       struct mlx5_ib_alloc_ucontext_req_v2 req;
-       struct mlx5_ib_alloc_ucontext_resp resp;
+       struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+       struct mlx5_ib_alloc_ucontext_resp resp = {};
        struct mlx5_ib_ucontext *context;
        struct mlx5_uuar_info *uuari;
        struct mlx5_uar *uars;
@@ -595,24 +844,28 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        int err;
        int i;
        size_t reqlen;
+       size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
+                                    max_cqe_version);
 
        if (!dev->ib_active)
                return ERR_PTR(-EAGAIN);
 
-       memset(&req, 0, sizeof(req));
+       if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
+               return ERR_PTR(-EINVAL);
+
        reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
        if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
                ver = 0;
-       else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+       else if (reqlen >= min_req_v2)
                ver = 2;
        else
                return ERR_PTR(-EINVAL);
 
-       err = ib_copy_from_udata(&req, udata, reqlen);
+       err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
        if (err)
                return ERR_PTR(err);
 
-       if (req.flags || req.reserved)
+       if (req.flags)
                return ERR_PTR(-EINVAL);
 
        if (req.total_num_uuars > MLX5_MAX_UUARS)
@@ -621,6 +874,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        if (req.total_num_uuars == 0)
                return ERR_PTR(-EINVAL);
 
+       if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+               return ERR_PTR(-EOPNOTSUPP);
+
+       if (reqlen > sizeof(req) &&
+           !ib_is_udata_cleared(udata, sizeof(req),
+                                reqlen - sizeof(req)))
+               return ERR_PTR(-EOPNOTSUPP);
+
        req.total_num_uuars = ALIGN(req.total_num_uuars,
                                    MLX5_NON_FP_BF_REGS_PER_PAGE);
        if (req.num_low_latency_uuars > req.total_num_uuars - 1)
@@ -636,6 +897,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+       resp.cqe_version = min_t(__u8,
+                                (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+                                req.max_cqe_version);
+       resp.response_length = min(offsetof(typeof(resp), response_length) +
+                                  sizeof(resp.response_length), udata->outlen);
 
        context = kzalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
@@ -681,22 +947,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
 #endif
 
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
+               err = mlx5_core_alloc_transport_domain(dev->mdev,
+                                                      &context->tdn);
+               if (err)
+                       goto out_uars;
+       }
+
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
 
        resp.tot_uuars = req.total_num_uuars;
        resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
-       err = ib_copy_to_udata(udata, &resp,
-                              sizeof(resp) - sizeof(resp.reserved));
+
+       if (field_avail(typeof(resp), cqe_version, udata->outlen))
+               resp.response_length += sizeof(resp.cqe_version);
+
+       if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+               resp.comp_mask |=
+                       MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+               resp.hca_core_clock_offset =
+                       offsetof(struct mlx5_init_seg, internal_timer_h) %
+                       PAGE_SIZE;
+               resp.response_length += sizeof(resp.hca_core_clock_offset) +
+                                       sizeof(resp.reserved2) +
+                                       sizeof(resp.reserved3);
+       }
+
+       err = ib_copy_to_udata(udata, &resp, resp.response_length);
        if (err)
-               goto out_uars;
+               goto out_td;
 
        uuari->ver = ver;
        uuari->num_low_latency_uuars = req.num_low_latency_uuars;
        uuari->uars = uars;
        uuari->num_uars = num_uars;
+       context->cqe_version = resp.cqe_version;
+
        return &context->ibucontext;
 
+out_td:
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+               mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
 out_uars:
        for (i--; i >= 0; i--)
                mlx5_cmd_free_uar(dev->mdev, uars[i].index);
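
The udata handling above is the usual extensible-ABI dance: on input, copy at
most sizeof(req) bytes and accept a longer request only if the tail the kernel
does not understand is zeroed; on output, write no more than response_length,
which is capped by udata->outlen. A condensed sketch of both directions,
using the same helpers as the code above:

    /* Input: unknown trailing request bytes must be zero. */
    err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
    if (!err && reqlen > sizeof(req) &&
        !ib_is_udata_cleared(udata, sizeof(req), reqlen - sizeof(req)))
            err = -EOPNOTSUPP;

    /* Output: report only the bytes userspace allocated. */
    if (!err)
            err = ib_copy_to_udata(udata, &resp, resp.response_length);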
@@ -721,6 +1014,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
        struct mlx5_uuar_info *uuari = &context->uuari;
        int i;
 
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+               mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
        for (i = 0; i < uuari->num_uars; i++) {
                if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
                        mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
@@ -790,6 +1086,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
        case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
                return -ENOSYS;
 
+       case MLX5_IB_MMAP_CORE_CLOCK:
+               if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+                       return -EINVAL;
+
+               if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+                       return -EPERM;
+
+               /*
+                * A PAGE_SIZE larger than 4K would map more of the init
+                * segment than just the timer page; don't expose that to
+                * user-space.
+                */
+               if (PAGE_SIZE > 4096)
+                       return -EOPNOTSUPP;
+
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+               pfn = (dev->mdev->iseg_base +
+                      offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+                       PAGE_SHIFT;
+               if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+                                      PAGE_SIZE, vma->vm_page_prot))
+                       return -EAGAIN;
+
+               mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
+                           vma->vm_start,
+                           (unsigned long long)pfn << PAGE_SHIFT);
+               break;
+
        default:
                return -EINVAL;
        }
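
For the new MLX5_IB_MMAP_CORE_CLOCK case, userspace selects the command via
the upper bits of the mmap page offset (MLX5_IB_MMAP_CMD_SHIFT, defined as 8
in mlx5_ib.h below). A hypothetical userspace sketch, mapping the clock page
read-only as the VM_WRITE/VM_EXEC check requires (cmd_fd stands in for the
uverbs command file descriptor):

    long  psz = sysconf(_SC_PAGESIZE);
    off_t off = (off_t)(5 << 8) * psz; /* MLX5_IB_MMAP_CORE_CLOCK << CMD_SHIFT */
    void *clk = mmap(NULL, psz, PROT_READ, MAP_SHARED, cmd_fd, off);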
@@ -1758,6 +2078,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
        mlx5_ib_dealloc_pd(devr->p0);
 }
 
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
+       u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
+       u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+       u32 ret = 0;
+
+       if (ll == IB_LINK_LAYER_INFINIBAND)
+               return RDMA_CORE_PORT_IBA_IB;
+
+       if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
+               return 0;
+
+       if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+               return 0;
+
+       if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
+               ret |= RDMA_CORE_PORT_IBA_ROCE;
+
+       if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
+               ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+       return ret;
+}
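
These per-port core_cap_flags are what the rdma core consults; a port that
advertises both RoCE versions reports RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP, which consumers test through the
ib_verbs.h helpers, e.g.:

    bool is_roce = rdma_protocol_roce(ibdev, port_num); /* v1 and/or v2 */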
+
 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
                               struct ib_port_immutable *immutable)
 {
@@ -1770,20 +2116,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+       immutable->core_cap_flags = get_core_cap_flags(ibdev);
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
        return 0;
 }
 
+static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+{
+       int err;
+
+       dev->roce.nb.notifier_call = mlx5_netdev_event;
+       err = register_netdevice_notifier(&dev->roce.nb);
+       if (err)
+               return err;
+
+       err = mlx5_nic_vport_enable_roce(dev->mdev);
+       if (err)
+               goto err_unregister_netdevice_notifier;
+
+       return 0;
+
+err_unregister_netdevice_notifier:
+       unregister_netdevice_notifier(&dev->roce.nb);
+       return err;
+}
+
+static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+{
+       mlx5_nic_vport_disable_roce(dev->mdev);
+       unregister_netdevice_notifier(&dev->roce.nb);
+}
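
mlx5_enable_roce() pairs a global netdevice notifier with the firmware RoCE
enable and unwinds in reverse on failure. The callback it registers,
mlx5_netdev_event, is defined elsewhere in this patch; a minimal sketch of
the shape such a callback takes (simplified, handling only UNREGISTER):

    static int example_netdev_event(struct notifier_block *nb,
                                    unsigned long event, void *ptr)
    {
            struct mlx5_ib_dev *dev =
                    container_of(nb, struct mlx5_ib_dev, roce.nb);
            struct net_device *ndev = netdev_notifier_info_to_dev(ptr);

            write_lock(&dev->roce.netdev_lock);
            if (event == NETDEV_UNREGISTER && dev->roce.netdev == ndev)
                    dev->roce.netdev = NULL;
            write_unlock(&dev->roce.netdev_lock);

            return NOTIFY_DONE;
    }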
+
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
        struct mlx5_ib_dev *dev;
+       enum rdma_link_layer ll;
+       int port_type_cap;
        int err;
        int i;
 
-       /* don't create IB instance over Eth ports, no RoCE yet! */
-       if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+       port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+       ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+       if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
                return NULL;
 
        printk_once(KERN_INFO "%s", mlx5_version);
@@ -1794,6 +2170,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
        dev->mdev = mdev;
 
+       rwlock_init(&dev->roce.netdev_lock);
        err = get_port_caps(dev);
        if (err)
                goto err_dealloc;
@@ -1839,11 +2216,18 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
                (1ull << IB_USER_VERBS_CMD_OPEN_QP);
        dev->ib_dev.uverbs_ex_cmd_mask =
-               (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
+               (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE)     |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ)        |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
 
        dev->ib_dev.query_device        = mlx5_ib_query_device;
        dev->ib_dev.query_port          = mlx5_ib_query_port;
+       dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
        dev->ib_dev.query_gid           = mlx5_ib_query_gid;
+       dev->ib_dev.add_gid             = mlx5_ib_add_gid;
+       dev->ib_dev.del_gid             = mlx5_ib_del_gid;
        dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
        dev->ib_dev.modify_device       = mlx5_ib_modify_device;
        dev->ib_dev.modify_port         = mlx5_ib_modify_port;
@@ -1893,7 +2277,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                        (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
        }
 
-       if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+       if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
            IB_LINK_LAYER_ETHERNET) {
                dev->ib_dev.create_flow = mlx5_ib_create_flow;
                dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
@@ -1908,9 +2292,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
 
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               err = mlx5_enable_roce(dev);
+               if (err)
+                       goto err_dealloc;
+       }
+
        err = create_dev_resources(&dev->devr);
        if (err)
-               goto err_dealloc;
+               goto err_disable_roce;
 
        err = mlx5_ib_odp_init_one(dev);
        if (err)
@@ -1947,6 +2337,10 @@ err_odp:
 err_rsrc:
        destroy_dev_resources(&dev->devr);
 
+err_disable_roce:
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               mlx5_disable_roce(dev);
+
 err_dealloc:
        ib_dealloc_device((struct ib_device *)dev);
 
@@ -1956,11 +2350,14 @@ err_dealloc:
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
        struct mlx5_ib_dev *dev = context;
+       enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
 
        ib_unregister_device(&dev->ib_dev);
        destroy_umrc_res(dev);
        mlx5_ib_odp_remove_one(dev);
        destroy_dev_resources(&dev->devr);
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               mlx5_disable_roce(dev);
        ib_dealloc_device(&dev->ib_dev);
 }
 
index 1474cccd1e0f3c4b5e9bc5b6575dc32fc8535349..d2b9737baa3675b1dc9ced794767f1790ada35ee 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/srq.h>
 #include <linux/types.h>
+#include <linux/mlx5/transobj.h>
 
 #define mlx5_ib_dbg(dev, format, arg...)                               \
 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,   \
@@ -55,6 +56,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,   \
 pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,    \
        __LINE__, current->pid, ##arg)
 
+#define field_avail(type, fld, sz) (offsetof(type, fld) +              \
+                                   sizeof(((type *)0)->fld) <= (sz))
+#define MLX5_IB_DEFAULT_UIDX 0xffffff
+#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+
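
field_avail() answers "does the caller's buffer extend far enough to contain
this struct member?", which is how optional response fields are gated in
mlx5_ib_alloc_ucontext() above. For example, with a hypothetical layout:

    struct ex { __u64 a; __u32 b; __u32 c; };

    field_avail(struct ex, b, 16); /* true:  offsetof(b) = 8,  8 + 4 <= 16 */
    field_avail(struct ex, c, 14); /* false: offsetof(c) = 12, 12 + 4 > 14 */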
 enum {
        MLX5_IB_MMAP_CMD_SHIFT  = 8,
        MLX5_IB_MMAP_CMD_MASK   = 0xff,
@@ -62,7 +68,9 @@ enum {
 
 enum mlx5_ib_mmap_cmd {
        MLX5_IB_MMAP_REGULAR_PAGE               = 0,
-       MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES       = 1, /* always last */
+       MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES       = 1,
+       /* 5 is chosen to stay compatible with older versions of libmlx5 */
+       MLX5_IB_MMAP_CORE_CLOCK                 = 5,
 };
 
 enum {
@@ -85,6 +93,15 @@ enum mlx5_ib_mad_ifc_flags {
        MLX5_MAD_IFC_NET_VIEW           = 4,
 };
 
+enum {
+       MLX5_CROSS_CHANNEL_UUAR         = 0,
+};
+
+enum {
+       MLX5_CQE_VERSION_V0,
+       MLX5_CQE_VERSION_V1,
+};
+
 struct mlx5_ib_ucontext {
        struct ib_ucontext      ibucontext;
        struct list_head        db_page_list;
@@ -93,6 +110,9 @@ struct mlx5_ib_ucontext {
         */
        struct mutex            db_page_mutex;
        struct mlx5_uuar_info   uuari;
+       u8                      cqe_version;
+       /* Transport Domain number */
+       u32                     tdn;
 };
 
 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -201,47 +221,70 @@ struct mlx5_ib_pfault {
        struct mlx5_pagefault   mpfault;
 };
 
+struct mlx5_ib_ubuffer {
+       struct ib_umem         *umem;
+       int                     buf_size;
+       u64                     buf_addr;
+};
+
+struct mlx5_ib_qp_base {
+       struct mlx5_ib_qp       *container_mibqp;
+       struct mlx5_core_qp     mqp;
+       struct mlx5_ib_ubuffer  ubuffer;
+};
+
+struct mlx5_ib_qp_trans {
+       struct mlx5_ib_qp_base  base;
+       u16                     xrcdn;
+       u8                      alt_port;
+       u8                      atomic_rd_en;
+       u8                      resp_depth;
+};
+
 struct mlx5_ib_rq {
+       struct mlx5_ib_qp_base base;
+       struct mlx5_ib_wq       *rq;
+       struct mlx5_ib_ubuffer  ubuffer;
+       struct mlx5_db          *doorbell;
        u32                     tirn;
+       u8                      state;
+};
+
+struct mlx5_ib_sq {
+       struct mlx5_ib_qp_base base;
+       struct mlx5_ib_wq       *sq;
+       struct mlx5_ib_ubuffer  ubuffer;
+       struct mlx5_db          *doorbell;
+       u32                     tisn;
+       u8                      state;
 };
 
 struct mlx5_ib_raw_packet_qp {
+       struct mlx5_ib_sq sq;
        struct mlx5_ib_rq rq;
 };
 
 struct mlx5_ib_qp {
        struct ib_qp            ibqp;
        union {
-               struct mlx5_core_qp             mqp;
-               struct mlx5_ib_raw_packet_qp    raw_packet_qp;
+               struct mlx5_ib_qp_trans trans_qp;
+               struct mlx5_ib_raw_packet_qp raw_packet_qp;
        };
-
        struct mlx5_buf         buf;
 
        struct mlx5_db          db;
        struct mlx5_ib_wq       rq;
 
-       u32                     doorbell_qpn;
        u8                      sq_signal_bits;
        u8                      fm_cache;
-       int                     sq_max_wqes_per_wr;
-       int                     sq_spare_wqes;
        struct mlx5_ib_wq       sq;
 
-       struct ib_umem         *umem;
-       int                     buf_size;
-
        /* serialize qp state modifications
         */
        struct mutex            mutex;
-       u16                     xrcdn;
        u32                     flags;
        u8                      port;
-       u8                      alt_port;
-       u8                      atomic_rd_en;
-       u8                      resp_depth;
        u8                      state;
-       int                     mlx_type;
        int                     wq_sig;
        int                     scat_cqe;
        int                     max_inline_data;
@@ -284,6 +327,9 @@ struct mlx5_ib_cq_buf {
 enum mlx5_ib_qp_flags {
        MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = 1 << 0,
        MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
+       MLX5_IB_QP_CROSS_CHANNEL                = 1 << 2,
+       MLX5_IB_QP_MANAGED_SEND                 = 1 << 3,
+       MLX5_IB_QP_MANAGED_RECV                 = 1 << 4,
 };
 
 struct mlx5_umr_wr {
@@ -326,6 +372,7 @@ struct mlx5_ib_cq {
        struct mlx5_ib_cq_buf  *resize_buf;
        struct ib_umem         *resize_umem;
        int                     cqe_size;
+       u32                     create_flags;
 };
 
 struct mlx5_ib_srq {
@@ -449,9 +496,19 @@ struct mlx5_ib_resources {
        struct ib_srq   *s1;
 };
 
+struct mlx5_roce {
+       /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
+        * netdev pointer
+        */
+       rwlock_t                netdev_lock;
+       struct net_device       *netdev;
+       struct notifier_block   nb;
+};
+
 struct mlx5_ib_dev {
        struct ib_device                ib_dev;
        struct mlx5_core_dev            *mdev;
+       struct mlx5_roce                roce;
        MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
        int                             num_ports;
        /* serialize update of capability mask
@@ -498,7 +555,7 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
 
 static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
 {
-       return container_of(mqp, struct mlx5_ib_qp, mqp);
+       return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
 }
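
The extra hop in to_mibqp() exists because a raw packet QP embeds two
mlx5_ib_qp_base objects (one under sq, one under rq); firmware events hand
back a pointer to the embedded mlx5_core_qp, and container_of() alone can
only recover the base, so creation code records the owner in each base:

    sq->base.container_mibqp = qp; /* done in create_raw_packet_qp() */
    rq->base.container_mibqp = qp;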
 
 static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
@@ -550,8 +607,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
 int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
                 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
                 const void *in_mad, void *response_mad);
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
-                          struct mlx5_ib_ah *ah);
 struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int mlx5_ib_destroy_ah(struct ib_ah *ah);
@@ -578,7 +633,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
-                         void *buffer, u32 length);
+                         void *buffer, u32 length,
+                         struct mlx5_ib_qp_base *base);
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
                                const struct ib_cq_init_attr *attr,
                                struct ib_ucontext *context,
@@ -680,6 +736,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)  {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+                              int index);
+
 static inline void init_query_mad(struct ib_smp *mad)
 {
        mad->base_version  = 1;
@@ -705,4 +764,28 @@ static inline int is_qp1(enum ib_qp_type qp_type)
 #define MLX5_MAX_UMR_SHIFT 16
 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
 
+static inline u32 check_cq_create_flags(u32 flags)
+{
+       /*
+        * Returns a non-zero value for unsupported CQ create flags,
+        * zero otherwise.
+        */
+       return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+                         IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+}
+
+static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
+                                    u32 *user_index)
+{
+       if (cqe_version) {
+               if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
+                   (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
+                       return -EINVAL;
+               *user_index = cmd_uidx;
+       } else {
+               *user_index = MLX5_IB_DEFAULT_UIDX;
+       }
+
+       return 0;
+}
 #endif /* MLX5_IB_H */
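
A hypothetical caller sketch for the two new inline helpers, as a CQ create
path might use them (attr, ucmd and context are stand-ins): reject unknown
create flags up front, then derive the firmware user index from the
negotiated CQE version:

    u32 uidx;
    int err;

    if (check_cq_create_flags(attr->flags))
            return ERR_PTR(-EOPNOTSUPP);

    err = verify_assign_uidx(context->cqe_version, ucmd.uidx, &uidx);
    if (err)
            return ERR_PTR(err);
    /* uidx is now the validated user value, or MLX5_IB_DEFAULT_UIDX */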
index aa8391e75385016bb11a2526f8660331193b7c4e..b8d76361a48dcfe29914ff13011b09d2893c851d 100644 (file)
@@ -153,14 +153,16 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
 
 static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
                                      struct mlx5_ib_pfault *pfault,
-                                     int error) {
+                                     int error)
+{
        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
-       int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
+       int ret = mlx5_core_page_fault_resume(dev->mdev,
+                                             qpn,
                                              pfault->mpfault.flags,
                                              error);
        if (ret)
-               pr_err("Failed to resolve the page fault on QP 0x%x\n",
-                      qp->mqp.qpn);
+               pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
 }
 
 /*
@@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 #if defined(DEBUG)
        u32 ctrl_wqe_index, ctrl_qpn;
 #endif
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
 
        ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
        if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
@@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 
        if (ds == 0) {
                mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
-                           wqe_index, qp->mqp.qpn);
+                           wqe_index, qpn);
                return -EFAULT;
        }
 
@@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
                        MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
        if (wqe_index != ctrl_wqe_index) {
                mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
-                           wqe_index, qp->mqp.qpn,
+                           wqe_index, qpn,
                            ctrl_wqe_index);
                return -EFAULT;
        }
 
        ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
                MLX5_WQE_CTRL_QPN_SHIFT;
-       if (qp->mqp.qpn != ctrl_qpn) {
+       if (qpn != ctrl_qpn) {
                mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
-                           wqe_index, qp->mqp.qpn,
+                           wqe_index, qpn,
                            ctrl_qpn);
                return -EFAULT;
        }
@@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
        int resume_with_error = 0;
        u16 wqe_index = pfault->mpfault.wqe.wqe_index;
        int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
 
        buffer = (char *)__get_free_page(GFP_KERNEL);
        if (!buffer) {
@@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
        }
 
        ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
-                                   PAGE_SIZE);
+                                   PAGE_SIZE, &qp->trans_qp.base);
        if (ret < 0) {
                mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
-                           -ret, wqe_index, qp->mqp.qpn);
+                           -ret, wqe_index, qpn);
                resume_with_error = 1;
                goto resolve_page_fault;
        }
@@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
 resolve_page_fault:
        mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
        mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
-                   qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+                   qpn, resume_with_error, pfault->mpfault.flags);
 
        free_page((unsigned long)buffer);
 }
@@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
        qp->disable_page_faults = 1;
        spin_lock_init(&qp->disable_page_faults_lock);
 
-       qp->mqp.pfault_handler  = mlx5_ib_pfault_handler;
+       qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
 
        for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
                INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
index 307bdbca8938e1d87493d7048b93126bd9261e26..34cb8e87c7b8b673493285f807206c437ac42a84 100644 (file)
@@ -32,6 +32,8 @@
 
 #include <linux/module.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
 #include "mlx5_ib.h"
 #include "user.h"
 
@@ -114,14 +116,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
  * Return: the number of bytes copied, or an error code.
  */
 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
-                         void *buffer, u32 length)
+                         void *buffer, u32 length,
+                         struct mlx5_ib_qp_base *base)
 {
        struct ib_device *ibdev = qp->ibqp.device;
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
        size_t offset;
        size_t wq_end;
-       struct ib_umem *umem = qp->umem;
+       struct ib_umem *umem = base->ubuffer.umem;
        u32 first_copy_length;
        int wqe_length;
        int ret;
@@ -172,8 +175,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
        struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
        struct ib_event event;
 
-       if (type == MLX5_EVENT_TYPE_PATH_MIG)
-               to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+       if (type == MLX5_EVENT_TYPE_PATH_MIG) {
+               /* This event is only valid for trans_qps */
+               to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
+       }
 
        if (ibqp->event_handler) {
                event.device     = ibqp->device;
@@ -265,8 +270,10 @@ static int sq_overhead(enum ib_qp_type qp_type)
                /* fall through */
        case IB_QPT_RC:
                size += sizeof(struct mlx5_wqe_ctrl_seg) +
-                       sizeof(struct mlx5_wqe_atomic_seg) +
-                       sizeof(struct mlx5_wqe_raddr_seg);
+                       max(sizeof(struct mlx5_wqe_atomic_seg) +
+                           sizeof(struct mlx5_wqe_raddr_seg),
+                           sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+                           sizeof(struct mlx5_mkey_seg));
                break;
 
        case IB_QPT_XRC_TGT:
@@ -274,9 +281,9 @@ static int sq_overhead(enum ib_qp_type qp_type)
 
        case IB_QPT_UC:
                size += sizeof(struct mlx5_wqe_ctrl_seg) +
-                       sizeof(struct mlx5_wqe_raddr_seg) +
-                       sizeof(struct mlx5_wqe_umr_ctrl_seg) +
-                       sizeof(struct mlx5_mkey_seg);
+                       max(sizeof(struct mlx5_wqe_raddr_seg),
+                           sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+                           sizeof(struct mlx5_mkey_seg));
                break;
 
        case IB_QPT_UD:
@@ -366,7 +373,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 
 static int set_user_buf_size(struct mlx5_ib_dev *dev,
                            struct mlx5_ib_qp *qp,
-                           struct mlx5_ib_create_qp *ucmd)
+                           struct mlx5_ib_create_qp *ucmd,
+                           struct mlx5_ib_qp_base *base,
+                           struct ib_qp_init_attr *attr)
 {
        int desc_sz = 1 << qp->sq.wqe_shift;
 
@@ -391,8 +400,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
                return -EINVAL;
        }
 
-       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
-               (qp->sq.wqe_cnt << 6);
+       if (attr->qp_type == IB_QPT_RAW_PACKET) {
+               base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+               qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
+       } else {
+               base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+                                        (qp->sq.wqe_cnt << 6);
+       }
 
        return 0;
 }
@@ -578,8 +592,8 @@ static int to_mlx5_st(enum ib_qp_type type)
        case IB_QPT_SMI:                return MLX5_QP_ST_QP0;
        case IB_QPT_GSI:                return MLX5_QP_ST_QP1;
        case IB_QPT_RAW_IPV6:           return MLX5_QP_ST_RAW_IPV6;
-       case IB_QPT_RAW_ETHERTYPE:      return MLX5_QP_ST_RAW_ETHERTYPE;
        case IB_QPT_RAW_PACKET:
+       case IB_QPT_RAW_ETHERTYPE:      return MLX5_QP_ST_RAW_ETHERTYPE;
        case IB_QPT_MAX:
        default:                return -EINVAL;
        }
@@ -590,13 +604,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
        return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
 }
 
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
+                           struct ib_pd *pd,
+                           unsigned long addr, size_t size,
+                           struct ib_umem **umem,
+                           int *npages, int *page_shift, int *ncont,
+                           u32 *offset)
+{
+       int err;
+
+       *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+       if (IS_ERR(*umem)) {
+               mlx5_ib_dbg(dev, "umem_get failed\n");
+               return PTR_ERR(*umem);
+       }
+
+       mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+
+       err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
+       if (err) {
+               mlx5_ib_warn(dev, "bad offset\n");
+               goto err_umem;
+       }
+
+       mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
+                   addr, size, *npages, *page_shift, *ncont, *offset);
+
+       return 0;
+
+err_umem:
+       ib_umem_release(*umem);
+       *umem = NULL;
+
+       return err;
+}
+
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                          struct mlx5_ib_qp *qp, struct ib_udata *udata,
+                         struct ib_qp_init_attr *attr,
                          struct mlx5_create_qp_mbox_in **in,
-                         struct mlx5_ib_create_qp_resp *resp, int *inlen)
+                         struct mlx5_ib_create_qp_resp *resp, int *inlen,
+                         struct mlx5_ib_qp_base *base)
 {
        struct mlx5_ib_ucontext *context;
        struct mlx5_ib_create_qp ucmd;
+       struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
        int page_shift = 0;
        int uar_index;
        int npages;
@@ -615,18 +667,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        /*
         * TBD: should come from the verbs when we have the API
         */
-       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
-       if (uuarn < 0) {
-               mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
-               mlx5_ib_dbg(dev, "reverting to medium latency\n");
-               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) {
+               /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+               uuarn = MLX5_CROSS_CHANNEL_UUAR;
+       } else {
+               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
                if (uuarn < 0) {
-                       mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
-                       mlx5_ib_dbg(dev, "reverting to high latency\n");
-                       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+                       mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+                       mlx5_ib_dbg(dev, "reverting to medium latency\n");
+                       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
                        if (uuarn < 0) {
-                               mlx5_ib_warn(dev, "uuar allocation failed\n");
-                               return uuarn;
+                               mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+                               mlx5_ib_dbg(dev, "reverting to high latency\n");
+                               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+                               if (uuarn < 0) {
+                                       mlx5_ib_warn(dev, "uuar allocation failed\n");
+                                       return uuarn;
+                               }
                        }
                }
        }
@@ -638,32 +695,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
 
-       err = set_user_buf_size(dev, qp, &ucmd);
+       err = set_user_buf_size(dev, qp, &ucmd, base, attr);
        if (err)
                goto err_uuar;
 
-       if (ucmd.buf_addr && qp->buf_size) {
-               qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
-                                      qp->buf_size, 0, 0);
-               if (IS_ERR(qp->umem)) {
-                       mlx5_ib_dbg(dev, "umem_get failed\n");
-                       err = PTR_ERR(qp->umem);
+       if (ucmd.buf_addr && ubuffer->buf_size) {
+               ubuffer->buf_addr = ucmd.buf_addr;
+               err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
+                                      ubuffer->buf_size,
+                                      &ubuffer->umem, &npages, &page_shift,
+                                      &ncont, &offset);
+               if (err)
                        goto err_uuar;
-               }
        } else {
-               qp->umem = NULL;
-       }
-
-       if (qp->umem) {
-               mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
-                                  &ncont, NULL);
-               err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
-               if (err) {
-                       mlx5_ib_warn(dev, "bad offset\n");
-                       goto err_umem;
-               }
-               mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
-                           ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+               ubuffer->umem = NULL;
        }
 
        *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
@@ -672,8 +717,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                err = -ENOMEM;
                goto err_umem;
        }
-       if (qp->umem)
-               mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+       if (ubuffer->umem)
+               mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
+                                    (*in)->pas, 0);
        (*in)->ctx.log_pg_sz_remote_qpn =
                cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
        (*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -704,29 +750,31 @@ err_free:
        kvfree(*in);
 
 err_umem:
-       if (qp->umem)
-               ib_umem_release(qp->umem);
+       if (ubuffer->umem)
+               ib_umem_release(ubuffer->umem);
 
 err_uuar:
        free_uuar(&context->uuari, uuarn);
        return err;
 }
 
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+                           struct mlx5_ib_qp_base *base)
 {
        struct mlx5_ib_ucontext *context;
 
        context = to_mucontext(pd->uobject->context);
        mlx5_ib_db_unmap_user(context, &qp->db);
-       if (qp->umem)
-               ib_umem_release(qp->umem);
+       if (base->ubuffer.umem)
+               ib_umem_release(base->ubuffer.umem);
        free_uuar(&context->uuari, qp->uuarn);
 }
 
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
                            struct ib_qp_init_attr *init_attr,
                            struct mlx5_ib_qp *qp,
-                           struct mlx5_create_qp_mbox_in **in, int *inlen)
+                           struct mlx5_create_qp_mbox_in **in, int *inlen,
+                           struct mlx5_ib_qp_base *base)
 {
        enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
        struct mlx5_uuar_info *uuari;
@@ -758,9 +806,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 
        qp->rq.offset = 0;
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
-       qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+       base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
 
-       err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf);
+       err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
        if (err) {
                mlx5_ib_dbg(dev, "err %d\n", err);
                goto err_uuar;
@@ -853,19 +901,304 @@ static int is_connected(enum ib_qp_type qp_type)
        return 0;
 }
 
+static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+                                   struct mlx5_ib_sq *sq, u32 tdn)
+{
+       u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+       void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+       memset(in, 0, sizeof(in));
+
+       MLX5_SET(tisc, tisc, transport_domain, tdn);
+
+       return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+}
+
+static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+                                     struct mlx5_ib_sq *sq)
+{
+       mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+}
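
All of the raw-packet plumbing here drives firmware through the PRM accessor
macros from linux/mlx5/device.h: MLX5_ST_SZ_DW()/MLX5_ST_SZ_BYTES() size a
command by its named layout, MLX5_ADDR_OF() locates a sub-structure, and
MLX5_SET()/MLX5_GET() access named bitfields, so no packed C structs are
needed. A tiny round trip (tdn as above):

    u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
    void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

    MLX5_SET(tisc, tisc, transport_domain, tdn);
    WARN_ON(MLX5_GET(tisc, tisc, transport_domain) != tdn);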
+
+static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+                                  struct mlx5_ib_sq *sq, void *qpin,
+                                  struct ib_pd *pd)
+{
+       struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
+       __be64 *pas;
+       void *in;
+       void *sqc;
+       void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+       void *wq;
+       int inlen;
+       int err;
+       int page_shift = 0;
+       int npages;
+       int ncont = 0;
+       u32 offset = 0;
+
+       err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
+                              &sq->ubuffer.umem, &npages, &page_shift,
+                              &ncont, &offset);
+       if (err)
+               return err;
+
+       inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               err = -ENOMEM;
+               goto err_umem;
+       }
+
+       sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+       MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+       MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+       MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
+       MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
+       MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+       MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+
+       wq = MLX5_ADDR_OF(sqc, sqc, wq);
+       MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+       MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+       MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
+       MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+       MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+       MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
+       MLX5_SET(wq, wq, log_wq_pg_sz,  page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+       MLX5_SET(wq, wq, page_offset, offset);
+
+       pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+       mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+
+       err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+
+       kvfree(in);
+
+       if (err)
+               goto err_umem;
+
+       return 0;
+
+err_umem:
+       ib_umem_release(sq->ubuffer.umem);
+       sq->ubuffer.umem = NULL;
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_sq *sq)
+{
+       mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+       ib_umem_release(sq->ubuffer.umem);
+}
+
+static int get_rq_pas_size(void *qpc)
+{
+       u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
+       u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
+       u32 log_rq_size   = MLX5_GET(qpc, qpc, log_rq_size);
+       u32 page_offset   = MLX5_GET(qpc, qpc, page_offset);
+       u32 po_quanta     = 1 << (log_page_size - 6);
+       u32 rq_sz         = 1 << (log_rq_size + 4 + log_rq_stride);
+       u32 page_size     = 1 << log_page_size;
+       u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
+       u32 rq_num_pas    = (rq_sz_po + page_size - 1) / page_size;
+
+       return rq_num_pas * sizeof(u64);
+}
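
For intuition, a worked instance of this size computation with illustrative
field values (log_page_size field 0, i.e. 4 KiB pages; log_rq_stride 2, i.e.
a 64-byte stride; log_rq_size 8, i.e. 256 entries; page_offset 0):

    u32 log_page_size = 0 + 12;                               /* 4 KiB pages */
    u32 rq_sz         = 1 << (8 + 4 + 2);                     /* 16 KiB of WQEs */
    u32 page_size     = 1 << log_page_size;
    u32 rq_num_pas    = (rq_sz + page_size - 1) / page_size;  /* 4 pages */
    /* -> 4 * sizeof(u64) = 32 bytes of PAS entries */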
+
+static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+                                  struct mlx5_ib_rq *rq, void *qpin)
+{
+       __be64 *pas;
+       __be64 *qp_pas;
+       void *in;
+       void *rqc;
+       void *wq;
+       void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+       int inlen;
+       int err;
+       u32 rq_pas_size = get_rq_pas_size(qpc);
+
+       inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+       MLX5_SET(rqc, rqc, vsd, 1);
+       MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+       MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+       MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+       MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
+       MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+
+       wq = MLX5_ADDR_OF(rqc, rqc, wq);
+       MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+       MLX5_SET(wq, wq, end_padding_mode,
+                MLX5_GET(qpc, qpc, end_padding_mode));
+       MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+       MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+       MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+       MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+       MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+       MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
+
+       pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+       qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
+       memcpy(pas, qp_pas, rq_pas_size);
+
+       err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+
+       kvfree(in);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_rq *rq)
+{
+       mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+}
+
+static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+                                   struct mlx5_ib_rq *rq, u32 tdn)
+{
+       u32 *in;
+       void *tirc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+       MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
+       MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+       err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+
+       kvfree(in);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+                                     struct mlx5_ib_rq *rq)
+{
+       mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+}
+
+static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               struct mlx5_create_qp_mbox_in *in,
+                               struct ib_pd *pd)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       struct ib_uobject *uobj = pd->uobject;
+       struct ib_ucontext *ucontext = uobj->context;
+       struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+       int err;
+       u32 tdn = mucontext->tdn;
+
+       if (qp->sq.wqe_cnt) {
+               err = create_raw_packet_qp_tis(dev, sq, tdn);
+               if (err)
+                       return err;
+
+               err = create_raw_packet_qp_sq(dev, sq, in, pd);
+               if (err)
+                       goto err_destroy_tis;
+
+               sq->base.container_mibqp = qp;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err = create_raw_packet_qp_rq(dev, rq, in);
+               if (err)
+                       goto err_destroy_sq;
+
+               rq->base.container_mibqp = qp;
+
+               err = create_raw_packet_qp_tir(dev, rq, tdn);
+               if (err)
+                       goto err_destroy_rq;
+       }
+
+       qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
+                                                    rq->base.mqp.qpn;
+
+       return 0;
+
+err_destroy_rq:
+       destroy_raw_packet_qp_rq(dev, rq);
+err_destroy_sq:
+       if (!qp->sq.wqe_cnt)
+               return err;
+       destroy_raw_packet_qp_sq(dev, sq);
+err_destroy_tis:
+       destroy_raw_packet_qp_tis(dev, sq);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
+                                 struct mlx5_ib_qp *qp)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+       if (qp->rq.wqe_cnt) {
+               destroy_raw_packet_qp_tir(dev, rq);
+               destroy_raw_packet_qp_rq(dev, rq);
+       }
+
+       if (qp->sq.wqe_cnt) {
+               destroy_raw_packet_qp_sq(dev, sq);
+               destroy_raw_packet_qp_tis(dev, sq);
+       }
+}
+
+static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
+                                   struct mlx5_ib_raw_packet_qp *raw_packet_qp)
+{
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+       sq->sq = &qp->sq;
+       rq->rq = &qp->rq;
+       sq->doorbell = &qp->db;
+       rq->doorbell = &qp->db;
+}
+
 static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_resources *devr = &dev->devr;
        struct mlx5_core_dev *mdev = dev->mdev;
+       struct mlx5_ib_qp_base *base;
        struct mlx5_ib_create_qp_resp resp;
        struct mlx5_create_qp_mbox_in *in;
        struct mlx5_ib_create_qp ucmd;
        int inlen = sizeof(*in);
        int err;
+       u32 uidx = MLX5_IB_DEFAULT_UIDX;
+       void *qpc;
+
+       base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
+              &qp->raw_packet_qp.rq.base :
+              &qp->trans_qp.base;
 
-       mlx5_ib_odp_create_qp(qp);
+       if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+               mlx5_ib_odp_create_qp(qp);
 
        mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
@@ -880,6 +1213,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                }
        }
 
+       if (init_attr->create_flags &
+                       (IB_QP_CREATE_CROSS_CHANNEL |
+                        IB_QP_CREATE_MANAGED_SEND |
+                        IB_QP_CREATE_MANAGED_RECV)) {
+               if (!MLX5_CAP_GEN(mdev, cd)) {
+                       mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
+                       return -EINVAL;
+               }
+               if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+                       qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
+               if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+                       qp->flags |= MLX5_IB_QP_MANAGED_SEND;
+               if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+                       qp->flags |= MLX5_IB_QP_MANAGED_RECV;
+       }
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 
@@ -889,6 +1237,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                        return -EFAULT;
                }
 
+               err = get_qp_user_index(to_mucontext(pd->uobject->context),
+                                       &ucmd, udata->inlen, &uidx);
+               if (err)
+                       return err;
+
                qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
                qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
        } else {
@@ -918,11 +1271,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                                            ucmd.sq_wqe_count, max_wqes);
                                return -EINVAL;
                        }
-                       err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+                       err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
+                                            &resp, &inlen, base);
                        if (err)
                                mlx5_ib_dbg(dev, "err %d\n", err);
                } else {
-                       err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+                       err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
+                                              base);
                        if (err)
                                mlx5_ib_dbg(dev, "err %d\n", err);
                }
@@ -954,6 +1309,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
                in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
 
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+       if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+       if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+
        if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
                int rcqe_sz;
                int scqe_sz;
@@ -1018,26 +1380,35 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
        in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
 
-       err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
+       if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+               /* a user_index of 0xffffff asks to work with CQE version 0 */
+               MLX5_SET(qpc, qpc, user_index, uidx);
+       }
+
+       if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+               qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+               raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+               err = create_raw_packet_qp(dev, qp, in, pd);
+       } else {
+               err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+       }
+
        if (err) {
                mlx5_ib_dbg(dev, "create qp failed\n");
                goto err_create;
        }
 
        kvfree(in);
-       /* Hardware wants QPN written in big-endian order (after
-        * shifting) for send doorbell.  Precompute this value to save
-        * a little bit when posting sends.
-        */
-       qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
 
-       qp->mqp.event = mlx5_ib_qp_event;
+       base->container_mibqp = qp;
+       base->mqp.event = mlx5_ib_qp_event;
 
        return 0;
 
 err_create:
        if (qp->create_type == MLX5_QP_USER)
-               destroy_qp_user(pd, qp);
+               destroy_qp_user(pd, qp, base);
        else if (qp->create_type == MLX5_QP_KERNEL)
                destroy_qp_kernel(dev, qp);
 
@@ -1129,11 +1500,11 @@ static void get_cqs(struct mlx5_ib_qp *qp,
        case IB_QPT_UD:
        case IB_QPT_RAW_IPV6:
        case IB_QPT_RAW_ETHERTYPE:
+       case IB_QPT_RAW_PACKET:
                *send_cq = to_mcq(qp->ibqp.send_cq);
                *recv_cq = to_mcq(qp->ibqp.recv_cq);
                break;
 
-       case IB_QPT_RAW_PACKET:
        case IB_QPT_MAX:
        default:
                *send_cq = NULL;
@@ -1142,45 +1513,66 @@ static void get_cqs(struct mlx5_ib_qp *qp,
        }
 }
 
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               u16 operation);
+
 static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_cq *send_cq, *recv_cq;
+       struct mlx5_ib_qp_base *base;
        struct mlx5_modify_qp_mbox_in *in;
        int err;
 
+       base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+              &qp->raw_packet_qp.rq.base :
+              &qp->trans_qp.base;
+
        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return;
 
        if (qp->state != IB_QPS_RESET) {
-               mlx5_ib_qp_disable_pagefaults(qp);
-               if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
-                                       MLX5_QP_STATE_RST, in, 0, &qp->mqp))
-                       mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
-                                    qp->mqp.qpn);
+               if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+                       mlx5_ib_qp_disable_pagefaults(qp);
+                       err = mlx5_core_qp_modify(dev->mdev,
+                                                 MLX5_CMD_OP_2RST_QP, in, 0,
+                                                 &base->mqp);
+               } else {
+                       err = modify_raw_packet_qp(dev, qp,
+                                                  MLX5_CMD_OP_2RST_QP);
+               }
+               if (err)
+                       mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
+                                    base->mqp.qpn);
        }
 
        get_cqs(qp, &send_cq, &recv_cq);
 
        if (qp->create_type == MLX5_QP_KERNEL) {
                mlx5_ib_lock_cqs(send_cq, recv_cq);
-               __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+               __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
                                   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                if (send_cq != recv_cq)
-                       __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+                       __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
+                                          NULL);
                mlx5_ib_unlock_cqs(send_cq, recv_cq);
        }
 
-       err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
-       if (err)
-               mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
-       kfree(in);
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+               destroy_raw_packet_qp(dev, qp);
+       } else {
+               err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+               if (err)
+                       mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
+                                    base->mqp.qpn);
+       }
 
+       kfree(in);
 
        if (qp->create_type == MLX5_QP_KERNEL)
                destroy_qp_kernel(dev, qp);
        else if (qp->create_type == MLX5_QP_USER)
-               destroy_qp_user(&get_pd(qp)->ibpd, qp);
+               destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
 }
 
 static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -1225,6 +1617,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 
        if (pd) {
                dev = to_mdev(pd->device);
+
+               if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+                       if (!pd->uobject) {
+                               mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
+                               return ERR_PTR(-EINVAL);
+                       } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+                               mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
+                               return ERR_PTR(-EINVAL);
+                       }
+               }
        } else {
                /* being cautious here */
                if (init_attr->qp_type != IB_QPT_XRC_TGT &&
@@ -1250,6 +1652,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
                }
 
                /* fall through */
+       case IB_QPT_RAW_PACKET:
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
@@ -1272,19 +1675,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
                else if (is_qp1(init_attr->qp_type))
                        qp->ibqp.qp_num = 1;
                else
-                       qp->ibqp.qp_num = qp->mqp.qpn;
+                       qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
 
                mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
-                           qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+                           qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+                           to_mcq(init_attr->recv_cq)->mcq.cqn,
                            to_mcq(init_attr->send_cq)->mcq.cqn);
 
-               qp->xrcdn = xrcdn;
+               qp->trans_qp.xrcdn = xrcdn;
 
                break;
 
        case IB_QPT_RAW_IPV6:
        case IB_QPT_RAW_ETHERTYPE:
-       case IB_QPT_RAW_PACKET:
        case IB_QPT_MAX:
        default:
                mlx5_ib_dbg(dev, "unsupported qp type %d\n",
@@ -1318,12 +1721,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                dest_rd_atomic = attr->max_dest_rd_atomic;
        else
-               dest_rd_atomic = qp->resp_depth;
+               dest_rd_atomic = qp->trans_qp.resp_depth;
 
        if (attr_mask & IB_QP_ACCESS_FLAGS)
                access_flags = attr->qp_access_flags;
        else
-               access_flags = qp->atomic_rd_en;
+               access_flags = qp->trans_qp.atomic_rd_en;
 
        if (!dest_rd_atomic)
                access_flags &= IB_ACCESS_REMOTE_WRITE;
@@ -1360,21 +1763,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
        return rate + MLX5_STAT_RATE_OFFSET;
 }
 
-static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
+                                     struct mlx5_ib_sq *sq, u8 sl)
+{
+       void *in;
+       void *tisc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+
+       tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+       MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
+
+       err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+
+       kvfree(in);
+
+       return err;
+}
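
modify_raw_packet_eth_prio() above, and the RQ/SQ modify and query helpers added further down, all use the same mlx5 mailbox idiom: MLX5_ST_SZ_BYTES() sizes a command layout, MLX5_SET() stores a field in the device's byte order, and MLX5_ADDR_OF()/MLX5_GET() address and read embedded contexts. A minimal sketch of the read side, assuming the query_sq_out layout used later in this patch:

    /* sketch: pull one field back out of a query mailbox */
    void *sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
    u8 state = MLX5_GET(sqc, sqc, state);
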
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                        const struct ib_ah_attr *ah,
                         struct mlx5_qp_path *path, u8 port, int attr_mask,
                         u32 path_flags, const struct ib_qp_attr *attr)
 {
+       enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
        int err;
 
-       path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
-       path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
-
        if (attr_mask & IB_QP_PKEY_INDEX)
                path->pkey_index = attr->pkey_index;
 
-       path->grh_mlid  = ah->src_path_bits & 0x7f;
-       path->rlid      = cpu_to_be16(ah->dlid);
-
        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >=
                    dev->mdev->port_caps[port - 1].gid_table_len) {
@@ -1383,7 +1807,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
                               dev->mdev->port_caps[port - 1].gid_table_len);
                        return -EINVAL;
                }
-               path->grh_mlid |= 1 << 7;
+       }
+
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               if (!(ah->ah_flags & IB_AH_GRH))
+                       return -EINVAL;
+               memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+               path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
+                                                         ah->grh.sgid_index);
+               path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+       } else {
+               path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+               path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
+                                                                       0;
+               path->rlid = cpu_to_be16(ah->dlid);
+               path->grh_mlid = ah->src_path_bits & 0x7f;
+               if (ah->ah_flags & IB_AH_GRH)
+                       path->grh_mlid  |= 1 << 7;
+               path->dci_cfi_prio_sl = ah->sl & 0xf;
+       }
+
+       if (ah->ah_flags & IB_AH_GRH) {
                path->mgid_index = ah->grh.sgid_index;
                path->hop_limit  = ah->grh.hop_limit;
                path->tclass_flowlabel =
@@ -1401,7 +1845,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
        if (attr_mask & IB_QP_TIMEOUT)
                path->ackto_lt = attr->timeout << 3;
 
-       path->sl = ah->sl & 0xf;
+       if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+               return modify_raw_packet_eth_prio(dev->mdev,
+                                                 &qp->raw_packet_qp.sq,
+                                                 ah->sl & 0xf);
 
        return 0;
 }
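
Note the two encodings of the service level in dci_cfi_prio_sl: RoCE places a 3-bit priority in the upper nibble, IB a 4-bit SL in the lower nibble. A sketch mirroring the shifts above:

    /* sketch: service-level placement in mlx5_qp_path.dci_cfi_prio_sl */
    static u8 eth_prio_bits(u8 sl) { return (sl & 0x7) << 4; } /* RoCE */
    static u8 ib_sl_bits(u8 sl)    { return sl & 0xf; }        /* IB   */
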
@@ -1549,12 +1996,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
        return result;
 }
 
+static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
+                                  struct mlx5_ib_rq *rq, int new_state)
+{
+       void *in;
+       void *rqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+
+       rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+       MLX5_SET(rqc, rqc, state, new_state);
+
+       err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+       if (err)
+               goto out;
+
+       rq->state = new_state;
+
+out:
+       kvfree(in);
+       return err;
+}
+
+static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
+                                  struct mlx5_ib_sq *sq, int new_state)
+{
+       void *in;
+       void *sqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_sq_in, in, sq_state, sq->state);
+
+       sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+       MLX5_SET(sqc, sqc, state, new_state);
+
+       err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+       if (err)
+               goto out;
+
+       sq->state = new_state;
+
+out:
+       kvfree(in);
+       return err;
+}
+
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               u16 operation)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       int rq_state;
+       int sq_state;
+       int err;
+
+       switch (operation) {
+       case MLX5_CMD_OP_RST2INIT_QP:
+               rq_state = MLX5_RQC_STATE_RDY;
+               sq_state = MLX5_SQC_STATE_RDY;
+               break;
+       case MLX5_CMD_OP_2ERR_QP:
+               rq_state = MLX5_RQC_STATE_ERR;
+               sq_state = MLX5_SQC_STATE_ERR;
+               break;
+       case MLX5_CMD_OP_2RST_QP:
+               rq_state = MLX5_RQC_STATE_RST;
+               sq_state = MLX5_SQC_STATE_RST;
+               break;
+       case MLX5_CMD_OP_INIT2INIT_QP:
+       case MLX5_CMD_OP_INIT2RTR_QP:
+       case MLX5_CMD_OP_RTR2RTS_QP:
+       case MLX5_CMD_OP_RTS2RTS_QP:
+               /* Nothing to do here... */
+               return 0;
+       default:
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+               if (err)
+                       return err;
+       }
+
+       if (qp->sq.wqe_cnt)
+               return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+
+       return 0;
+}
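
A raw packet QP is an RQ/SQ pair under the hood, and the rings only know reset/ready/error, so the INIT2RTR/RTR2RTS style transitions degenerate to no-ops while everything else collapses onto three ring states, applied RQ first and then SQ. A sketch of the collapse for the RQ side, mirroring the switch above (the SQ mapping has the same shape):

    /* sketch: verbs transition -> RQ ring state; -1 means nothing to do */
    static int op_to_rq_state(u16 op)
    {
            switch (op) {
            case MLX5_CMD_OP_RST2INIT_QP: return MLX5_RQC_STATE_RDY;
            case MLX5_CMD_OP_2ERR_QP:     return MLX5_RQC_STATE_ERR;
            case MLX5_CMD_OP_2RST_QP:     return MLX5_RQC_STATE_RST;
            default:                      return -1;
            }
    }
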
+
 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
 {
+       static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+               [MLX5_QP_STATE_RST] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
+               },
+               [MLX5_QP_STATE_INIT]  = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
+                       [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
+               },
+               [MLX5_QP_STATE_RTR]   = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
+               },
+               [MLX5_QP_STATE_RTS]   = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
+               },
+               [MLX5_QP_STATE_SQD] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+               },
+               [MLX5_QP_STATE_SQER] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
+               },
+               [MLX5_QP_STATE_ERR] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+               }
+       };
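
optab is indexed [current][next] and yields the firmware opcode for that transition; entries left out of the designated initializers are zero and mark unsupported transitions. A sketch of the lookup performed further down in this function:

    /* sketch: validate the transition, then fetch its opcode (the real
     * code jumps to the out label rather than returning directly) */
    if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
        !optab[mlx5_cur][mlx5_new])
            return -EINVAL;
    op = optab[mlx5_cur][mlx5_new];
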
+
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_qp_context *context;
        struct mlx5_modify_qp_mbox_in *in;
@@ -1564,6 +2153,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        int sqd_event;
        int mlx5_st;
        int err;
+       u16 op;
 
        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
@@ -1623,7 +2213,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                context->pri_path.port = attr->port_num;
 
        if (attr_mask & IB_QP_AV) {
-               err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+               err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
                                    attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
                                    attr_mask, 0, attr);
                if (err)
@@ -1634,7 +2224,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                context->pri_path.ackto_lt |= attr->timeout << 3;
 
        if (attr_mask & IB_QP_ALT_PATH) {
-               err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+               err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
+                                   &context->alt_path,
                                    attr->alt_port_num, attr_mask, 0, attr);
                if (err)
                        goto out;
@@ -1706,41 +2297,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
         * again to RTS, and may cause the driver and the device to get out of
         * sync. */
        if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
-           (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+           (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
+           (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
                mlx5_ib_qp_disable_pagefaults(qp);
 
+       if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
+           !optab[mlx5_cur][mlx5_new])
+               goto out;
+
+       op = optab[mlx5_cur][mlx5_new];
        optpar = ib_mask_to_mlx5_opt(attr_mask);
        optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
        in->optparam = cpu_to_be32(optpar);
-       err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state),
-                                 to_mlx5_state(new_state), in, sqd_event,
-                                 &qp->mqp);
+
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+               err = modify_raw_packet_qp(dev, qp, op);
+       else
+               err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+                                         &base->mqp);
        if (err)
                goto out;
 
-       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
+           (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
                mlx5_ib_qp_enable_pagefaults(qp);
 
        qp->state = new_state;
 
        if (attr_mask & IB_QP_ACCESS_FLAGS)
-               qp->atomic_rd_en = attr->qp_access_flags;
+               qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               qp->resp_depth = attr->max_dest_rd_atomic;
+               qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
        if (attr_mask & IB_QP_PORT)
                qp->port = attr->port_num;
        if (attr_mask & IB_QP_ALT_PATH)
-               qp->alt_port = attr->alt_port_num;
+               qp->trans_qp.alt_port = attr->alt_port_num;
 
        /*
         * If we moved a kernel QP to RESET, clean up all old CQ
         * entries and reinitialize the QP.
         */
        if (new_state == IB_QPS_RESET && !ibqp->uobject) {
-               mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+               mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
                                 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
                if (send_cq != recv_cq)
-                       mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+                       mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
 
                qp->rq.head = 0;
                qp->rq.tail = 0;
@@ -1765,15 +2366,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        enum ib_qp_state cur_state, new_state;
        int err = -EINVAL;
        int port;
+       enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
 
        mutex_lock(&qp->mutex);
 
        cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
+       if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
+               port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+               ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
+       }
+
        if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
            !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
-                               IB_LINK_LAYER_UNSPECIFIED))
+                               ll))
                goto out;
 
        if ((attr_mask & IB_QP_PORT) &&
@@ -2570,7 +3177,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 
        ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
                                             mlx5_opcode | ((u32)opmod << 24));
-       ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+       ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
        ctrl->fm_ce_se |= fence;
        qp->fm_cache = next_fence;
        if (unlikely(qp->wq_sig))
@@ -3003,7 +3610,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
            ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
                return;
 
-       ib_ah_attr->sl = path->sl & 0xf;
+       ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
 
        ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
        ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
@@ -3021,39 +3628,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
        }
 }
 
-int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
-                    struct ib_qp_init_attr *qp_init_attr)
+static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
+                                       struct mlx5_ib_sq *sq,
+                                       u8 *sq_state)
+{
+       void *out;
+       void *sqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+       out = mlx5_vzalloc(inlen);
+       if (!out)
+               return -ENOMEM;
+
+       err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+       if (err)
+               goto out;
+
+       sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+       *sq_state = MLX5_GET(sqc, sqc, state);
+       sq->state = *sq_state;
+
+out:
+       kvfree(out);
+       return err;
+}
+
+static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
+                                       struct mlx5_ib_rq *rq,
+                                       u8 *rq_state)
+{
+       void *out;
+       void *rqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(query_rq_out);
+       out = mlx5_vzalloc(inlen);
+       if (!out)
+               return -ENOMEM;
+
+       err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
+       if (err)
+               goto out;
+
+       rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+       *rq_state = MLX5_GET(rqc, rqc, state);
+       rq->state = *rq_state;
+
+out:
+       kvfree(out);
+       return err;
+}
+
+static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
+                                 struct mlx5_ib_qp *qp, u8 *qp_state)
+{
+       static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
+               [MLX5_RQC_STATE_RST] = {
+                       [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQ_STATE_NA]      = IB_QPS_RESET,
+               },
+               [MLX5_RQC_STATE_RDY] = {
+                       [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
+                       [MLX5_SQC_STATE_ERR]    = IB_QPS_SQE,
+                       [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE,
+               },
+               [MLX5_RQC_STATE_ERR] = {
+                       [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_ERR]    = IB_QPS_ERR,
+                       [MLX5_SQ_STATE_NA]      = IB_QPS_ERR,
+               },
+               [MLX5_RQ_STATE_NA] = {
+                       [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
+                       [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE,
+                       [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE_BAD,
+               },
+       };
+
+       *qp_state = sqrq_trans[rq_state][sq_state];
+
+       if (*qp_state == MLX5_QP_STATE_BAD) {
+               WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
+                    qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
+                    qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
+               return -EINVAL;
+       }
+
+       if (*qp_state == MLX5_QP_STATE)
+               *qp_state = qp->state;
+
+       return 0;
+}
+
+static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_qp *qp,
+                                    u8 *raw_packet_qp_state)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       int err;
+       u8 sq_state = MLX5_SQ_STATE_NA;
+       u8 rq_state = MLX5_RQ_STATE_NA;
+
+       if (qp->sq.wqe_cnt) {
+               err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
+               if (err)
+                       return err;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
+               if (err)
+                       return err;
+       }
+
+       return sqrq_state_to_qp_state(sq_state, rq_state, qp,
+                                     raw_packet_qp_state);
+}
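
A ring that was never created (wqe_cnt == 0) reports the NA placeholder, and sqrq_state_to_qp_state() resolves NA/ready combinations to the MLX5_QP_STATE sentinel, meaning "keep the state already cached in qp->state". A sketch of the whole read path, assuming the helpers above:

    /* sketch: refresh the cached state of a raw packet QP */
    u8 state;
    int err = query_raw_packet_qp_state(dev, qp, &state);

    if (!err)
            qp->state = state; /* sentinel already collapsed by the helper */
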
+
+static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                        struct ib_qp_attr *qp_attr)
 {
-       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
-       struct mlx5_ib_qp *qp = to_mqp(ibqp);
        struct mlx5_query_qp_mbox_out *outb;
        struct mlx5_qp_context *context;
        int mlx5_state;
        int err = 0;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       /*
-        * Wait for any outstanding page faults, in case the user frees memory
-        * based upon this query's result.
-        */
-       flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
-       mutex_lock(&qp->mutex);
        outb = kzalloc(sizeof(*outb), GFP_KERNEL);
-       if (!outb) {
-               err = -ENOMEM;
-               goto out;
-       }
+       if (!outb)
+               return -ENOMEM;
+
        context = &outb->ctx;
-       err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
+       err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
+                                sizeof(*outb));
        if (err)
-               goto out_free;
+               goto out;
 
        mlx5_state = be32_to_cpu(context->flags) >> 28;
 
        qp->state                    = to_ib_qp_state(mlx5_state);
-       qp_attr->qp_state            = qp->state;
        qp_attr->path_mtu            = context->mtu_msgmax >> 5;
        qp_attr->path_mig_state      =
                to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -3087,6 +3808,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
        qp_attr->retry_cnt          = (be32_to_cpu(context->params1) >> 16) & 0x7;
        qp_attr->rnr_retry          = (be32_to_cpu(context->params1) >> 13) & 0x7;
        qp_attr->alt_timeout        = context->alt_path.ackto_lt >> 3;
+
+out:
+       kfree(outb);
+       return err;
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+                    int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       int err = 0;
+       u8 raw_packet_qp_state;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       /*
+        * Wait for any outstanding page faults, in case the user frees memory
+        * based upon this query's result.
+        */
+       flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
+       mutex_lock(&qp->mutex);
+
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+               err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
+               if (err)
+                       goto out;
+               qp->state = raw_packet_qp_state;
+               qp_attr->port_num = 1;
+       } else {
+               err = query_qp_attr(dev, qp, qp_attr);
+               if (err)
+                       goto out;
+       }
+
+       qp_attr->qp_state            = qp->state;
        qp_attr->cur_qp_state        = qp_attr->qp_state;
        qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
        qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
@@ -3110,12 +3868,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
        if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
                qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+               qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+       if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+               qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+       if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+               qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+
        qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
                IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 
-out_free:
-       kfree(outb);
-
 out:
        mutex_unlock(&qp->mutex);
        return err;
index e008505e96e9778ff9a71cfc021de7ecee6c3331..4659256cd95e698c3a9dad3c69407276e2117f29 100644 (file)
@@ -78,28 +78,41 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
                           struct ib_udata *udata, int buf_size, int *inlen)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_ib_create_srq ucmd;
+       struct mlx5_ib_create_srq ucmd = {};
        size_t ucmdlen;
+       void *xsrqc;
        int err;
        int npages;
        int page_shift;
        int ncont;
        u32 offset;
+       u32 uidx = MLX5_IB_DEFAULT_UIDX;
+       int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
 
-       ucmdlen =
-               (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
-                sizeof(ucmd)) ? (sizeof(ucmd) -
-                                 sizeof(ucmd.reserved)) : sizeof(ucmd);
+       if (drv_data < 0)
+               return -EINVAL;
+
+       ucmdlen = (drv_data < sizeof(ucmd)) ?
+                 drv_data : sizeof(ucmd);
 
        if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
                mlx5_ib_dbg(dev, "failed copy udata\n");
                return -EFAULT;
        }
 
-       if (ucmdlen == sizeof(ucmd) &&
-           ucmd.reserved != 0)
+       if (ucmd.reserved0 || ucmd.reserved1)
                return -EINVAL;
 
+       if (drv_data > sizeof(ucmd) &&
+           !ib_is_udata_cleared(udata, sizeof(ucmd),
+                                drv_data - sizeof(ucmd)))
+               return -EINVAL;
+
+       err = get_srq_user_index(to_mucontext(pd->uobject->context),
+                                &ucmd, udata->inlen, &uidx);
+       if (err)
+               return err;
+
        srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
        srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -138,6 +151,12 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
        (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
+       if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+                                    xrc_srq_context_entry);
+               MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
+       }
+
        return 0;
 
 err_in:
@@ -158,6 +177,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
        struct mlx5_wqe_srq_next_seg *next;
        int page_shift;
        int npages;
+       void *xsrqc;
 
        err = mlx5_db_alloc(dev->mdev, &srq->db);
        if (err) {
@@ -204,6 +224,13 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 
        (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
+       if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+                                    xrc_srq_context_entry);
+               /* 0xffffff means we ask to work with cqe version 0 */
+               MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
+       }
+
        return 0;
 
 err_in:
index 76fb7b927d373ef2710d412ec015309fb24fdd72..b94a55404a596dab323a843dec9a817fddbaff74 100644 (file)
@@ -35,6 +35,8 @@
 
 #include <linux/types.h>
 
+#include "mlx5_ib.h"
+
 enum {
        MLX5_QP_FLAG_SIGNATURE          = 1 << 0,
        MLX5_QP_FLAG_SCATTER_CQE        = 1 << 1,
@@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
        __u32   total_num_uuars;
        __u32   num_low_latency_uuars;
        __u32   flags;
-       __u32   reserved;
+       __u32   comp_mask;
+       __u8    max_cqe_version;
+       __u8    reserved0;
+       __u16   reserved1;
+       __u32   reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+       MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
 };
 
 struct mlx5_ib_alloc_ucontext_resp {
@@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp {
        __u32   max_recv_wr;
        __u32   max_srq_recv_wr;
        __u16   num_ports;
-       __u16   reserved;
+       __u16   reserved1;
+       __u32   comp_mask;
+       __u32   response_length;
+       __u8    cqe_version;
+       __u8    reserved2;
+       __u16   reserved3;
+       __u64   hca_core_clock_offset;
 };
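
comp_mask and response_length make the response extensible: the provider reports at most as many bytes as userspace asked for and sets one comp_mask bit per optional field it actually filled. A hedged sketch of the provider side (the capability flag and offset value are assumptions, not from this diff):

    /* sketch: filling the extensible response (resp assumed zeroed) */
    resp.response_length = min_t(size_t, sizeof(resp), udata->outlen);
    if (core_clock_mappable) { /* hypothetical capability flag */
            resp.comp_mask |=
                    MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
            resp.hca_core_clock_offset = clock_offset; /* hypothetical */
    }
    err = ib_copy_to_udata(udata, &resp, resp.response_length);
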
 
 struct mlx5_ib_alloc_pd_resp {
@@ -110,7 +126,9 @@ struct mlx5_ib_create_srq {
        __u64   buf_addr;
        __u64   db_addr;
        __u32   flags;
-       __u32   reserved; /* explicit padding (optional on i386) */
+       __u32   reserved0; /* explicit padding (optional on i386) */
+       __u32   uidx;
+       __u32   reserved1;
 };
 
 struct mlx5_ib_create_srq_resp {
@@ -125,9 +143,48 @@ struct mlx5_ib_create_qp {
        __u32   rq_wqe_count;
        __u32   rq_wqe_shift;
        __u32   flags;
+       __u32   uidx;
+       __u32   reserved0;
+       __u64   sq_buf_addr;
 };
 
 struct mlx5_ib_create_qp_resp {
        __u32   uuar_index;
 };
+
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+                                   struct mlx5_ib_create_qp *ucmd,
+                                   int inlen,
+                                   u32 *user_index)
+{
+       u8 cqe_version = ucontext->cqe_version;
+
+       if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+               return 0;
+
+       if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+              !!cqe_version))
+               return -EINVAL;
+
+       return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+                                    struct mlx5_ib_create_srq *ucmd,
+                                    int inlen,
+                                    u32 *user_index)
+{
+       u8 cqe_version = ucontext->cqe_version;
+
+       if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+               return 0;
+
+       if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+              !!cqe_version))
+               return -EINVAL;
+
+       return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
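
Both helpers hinge on field_avail(), defined in mlx5_ib.h along the lines below: it tests whether the user's command buffer is long enough to contain a given field, so an old userspace (no uidx, CQE version 0) and a new one (uidx present, cqe_version negotiated) can be told apart. A sketch against a hypothetical command struct:

    /* sketch: does the inlen userspace passed cover 'new_field'? */
    #define field_avail(typ, fld, sz) \
            (offsetof(typ, fld) + sizeof(((typ *)0)->fld) <= (sz))

    struct cmd { __u32 a; __u32 b; __u32 new_field; };

    static bool covers_new_field(size_t inlen)
    {
            return field_avail(struct cmd, new_field, inlen);
    }
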
 #endif /* MLX5_IB_USER_H */
index 40ba8333815571ff2b8c5e24e173713f3a16e66e..a6531ffe29a6f7afebd52ae41223f7fd7d74d131 100644 (file)
@@ -608,9 +608,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                        entry->opcode    = IB_WC_FETCH_ADD;
                        entry->byte_len  = MTHCA_ATOMIC_BYTE_LEN;
                        break;
-               case MTHCA_OPCODE_BIND_MW:
-                       entry->opcode    = IB_WC_BIND_MW;
-                       break;
                default:
                        entry->opcode    = MTHCA_OPCODE_INVALID;
                        break;
index dc2d48c59e6274c54e84af0d26b4da1b56271b13..9866c35cc977d5036b8b7c6b1126a3508eef9212 100644 (file)
@@ -898,89 +898,6 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
        return &mr->ibmr;
 }
 
-static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
-                                      struct ib_phys_buf *buffer_list,
-                                      int                 num_phys_buf,
-                                      int                 acc,
-                                      u64                *iova_start)
-{
-       struct mthca_mr *mr;
-       u64 *page_list;
-       u64 total_size;
-       unsigned long mask;
-       int shift;
-       int npages;
-       int err;
-       int i, j, n;
-
-       mask = buffer_list[0].addr ^ *iova_start;
-       total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0)
-                       mask |= buffer_list[i].addr;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-
-               total_size += buffer_list[i].size;
-       }
-
-       if (mask & ~PAGE_MASK)
-               return ERR_PTR(-EINVAL);
-
-       shift = __ffs(mask | 1 << 31);
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
-       buffer_list[0].addr &= ~0ull << shift;
-
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
-       if (!mr)
-               return ERR_PTR(-ENOMEM);
-
-       npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-
-       if (!npages)
-               return &mr->ibmr;
-
-       page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
-       if (!page_list) {
-               kfree(mr);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-                    ++j)
-                       page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
-
-       mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
-                 "in PD %x; shift %d, npages %d.\n",
-                 (unsigned long long) buffer_list[0].addr,
-                 (unsigned long long) *iova_start,
-                 to_mpd(pd)->pd_num,
-                 shift, npages);
-
-       err = mthca_mr_alloc_phys(to_mdev(pd->device),
-                                 to_mpd(pd)->pd_num,
-                                 page_list, shift, npages,
-                                 *iova_start, total_size,
-                                 convert_access(acc), mr);
-
-       if (err) {
-               kfree(page_list);
-               kfree(mr);
-               return ERR_PTR(err);
-       }
-
-       kfree(page_list);
-       mr->umem = NULL;
-
-       return &mr->ibmr;
-}
-
 static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                       u64 virt, int acc, struct ib_udata *udata)
 {
@@ -1346,7 +1263,6 @@ int mthca_register_device(struct mthca_dev *dev)
        dev->ib_dev.destroy_cq           = mthca_destroy_cq;
        dev->ib_dev.poll_cq              = mthca_poll_cq;
        dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
-       dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
        dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
        dev->ib_dev.dereg_mr             = mthca_dereg_mr;
        dev->ib_dev.get_port_immutable   = mthca_port_immutable;
index 35fe506e2cfa892259a4975919b07355be3df345..96e5fb91fb48e7c48d802e8b30e7e5af20c6a630 100644 (file)
@@ -1485,7 +1485,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        u16 pkey;
 
        ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
-                         mthca_ah_grh_present(to_mah(wr->ah)), 0,
+                         mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
                          &sqp->ud_header);
 
        err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
index 8a3ad170d790cc336c08a527db314d859beefd6c..cb9f0f27308de0dd439844cc06742a74caa52899 100644 (file)
@@ -134,7 +134,7 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
 /* External CM API Interface */
 /* instance of function pointers for client API */
 /* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static struct nes_cm_ops nes_cm_api = {
+static const struct nes_cm_ops nes_cm_api = {
        mini_cm_accelerated,
        mini_cm_listen,
        mini_cm_del_listen,
@@ -3232,7 +3232,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        int passive_state;
        struct nes_ib_device *nesibdev;
        struct ib_mr *ibmr = NULL;
-       struct ib_phys_buf ibphysbuf;
        struct nes_pd *nespd;
        u64 tagged_offset;
        u8 mpa_frame_offset = 0;
@@ -3316,21 +3315,19 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                u64temp = (unsigned long)nesqp;
                nesibdev = nesvnic->nesibdev;
                nespd = nesqp->nespd;
-               ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
-               ibphysbuf.size = buff_len;
                tagged_offset = (u64)(unsigned long)*start_buff;
-               ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
-                                                  &ibphysbuf, 1,
-                                                  IB_ACCESS_LOCAL_WRITE,
-                                                  &tagged_offset);
-               if (!ibmr) {
+               ibmr = nes_reg_phys_mr(&nespd->ibpd,
+                               nesqp->ietf_frame_pbase + mpa_frame_offset,
+                               buff_len, IB_ACCESS_LOCAL_WRITE,
+                               &tagged_offset);
+               if (IS_ERR(ibmr)) {
                        nes_debug(NES_DBG_CM, "Unable to register memory region"
                                  "for lSMM for cm_node = %p \n",
                                  cm_node);
                        pci_free_consistent(nesdev->pcidev,
                                            nesqp->private_data_len + nesqp->ietf_frame_size,
                                            nesqp->ietf_frame, nesqp->ietf_frame_pbase);
-                       return -ENOMEM;
+                       return PTR_ERR(ibmr);
                }
 
                ibmr->pd = &nespd->ibpd;
index 32a6420c29400184ea8f392c9e7004eb51611b67..147c2c884227d82ad3f40cec561a91abee71f80a 100644 (file)
@@ -423,7 +423,7 @@ struct nes_cm_core {
 
        struct timer_list       tcp_timer;
 
-       struct nes_cm_ops       *api;
+       const struct nes_cm_ops *api;
 
        int (*post_event)(struct nes_cm_event *event);
        atomic_t                events_posted;
index 2042c0f2975942a3284bce33eda298cfc32ca11d..6d3a169c049b315d764dc1eec9dd47436c53b1a4 100644 (file)
@@ -727,7 +727,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
        if (action == NES_ARP_DELETE) {
                nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
                nesadapter->arp_table[arp_index].ip_addr = 0;
-               memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN);
+               eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
                nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
                return arp_index;
        }
index 137880a19ebe4827bc7f8b419ba6faf9dc50de41..8c4daf7f22ec14fc5a56b13f5ddeca9c09a142e7 100644 (file)
@@ -206,80 +206,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
 }
 
 
-/**
- * nes_bind_mw
- */
-static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
-               struct ib_mw_bind *ibmw_bind)
-{
-       u64 u64temp;
-       struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
-       struct nes_device *nesdev = nesvnic->nesdev;
-       /* struct nes_mr *nesmr = to_nesmw(ibmw); */
-       struct nes_qp *nesqp = to_nesqp(ibqp);
-       struct nes_hw_qp_wqe *wqe;
-       unsigned long flags = 0;
-       u32 head;
-       u32 wqe_misc = 0;
-       u32 qsize;
-
-       if (nesqp->ibqp_state > IB_QPS_RTS)
-               return -EINVAL;
-
-       spin_lock_irqsave(&nesqp->lock, flags);
-
-       head = nesqp->hwqp.sq_head;
-       qsize = nesqp->hwqp.sq_tail;
-
-       /* Check for SQ overflow */
-       if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
-               spin_unlock_irqrestore(&nesqp->lock, flags);
-               return -ENOMEM;
-       }
-
-       wqe = &nesqp->hwqp.sq_vbase[head];
-       /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
-       nes_fill_init_qp_wqe(wqe, nesqp, head);
-       u64temp = ibmw_bind->wr_id;
-       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
-       wqe_misc = NES_IWARP_SQ_OP_BIND;
-
-       wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
-       if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
-               wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
-       if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
-               wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
-       if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
-               wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
-
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
-                           ibmw_bind->bind_info.mr->lkey);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
-                       ibmw_bind->bind_info.length);
-       wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
-       u64temp = (u64)ibmw_bind->bind_info.addr;
-       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
-
-       head++;
-       if (head >= qsize)
-               head = 0;
-
-       nesqp->hwqp.sq_head = head;
-       barrier();
-
-       nes_write32(nesdev->regs+NES_WQE_ALLOC,
-                       (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
-       spin_unlock_irqrestore(&nesqp->lock, flags);
-
-       return 0;
-}
-
-
 /*
  * nes_alloc_fast_mr
  */
@@ -2074,9 +2000,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
 /**
  * nes_reg_phys_mr
  */
-static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
-               struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
-               u64 * iova_start)
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+               int acc, u64 *iova_start)
 {
        u64 region_length;
        struct nes_pd *nespd = to_nespd(ib_pd);
@@ -2088,13 +2013,10 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
        struct nes_vpbl vpbl;
        struct nes_root_vpbl root_vpbl;
        u32 stag;
-       u32 i;
        unsigned long mask;
        u32 stag_index = 0;
        u32 next_stag_index = 0;
        u32 driver_key = 0;
-       u32 root_pbl_index = 0;
-       u32 cur_pbl_index = 0;
        int err = 0;
        int ret = 0;
        u16 pbl_count = 0;
@@ -2113,11 +2035,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
 
        next_stag_index >>= 8;
        next_stag_index %= nesadapter->max_mr;
-       if (num_phys_buf > (1024*512)) {
-               return ERR_PTR(-E2BIG);
-       }
 
-       if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK)
+       if ((addr ^ *iova_start) & ~PAGE_MASK)
                return ERR_PTR(-EINVAL);
 
        err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
@@ -2132,84 +2051,33 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                return ERR_PTR(-ENOMEM);
        }
 
-       for (i = 0; i < num_phys_buf; i++) {
+       /* Allocate a 4K buffer for the PBL */
+       vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+                       &vpbl.pbl_pbase);
+       nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+                       vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+       if (!vpbl.pbl_vbase) {
+               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+               ibmr = ERR_PTR(-ENOMEM);
+               kfree(nesmr);
+               goto reg_phys_err;
+       }
 
-               if ((i & 0x01FF) == 0) {
-                       if (root_pbl_index == 1) {
-                               /* Allocate the root PBL */
-                               root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
-                                               &root_vpbl.pbl_pbase);
-                               nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
-                                               root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
-                               if (!root_vpbl.pbl_vbase) {
-                                       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
-                                                       vpbl.pbl_pbase);
-                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                                       kfree(nesmr);
-                                       return ERR_PTR(-ENOMEM);
-                               }
-                               root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
-                               if (!root_vpbl.leaf_vpbl) {
-                                       pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
-                                                       root_vpbl.pbl_pbase);
-                                       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
-                                                       vpbl.pbl_pbase);
-                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                                       kfree(nesmr);
-                                       return ERR_PTR(-ENOMEM);
-                               }
-                               root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
-                               root_vpbl.pbl_vbase[0].pa_high =
-                                               cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
-                               root_vpbl.leaf_vpbl[0] = vpbl;
-                       }
-                       /* Allocate a 4K buffer for the PBL */
-                       vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
-                                       &vpbl.pbl_pbase);
-                       nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
-                                       vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
-                       if (!vpbl.pbl_vbase) {
-                               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                               ibmr = ERR_PTR(-ENOMEM);
-                               kfree(nesmr);
-                               goto reg_phys_err;
-                       }
-                       /* Fill in the root table */
-                       if (1 <= root_pbl_index) {
-                               root_vpbl.pbl_vbase[root_pbl_index].pa_low =
-                                               cpu_to_le32((u32)vpbl.pbl_pbase);
-                               root_vpbl.pbl_vbase[root_pbl_index].pa_high =
-                                               cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
-                               root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
-                       }
-                       root_pbl_index++;
-                       cur_pbl_index = 0;
-               }
 
-               mask = !buffer_list[i].size;
-               if (i != 0)
-                       mask |= buffer_list[i].addr;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-
-               if (mask & ~PAGE_MASK) {
-                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                       nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
-                       ibmr = ERR_PTR(-EINVAL);
-                       kfree(nesmr);
-                       goto reg_phys_err;
-               }
+       mask = !size;
 
-               region_length += buffer_list[i].size;
-               if ((i != 0) && (single_page)) {
-                       if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
-                               single_page = 0;
-               }
-               vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK);
-               vpbl.pbl_vbase[cur_pbl_index++].pa_high =
-                               cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+       if (mask & ~PAGE_MASK) {
+               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+               nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
+               ibmr = ERR_PTR(-EINVAL);
+               kfree(nesmr);
+               goto reg_phys_err;
        }
 
+       region_length += size;
+       vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
+       vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
+
        stag = stag_index << 8;
        stag |= driver_key;
        stag += (u32)stag_key;
@@ -2219,17 +2087,15 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                        stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
 
        /* Make the leaf PBL the root if only one PBL */
-       if (root_pbl_index == 1) {
-               root_vpbl.pbl_pbase = vpbl.pbl_pbase;
-       }
+       root_vpbl.pbl_pbase = vpbl.pbl_pbase;
 
        if (single_page) {
                pbl_count = 0;
        } else {
-               pbl_count = root_pbl_index;
+               pbl_count = 1;
        }
        ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
-                       buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
+                       addr, pbl_count, 1, acc, iova_start,
                        &nesmr->pbls_used, &nesmr->pbl_4k);
 
        if (ret == 0) {
@@ -2242,21 +2108,9 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                ibmr = ERR_PTR(-ENOMEM);
        }
 
-       reg_phys_err:
-       /* free the resources */
-       if (root_pbl_index == 1) {
-               /* single PBL case */
-               pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
-       } else {
-               for (i=0; i<root_pbl_index; i++) {
-                       pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
-                                       root_vpbl.leaf_vpbl[i].pbl_pbase);
-               }
-               kfree(root_vpbl.leaf_vpbl);
-               pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
-                               root_vpbl.pbl_pbase);
-       }
-
+reg_phys_err:
+       /* single PBL case */
+       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
        return ibmr;
 }
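
With the ib_phys_buf array interface gone, nes_reg_phys_mr() registers exactly one physically contiguous region, so a single leaf PBL suffices and the root PBL collapses onto it. A sketch of a call, patterned on the lSMM registration in nes_cm.c above (the buffer names are illustrative):

    /* sketch: register one contiguous buffer with the new signature */
    u64 iova = frame_pbase; /* illustrative DMA address */
    struct ib_mr *mr = nes_reg_phys_mr(&nespd->ibpd, frame_pbase, buff_len,
                                       IB_ACCESS_LOCAL_WRITE, &iova);

    if (IS_ERR(mr))
            return PTR_ERR(mr);
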
 
@@ -2266,17 +2120,13 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
  */
 static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
 {
-       struct ib_phys_buf bl;
        u64 kva = 0;
 
        nes_debug(NES_DBG_MR, "\n");
 
-       bl.size = (u64)0xffffffffffULL;
-       bl.addr = 0;
-       return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
+       return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
 }
 
-
 /**
  * nes_reg_user_mr
  */
@@ -3888,12 +3738,10 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
        nesibdev->ibdev.destroy_cq = nes_destroy_cq;
        nesibdev->ibdev.poll_cq = nes_poll_cq;
        nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
-       nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
        nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
        nesibdev->ibdev.dereg_mr = nes_dereg_mr;
        nesibdev->ibdev.alloc_mw = nes_alloc_mw;
        nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
-       nesibdev->ibdev.bind_mw = nes_bind_mw;
 
        nesibdev->ibdev.alloc_mr = nes_alloc_mr;
        nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
index a204b677af22a8d00fba4b5b65bc0620d4cb7a1c..70290883d06769f3eee19ed4b36750808c2fa3d8 100644 (file)
@@ -190,4 +190,8 @@ struct nes_qp {
        u8                    pau_state;
        __u64                 nesuqp_addr;
 };
+
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+               u64 addr, u64 size, int acc, u64 *iova_start);
+
 #endif                 /* NES_VERBS_H */
index 040bb8b5cb15a65c34b15ab67c3c5fe14e27524c..12503f15fbd6b29830dee04d81f8fd4082319be2 100644 (file)
@@ -323,9 +323,6 @@ struct ocrdma_cq {
                         */
        u32 max_hw_cqe;
        bool phase_change;
-       bool deferred_arm, deferred_sol;
-       bool first_arm;
-
        spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
                                                   * to cq polling
                                                   */
index 9820074be59d73bd1aac4b56d4a38254d2f9b7bb..3790771f2baad2bae17c52298e9b293eee3e7bad 100644 (file)
@@ -152,9 +152,10 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
        if ((pd->uctx) &&
            (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
            (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
-               status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
-                                                   attr->dmac, &vlan_tag,
-                                                   sgid_attr.ndev->ifindex);
+               status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
+                                                     attr->dmac, &vlan_tag,
+                                                     &sgid_attr.ndev->ifindex,
+                                                     NULL);
                if (status) {
                        pr_err("%s(): Failed to resolve dmac from gid." 
                                "status = %d\n", __func__, status);
index 3afb40b85159bd2c10559443536f83af5e4fa9ca..f38743018cb454bca0e42440ebd27071e16a1ffe 100644 (file)
@@ -175,7 +175,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
        dev->ibdev.req_notify_cq = ocrdma_arm_cq;
 
        dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
-       dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
        dev->ibdev.dereg_mr = ocrdma_dereg_mr;
        dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
 
@@ -229,6 +228,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
 
        ocrdma_alloc_pd_pool(dev);
 
+       if (!ocrdma_alloc_stats_resources(dev)) {
+               pr_err("%s: stats resource allocation failed\n", __func__);
+               goto alloc_err;
+       }
+
        spin_lock_init(&dev->av_tbl.lock);
        spin_lock_init(&dev->flush_q_lock);
        return 0;
@@ -239,6 +243,7 @@ alloc_err:
 
 static void ocrdma_free_resources(struct ocrdma_dev *dev)
 {
+       ocrdma_release_stats_resources(dev);
        kfree(dev->stag_arr);
        kfree(dev->qp_tbl);
        kfree(dev->cq_tbl);
index 86c303a620c1660625ebb94f1b54e578856b26ff..255f774080a4aae08952df0374cdcb5a4063c8d1 100644 (file)
@@ -64,10 +64,11 @@ static int ocrdma_add_stat(char *start, char *pcur,
        return cpy_len;
 }
 
-static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
+bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev)
 {
        struct stats_mem *mem = &dev->stats_mem;
 
+       mutex_init(&dev->stats_lock);
        /* Alloc mbox command mem */
        mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
                        sizeof(struct ocrdma_rdma_stats_resp));
@@ -91,13 +92,14 @@ static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
        return true;
 }
 
-static void ocrdma_release_stats_mem(struct ocrdma_dev *dev)
+void ocrdma_release_stats_resources(struct ocrdma_dev *dev)
 {
        struct stats_mem *mem = &dev->stats_mem;
 
        if (mem->va)
                dma_free_coherent(&dev->nic_info.pdev->dev, mem->size,
                                  mem->va, mem->pa);
+       mem->va = NULL;
        kfree(mem->debugfs_mem);
 }
 
@@ -838,15 +840,9 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev)
                                &dev->reset_stats, &ocrdma_dbg_ops))
                goto err;
 
-       /* Now create dma_mem for stats mbx command */
-       if (!ocrdma_alloc_stats_mem(dev))
-               goto err;
-
-       mutex_init(&dev->stats_lock);
 
        return;
 err:
-       ocrdma_release_stats_mem(dev);
        debugfs_remove_recursive(dev->dir);
        dev->dir = NULL;
 }
@@ -855,9 +851,7 @@ void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
 {
        if (!dev->dir)
                return;
-       debugfs_remove(dev->dir);
-       mutex_destroy(&dev->stats_lock);
-       ocrdma_release_stats_mem(dev);
+       debugfs_remove_recursive(dev->dir);
 }
 
 void ocrdma_init_debugfs(void)
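
With this refactor the stats DMA buffer and its mutex belong to the device
lifecycle rather than to debugfs: ocrdma_add_port_stats() only creates files,
and clearing mem->va makes the release path safe to reach more than once.
A lifecycle sketch under those assumptions (the example_* wrappers are
hypothetical):

	static int example_probe(struct ocrdma_dev *dev)
	{
		if (!ocrdma_alloc_stats_resources(dev)) /* also mutex_init() */
			return -ENOMEM;
		ocrdma_add_port_stats(dev);		/* debugfs entries only */
		return 0;
	}

	static void example_remove(struct ocrdma_dev *dev)
	{
		ocrdma_rem_port_stats(dev);	     /* debugfs_remove_recursive() */
		ocrdma_release_stats_resources(dev); /* idempotent: va is NULLed */
	}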
index c9e58d04c7b8d15c15c5d8ae4205c87cc3be521c..bba1fec4f11f265bd88f376dde4c9474634063e6 100644 (file)
@@ -65,6 +65,8 @@ enum OCRDMA_STATS_TYPE {
 
 void ocrdma_rem_debugfs(void);
 void ocrdma_init_debugfs(void);
+bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev);
+void ocrdma_release_stats_resources(struct ocrdma_dev *dev);
 void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
 void ocrdma_add_port_stats(struct ocrdma_dev *dev);
 int ocrdma_pma_counters(struct ocrdma_dev *dev,
index 76e96f97b3f6459e68d13c444be6e1beb6b578a9..12420e4ecf3da09d552ac117adcd85049ccd891b 100644 (file)
@@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
                                        IB_DEVICE_SYS_IMAGE_GUID |
                                        IB_DEVICE_LOCAL_DMA_LKEY |
                                        IB_DEVICE_MEM_MGT_EXTENSIONS;
-       attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
-       attr->max_sge_rd = 0;
+       attr->max_sge = dev->attr.max_send_sge;
+       attr->max_sge_rd = attr->max_sge;
        attr->max_cq = dev->attr.max_cq;
        attr->max_cqe = dev->attr.max_cqe;
        attr->max_mr = dev->attr.max_mr;
@@ -1094,7 +1094,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
        spin_lock_init(&cq->comp_handler_lock);
        INIT_LIST_HEAD(&cq->sq_head);
        INIT_LIST_HEAD(&cq->rq_head);
-       cq->first_arm = true;
 
        if (ib_ctx) {
                uctx = get_ocrdma_ucontext(ib_ctx);
@@ -2726,8 +2725,7 @@ static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
                OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
        ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
                                                OCRDMA_CQE_SRCQP_MASK;
-       ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
-                                               OCRDMA_CQE_PKEY_MASK;
+       ibwc->pkey_index = 0;
        ibwc->wc_flags = IB_WC_GRH;
        ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
                                        OCRDMA_CQE_UD_XFER_LEN_SHIFT);
@@ -2911,12 +2909,9 @@ expand_cqe:
        }
 stop_cqe:
        cq->getp = cur_getp;
-       if (cq->deferred_arm || polled_hw_cqes) {
-               ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm,
-                                 cq->deferred_sol, polled_hw_cqes);
-               cq->deferred_arm = false;
-               cq->deferred_sol = false;
-       }
+
+       if (polled_hw_cqes)
+               ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
 
        return i;
 }
@@ -3000,13 +2995,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
        if (cq_flags & IB_CQ_SOLICITED)
                sol_needed = true;
 
-       if (cq->first_arm) {
-               ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
-               cq->first_arm = false;
-       }
-
-       cq->deferred_arm = true;
-       cq->deferred_sol = sol_needed;
+       ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
        spin_unlock_irqrestore(&cq->cq_lock, flags);
 
        return 0;
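
With the deferred-arm state gone, the doorbell helper is rung from exactly two
places; shown side by side here, with parameter meanings inferred from the
names:

	/* poll path: only return CQE credits, never arm from polling */
	ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);

	/* ocrdma_arm_cq(): arm (optionally solicited-only) right away */
	ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);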
@@ -3066,169 +3055,6 @@ pl_err:
        return ERR_PTR(-ENOMEM);
 }
 
-#define MAX_KERNEL_PBE_SIZE 65536
-static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
-                                   int buf_cnt, u32 *pbe_size)
-{
-       u64 total_size = 0;
-       u64 buf_size = 0;
-       int i;
-       *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
-       *pbe_size = roundup_pow_of_two(*pbe_size);
-
-       /* find the smallest PBE size that we can have */
-       for (i = 0; i < buf_cnt; i++) {
-               /* first addr may not be page aligned, so ignore checking */
-               if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
-                                (buf_list[i].size & ~PAGE_MASK))) {
-                       return 0;
-               }
-
-               /* if configured PBE size is greater than the chosen one,
-                * reduce the PBE size.
-                */
-               buf_size = roundup(buf_list[i].size, PAGE_SIZE);
-               /* pbe_size has to be an even multiple of 4K: 1, 2, 4, 8... */
-               buf_size = roundup_pow_of_two(buf_size);
-               if (*pbe_size > buf_size)
-                       *pbe_size = buf_size;
-
-               total_size += buf_size;
-       }
-       *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
-           (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
-
-       /* num_pbes = total_size / (*pbe_size);  this is implemented below. */
-
-       return total_size >> ilog2(*pbe_size);
-}
-
-static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
-                             u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
-                             struct ocrdma_hw_mr *hwmr)
-{
-       int i;
-       int idx;
-       int pbes_per_buf = 0;
-       u64 buf_addr = 0;
-       int num_pbes;
-       struct ocrdma_pbe *pbe;
-       int total_num_pbes = 0;
-
-       if (!hwmr->num_pbes)
-               return;
-
-       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-       num_pbes = 0;
-
-       /* go through the OS phy regions & fill hw pbe entries into pbls. */
-       for (i = 0; i < ib_buf_cnt; i++) {
-               buf_addr = buf_list[i].addr;
-               pbes_per_buf =
-                   roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
-                   pbe_size;
-               hwmr->len += buf_list[i].size;
-               /* number of pbes can be more for one OS buf, when
-                * buffers are of different sizes.
-                * split the ib_buf to one or more pbes.
-                */
-               for (idx = 0; idx < pbes_per_buf; idx++) {
-                       /* we program always page aligned addresses,
-                        * first unaligned address is taken care by fbo.
-                        */
-                       if (i == 0) {
-                               /* for non zero fbo, assign the
-                                * start of the page.
-                                */
-                               pbe->pa_lo =
-                                   cpu_to_le32((u32) (buf_addr & PAGE_MASK));
-                               pbe->pa_hi =
-                                   cpu_to_le32((u32) upper_32_bits(buf_addr));
-                       } else {
-                               pbe->pa_lo =
-                                   cpu_to_le32((u32) (buf_addr & 0xffffffff));
-                               pbe->pa_hi =
-                                   cpu_to_le32((u32) upper_32_bits(buf_addr));
-                       }
-                       buf_addr += pbe_size;
-                       num_pbes += 1;
-                       total_num_pbes += 1;
-                       pbe++;
-
-                       if (total_num_pbes == hwmr->num_pbes)
-                               goto mr_tbl_done;
-                       /* if the pbl is full storing the pbes,
-                        * move to next pbl.
-                        */
-                       if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
-                               pbl_tbl++;
-                               pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-                               num_pbes = 0;
-                       }
-               }
-       }
-mr_tbl_done:
-       return;
-}
-
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
-                                  struct ib_phys_buf *buf_list,
-                                  int buf_cnt, int acc, u64 *iova_start)
-{
-       int status = -ENOMEM;
-       struct ocrdma_mr *mr;
-       struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
-       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
-       u32 num_pbes;
-       u32 pbe_size = 0;
-
-       if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
-               return ERR_PTR(-EINVAL);
-
-       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-       if (!mr)
-               return ERR_PTR(status);
-
-       num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
-       if (num_pbes == 0) {
-               status = -EINVAL;
-               goto pbl_err;
-       }
-       status = ocrdma_get_pbl_info(dev, mr, num_pbes);
-       if (status)
-               goto pbl_err;
-
-       mr->hwmr.pbe_size = pbe_size;
-       mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
-       mr->hwmr.va = *iova_start;
-       mr->hwmr.local_rd = 1;
-       mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
-       mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
-       mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
-       mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
-       mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
-
-       status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
-       if (status)
-               goto pbl_err;
-       build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
-                         &mr->hwmr);
-       status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
-       if (status)
-               goto mbx_err;
-
-       mr->ibmr.lkey = mr->hwmr.lkey;
-       if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
-               mr->ibmr.rkey = mr->hwmr.lkey;
-       return &mr->ibmr;
-
-mbx_err:
-       ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
-pbl_err:
-       kfree(mr);
-       return ERR_PTR(status);
-}
-
 static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
 {
        struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
index a2f3b4dc20b0363d0a09769aaee2d69dd957ae33..8b517fd3677924099a1d08663035de8f19a09d79 100644 (file)
@@ -117,9 +117,6 @@ int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
 
 int ocrdma_dereg_mr(struct ib_mr *);
 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
-                                  struct ib_phys_buf *buffer_list,
-                                  int num_phys_buf, int acc, u64 *iova_start);
 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
                                 u64 virt, int acc, struct ib_udata *);
 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
index 13ef22bd9459200309f2ab9fccb248eccbef9027..fcdf37913a264d52b5204f771523b37a2b0c4985 100644 (file)
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
 {
        int error;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        *dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(*dentry))
                error = qibfs_mknod(d_inode(parent), *dentry,
                                    mode, fops, data);
        else
                error = PTR_ERR(*dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        return error;
 }
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
        int ret, i;
 
        root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        snprintf(unit, sizeof(unit), "%u", dd->unit);
        dir = lookup_one_len(unit, root, strlen(unit));
 
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
                goto bail;
        }
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        remove_file(dir, "counters");
        remove_file(dir, "counter_names");
        remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
                }
        }
        remove_file(dir, "flash");
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = simple_rmdir(d_inode(root), dir);
        d_delete(dir);
        dput(dir);
 
 bail:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        dput(root);
        return ret;
 }
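
The i_mutex manipulation is converted to the then-new inode_lock() helpers.
At this point in the tree they were thin wrappers, roughly (paraphrased from
include/linux/fs.h of this era):

	static inline void inode_lock(struct inode *inode)
	{
		mutex_lock(&inode->i_mutex);
	}

	static inline void inode_unlock(struct inode *inode)
	{
		mutex_unlock(&inode->i_mutex);
	}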
index 294f5c706be972b4b6563e37a32064fade665be2..5f53304e8a9b5a4c71448ba02865d912915d13a1 100644 (file)
@@ -150,10 +150,7 @@ static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
        rval = init_qib_mregion(&mr->mr, pd, count);
        if (rval)
                goto bail;
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
+
        rval = qib_alloc_lkey(&mr->mr, 0);
        if (rval)
                goto bail_mregion;
@@ -170,52 +167,6 @@ bail:
        goto done;
 }
 
-/**
- * qib_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
-                             struct ib_phys_buf *buffer_list,
-                             int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct qib_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, pd);
-       if (IS_ERR(mr)) {
-               ret = (struct ib_mr *)mr;
-               goto bail;
-       }
-
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.access_flags = acc;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == QIB_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
 /**
  * qib_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
index 40f85bb3e0d3bdce5289a5c8c9c2418df33e4ca4..3eff35c2d453f7b11f9475f5120cb4df26f94dda 100644 (file)
@@ -100,9 +100,10 @@ static u32 credit_table[31] = {
        32768                   /* 1E */
 };
 
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
+static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+                        gfp_t gfp)
 {
-       unsigned long page = get_zeroed_page(GFP_KERNEL);
+       unsigned long page = get_zeroed_page(gfp);
 
        /*
         * Free the page if someone raced with us installing it.
@@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
  * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
  */
 static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-                    enum ib_qp_type type, u8 port)
+                    enum ib_qp_type type, u8 port, gfp_t gfp)
 {
        u32 i, offset, max_scan, qpn;
        struct qpn_map *map;
@@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
        max_scan = qpt->nmaps - !offset;
        for (i = 0;;) {
                if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
+                       get_map_page(qpt, map, gfp);
                        if (unlikely(!map->page))
                                break;
                }
@@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
        size_t sz;
        size_t sg_list_sz;
        struct ib_qp *ret;
+       gfp_t gfp;
+
 
        if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
            init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
-           init_attr->create_flags) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
+           init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+               return ERR_PTR(-EINVAL);
+
+       /* GFP_NOIO is applicable in RC QPs only */
+       if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+           init_attr->qp_type != IB_QPT_RC)
+               return ERR_PTR(-EINVAL);
+
+       gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+                       GFP_NOIO : GFP_KERNEL;
 
        /* Check receive queue parameters if no SRQ is specified. */
        if (!init_attr->srq) {
@@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                sz = sizeof(struct qib_sge) *
                        init_attr->cap.max_send_sge +
                        sizeof(struct qib_swqe);
-               swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+               swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
+                               gfp, PAGE_KERNEL);
                if (swq == NULL) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail;
@@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                } else if (init_attr->cap.max_recv_sge > 1)
                        sg_list_sz = sizeof(*qp->r_sg_list) *
                                (init_attr->cap.max_recv_sge - 1);
-               qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+               qp = kzalloc(sz + sg_list_sz, gfp);
                if (!qp) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_swq;
                }
                RCU_INIT_POINTER(qp->next, NULL);
-               qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+               qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
                if (!qp->s_hdr) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_qp;
@@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                        qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
                        sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
                                sizeof(struct qib_rwqe);
-                       qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
-                                                  qp->r_rq.size * sz);
+                       if (gfp != GFP_NOIO)
+                               qp->r_rq.wq = vmalloc_user(
+                                               sizeof(struct qib_rwq) +
+                                               qp->r_rq.size * sz);
+                       else
+                               qp->r_rq.wq = __vmalloc(
+                                               sizeof(struct qib_rwq) +
+                                               qp->r_rq.size * sz,
+                                               gfp, PAGE_KERNEL);
+
                        if (!qp->r_rq.wq) {
                                ret = ERR_PTR(-ENOMEM);
                                goto bail_qp;
@@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                dev = to_idev(ibpd->device);
                dd = dd_from_dev(dev);
                err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
-                               init_attr->port_num);
+                               init_attr->port_num, gfp);
                if (err < 0) {
                        ret = ERR_PTR(err);
                        vfree(qp->r_rq.wq);
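
The gfp plumbing above exists so that RC QPs created from reclaim-sensitive
paths (e.g. storage traffic riding on the fabric) cannot recurse into the I/O
stack during allocation. A hedged usage sketch of the create flag (the helper
name is hypothetical):

	static struct ib_qp *example_create_rc_qp_noio(struct ib_pd *pd,
					struct ib_qp_init_attr *attr)
	{
		attr->qp_type      = IB_QPT_RC;	/* flag is honored for RC only */
		attr->create_flags = IB_QP_CREATE_USE_GFP_NOIO;
		return ib_create_qp(pd, attr);
	}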
index de6cb6fcda8df3d5a1060aceecf20c832ba81ace..baf1e42b6896cbd0efcf29e0d52aeed3c20e1e21 100644 (file)
@@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
        unsigned long flags;
        struct qib_lkey_table *rkt;
        struct qib_pd *pd;
+       int avoid_schedule = 0;
 
        spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -438,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
            qp->ibqp.qp_type == IB_QPT_RC) {
                if (wqe->length > 0x80000000U)
                        goto bail_inval_free;
+               if (wqe->length <= qp->pmtu)
+                       avoid_schedule = 1;
        } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
-                                 qp->port_num - 1)->ibmtu)
+                                 qp->port_num - 1)->ibmtu) {
                goto bail_inval_free;
-       else
+       } else {
                atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
+               avoid_schedule = 1;
+       }
        wqe->ssn = qp->s_ssn++;
        qp->s_head = next;
 
@@ -458,7 +463,7 @@ bail_inval_free:
 bail_inval:
        ret = -EINVAL;
 bail:
-       if (!ret && !wr->next &&
+       if (!ret && !wr->next && !avoid_schedule &&
         !qib_sdma_empty(
           dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
                qib_schedule_send(qp);
@@ -2256,7 +2261,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
        ibdev->poll_cq = qib_poll_cq;
        ibdev->req_notify_cq = qib_req_notify_cq;
        ibdev->get_dma_mr = qib_get_dma_mr;
-       ibdev->reg_phys_mr = qib_reg_phys_mr;
        ibdev->reg_user_mr = qib_reg_user_mr;
        ibdev->dereg_mr = qib_dereg_mr;
        ibdev->alloc_mr = qib_alloc_mr;
index bc803f33d5f6883200ca21e462febebb94393cd4..6c5e77753d858da2d9adb0ddfc1cd753b1e3fb07 100644 (file)
@@ -1032,10 +1032,6 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 
 struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
 
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
-                             struct ib_phys_buf *buffer_list,
-                             int num_phys_buf, int acc, u64 *iova_start);
-
 struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                              u64 virt_addr, int mr_access_flags,
                              struct ib_udata *udata);
index f8ea069a3eafca4078ab70848c3804867609143c..b2fb5286dbd98250534964887a8c2c46678cd725 100644 (file)
@@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        struct qib_ibdev *dev = to_idev(ibqp->device);
        struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
        struct qib_mcast *mcast = NULL;
-       struct qib_mcast_qp *p, *tmp;
+       struct qib_mcast_qp *p, *tmp, *delp = NULL;
        struct rb_node *n;
        int last = 0;
        int ret;
 
-       if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-               ret = -EINVAL;
-               goto bail;
-       }
+       if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+               return -EINVAL;
 
        spin_lock_irq(&ibp->lock);
 
@@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        while (1) {
                if (n == NULL) {
                        spin_unlock_irq(&ibp->lock);
-                       ret = -EINVAL;
-                       goto bail;
+                       return -EINVAL;
                }
 
                mcast = rb_entry(n, struct qib_mcast, rb_node);
@@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                 */
                list_del_rcu(&p->list);
                mcast->n_attached--;
+               delp = p;
 
                /* If this was the last attached QP, remove the GID too. */
                if (list_empty(&mcast->qp_list)) {
@@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        }
 
        spin_unlock_irq(&ibp->lock);
+       /* QP not attached */
+       if (!delp)
+               return -EINVAL;
+       /*
+        * Wait for any list walkers to finish before freeing the
+        * list element.
+        */
+       wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+       qib_mcast_qp_free(delp);
 
-       if (p) {
-               /*
-                * Wait for any list walkers to finish before freeing the
-                * list element.
-                */
-               wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-               qib_mcast_qp_free(p);
-       }
        if (last) {
                atomic_dec(&mcast->refcount);
                wait_event(mcast->wait, !atomic_read(&mcast->refcount));
@@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                dev->n_mcast_grps_allocated--;
                spin_unlock_irq(&dev->n_mcast_grps_lock);
        }
-
-       ret = 0;
-
-bail:
-       return ret;
+       return 0;
 }
 
 int qib_mcast_tree_empty(struct qib_ibport *ibp)
index 5e55b8bc6fe402af423118c1454b5bc67d21d8ba..92dc66cc2d50a06ea69fb259f713784a26868e2b 100644 (file)
@@ -157,8 +157,9 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
                                                        qp_flow,
                                                        &flowinfo_ops);
        if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
-               usnic_err("Failed to create dbg fs entry for flow %u\n",
-                               qp_flow->flow->flow_id);
+               usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
+                               qp_flow->flow->flow_id,
+                               PTR_ERR(qp_flow->dbgfs_dentry));
        }
 }
 
index fcea3a24d3eb310ef0ee3c573a3ef9e161c8e809..5f44b66ccb86481774733960b3577e83fcdfabc0 100644 (file)
@@ -521,7 +521,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
 
        if (!status) {
                qp_grp->state = new_state;
-               usnic_info("Transistioned %u from %s to %s",
+               usnic_info("Transitioned %u from %s to %s",
                qp_grp->grp_id,
                usnic_ib_qp_grp_state_to_string(old_state),
                usnic_ib_qp_grp_state_to_string(new_state));
@@ -575,7 +575,7 @@ alloc_res_chunk_list(struct usnic_vnic *vnic,
        return res_chunk_list;
 
 out_free_res:
-       for (i--; i > 0; i--)
+       for (i--; i >= 0; i--)
                usnic_vnic_put_resources(res_chunk_list[i]);
        kfree(res_chunk_list);
        return ERR_PTR(err);
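
The old unwind loop, "for (i--; i > 0; i--)", stopped before releasing index 0.
The general pattern the fix restores, as an illustrative sketch
(example_acquire/example_release are placeholders):

	static int example_acquire_all(void **objs, int cnt)
	{
		int i;

		for (i = 0; i < cnt; i++) {
			objs[i] = example_acquire(i);	/* placeholder */
			if (!objs[i])
				goto unwind;
		}
		return 0;

	unwind:
		while (--i >= 0)			/* index 0 included */
			example_release(objs[i]);	/* placeholder */
		return -ENOMEM;
	}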
index f8e3211689a3453164c9044f3bef4601053c1dba..6cdb4d23f78f99c0282be7d202083f5aa63091e7 100644 (file)
@@ -51,7 +51,7 @@
 
 static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
 {
-       *fw_ver = (u64) *fw_ver_str;
+       *fw_ver = *((u64 *)fw_ver_str);
 }
 
 static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
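
The old conversion cast only the numeric value of the string's first
character; the fix reinterprets the first eight bytes of the version string.
An equivalent, alignment-safe form (hypothetical helper, same intent):

	static void example_fw_string_to_u64(const char *fw_ver_str, u64 *fw_ver)
	{
		/* copy sizeof(u64) bytes rather than dereferencing a
		 * possibly misaligned pointer */
		memcpy(fw_ver, fw_ver_str, sizeof(*fw_ver));
	}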
@@ -571,20 +571,20 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
        qp_grp = to_uqp_grp(ibqp);
 
-       /* TODO: Future Support All States */
        mutex_lock(&qp_grp->vf->pf->usdev_lock);
-       if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
-       } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
-       } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+       if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
+               /* usnic devices only have one port */
+               status = -EINVAL;
+               goto out_unlock;
+       }
+       if (attr_mask & IB_QP_STATE) {
+               status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
        } else {
-               usnic_err("Unexpected combination mask: %u state: %u\n",
-                               attr_mask & IB_QP_STATE, attr->qp_state);
+               usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
                status = -EINVAL;
        }
 
+out_unlock:
        mutex_unlock(&qp_grp->vf->pf->usdev_lock);
        return status;
 }
@@ -625,8 +625,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
                        virt_addr, length);
 
        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-       if (IS_ERR_OR_NULL(mr))
-               return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
 
        mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
                                        access_flags, 0);
index 414eaa566bd94e5f05a796a5416ef8f76e7939f6..0d9d2e6a14d5e73137e351036486dbfa2b21af60 100644 (file)
@@ -43,8 +43,6 @@ int usnic_ib_query_device(struct ib_device *ibdev,
                          struct ib_udata *uhw);
 int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
                                struct ib_port_attr *props);
-enum rdma_protocol_type
-usnic_ib_query_protocol(struct ib_device *device, u8 port_num);
 int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
                                int qp_attr_mask,
                                struct ib_qp_init_attr *qp_init_attr);
index 66de93fb8ea934c15051f7b33fa7808306f19f05..8875107186902fe8092b24d15a2372aedb6592c1 100644 (file)
@@ -237,7 +237,7 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
        struct usnic_vnic_res *res;
        int i;
 
-       if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+       if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
                return ERR_PTR(-EINVAL);
 
        ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
@@ -247,26 +247,28 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
                return ERR_PTR(-ENOMEM);
        }
 
-       ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
-       if (!ret->res) {
-               usnic_err("Failed to allocate resources for %s. Out of memory\n",
-                               usnic_vnic_pci_name(vnic));
-               kfree(ret);
-               return ERR_PTR(-ENOMEM);
-       }
+       if (cnt > 0) {
+               ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+               if (!ret->res) {
+                       usnic_err("Failed to allocate resources for %s. Out of memory\n",
+                                       usnic_vnic_pci_name(vnic));
+                       kfree(ret);
+                       return ERR_PTR(-ENOMEM);
+               }
 
-       spin_lock(&vnic->res_lock);
-       src = &vnic->chunks[type];
-       for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
-               res = src->res[i];
-               if (!res->owner) {
-                       src->free_cnt--;
-                       res->owner = owner;
-                       ret->res[ret->cnt++] = res;
+               spin_lock(&vnic->res_lock);
+               src = &vnic->chunks[type];
+               for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+                       res = src->res[i];
+                       if (!res->owner) {
+                               src->free_cnt--;
+                               res->owner = owner;
+                               ret->res[ret->cnt++] = res;
+                       }
                }
-       }
 
-       spin_unlock(&vnic->res_lock);
+               spin_unlock(&vnic->res_lock);
+       }
        ret->type = type;
        ret->vnic = vnic;
        WARN_ON(ret->cnt != cnt);
@@ -281,14 +283,16 @@ void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
        int i;
        struct usnic_vnic *vnic = chunk->vnic;
 
-       spin_lock(&vnic->res_lock);
-       while ((i = --chunk->cnt) >= 0) {
-               res = chunk->res[i];
-               chunk->res[i] = NULL;
-               res->owner = NULL;
-               vnic->chunks[res->type].free_cnt++;
+       if (chunk->cnt > 0) {
+               spin_lock(&vnic->res_lock);
+               while ((i = --chunk->cnt) >= 0) {
+                       res = chunk->res[i];
+                       chunk->res[i] = NULL;
+                       res->owner = NULL;
+                       vnic->chunks[res->type].free_cnt++;
+               }
+               spin_unlock(&vnic->res_lock);
        }
-       spin_unlock(&vnic->res_lock);
 
        kfree(chunk->res);
        kfree(chunk);
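
Both get and put now tolerate empty chunks: allocation and the locked transfer
run only for cnt > 0, and kfree(NULL) on chunk->res is a no-op. A hedged usage
sketch (the resource-type constant is an assumption):

	static void example_zero_chunk(struct usnic_vnic *vnic, void *owner)
	{
		struct usnic_vnic_res_chunk *chunk;

		chunk = usnic_vnic_get_resources(vnic, USNIC_VNIC_RES_TYPE_CQ,
						 0, owner);
		if (!IS_ERR(chunk))
			usnic_vnic_put_resources(chunk); /* frees bookkeeping only */
	}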
index 3ede103097547d4355646d322b5570ebb421d2ec..a6f3eab0f350ef036cfb44a47fbbafe5f9bd1fc1 100644 (file)
@@ -495,7 +495,6 @@ void ipoib_dev_cleanup(struct net_device *dev);
 void ipoib_mcast_join_task(struct work_struct *work);
 void ipoib_mcast_carrier_on_task(struct work_struct *work);
 void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
-void ipoib_mcast_free(struct ipoib_mcast *mc);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
@@ -549,8 +548,9 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
                       union ib_gid *mgid, int set_qkey);
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
+void ipoib_mcast_remove_list(struct list_head *remove_list);
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+                               struct list_head *remove_list);
 
 int ipoib_init_qp(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
index 3ae9726efb9837512a62214705bf5e8e9561a02c..917e46ea3bf681681a4abba5e9ce6e86514d07eb 100644 (file)
@@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
 #define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
 
 static struct ib_send_wr ipoib_cm_rx_drain_wr = {
-       .wr_id = IPOIB_CM_RX_DRAIN_WRID,
        .opcode = IB_WR_SEND,
 };
 
@@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
         * error" WC will be immediately generated for each WR we post.
         */
        p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+       ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
        if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
                ipoib_warn(priv, "failed to post drain wr\n");
 
@@ -1522,8 +1522,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 int ipoib_cm_dev_init(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       int i, ret;
-       struct ib_device_attr attr;
+       int max_srq_sge, i;
 
        INIT_LIST_HEAD(&priv->cm.passive_ids);
        INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1540,19 +1539,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
 
        skb_queue_head_init(&priv->cm.skb_queue);
 
-       ret = ib_query_device(priv->ca, &attr);
-       if (ret) {
-               printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
-               return ret;
-       }
-
-       ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+       ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
 
-       attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
-       ipoib_cm_create_srq(dev, attr.max_srq_sge);
+       max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
+       ipoib_cm_create_srq(dev, max_srq_sge);
        if (ipoib_cm_has_srq(dev)) {
-               priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
-               priv->cm.num_frags  = attr.max_srq_sge;
+               priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
+               priv->cm.num_frags  = max_srq_sge;
                ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
                          priv->cm.max_cm_mtu, priv->cm.num_frags);
        } else {
index 078cadd6c797afeb0e22267fb76e5362a4b97326..a53fa5fc0dec7d65900cce1133b2d4e2326183cc 100644 (file)
@@ -40,15 +40,11 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
                              struct ethtool_drvinfo *drvinfo)
 {
        struct ipoib_dev_priv *priv = netdev_priv(netdev);
-       struct ib_device_attr *attr;
-
-       attr = kmalloc(sizeof(*attr), GFP_KERNEL);
-       if (attr && !ib_query_device(priv->ca, attr))
-               snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-                        "%d.%d.%d", (int)(attr->fw_ver >> 32),
-                        (int)(attr->fw_ver >> 16) & 0xffff,
-                        (int)attr->fw_ver & 0xffff);
-       kfree(attr);
+
+       snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
+                (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
+                (int)priv->ca->attrs.fw_ver & 0xffff);
 
        strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
                sizeof(drvinfo->bus_info));
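
This relies on the core caching device attributes at registration time
(ib_device->attrs), which removes the ib_query_device() round trip and its
temporary allocation. A minimal sketch of the new access pattern (hypothetical
helper name):

	static void example_read_fw_ver(struct ib_device *ibdev,
					char *buf, size_t len)
	{
		u64 fw = ibdev->attrs.fw_ver;

		snprintf(buf, len, "%d.%d.%d", (int)(fw >> 32),
			 (int)(fw >> 16) & 0xffff, (int)fw & 0xffff);
	}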
index 5ea0c14070d1f2d8af36a05c15206c63f406b97d..fa9c42ff1fb00963a47a71868ff96abb6e15189b 100644 (file)
@@ -245,8 +245,6 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        skb_reset_mac_header(skb);
        skb_pull(skb, IPOIB_ENCAP_LEN);
 
-       skb->truesize = SKB_TRUESIZE(skb->len);
-
        ++dev->stats.rx_packets;
        dev->stats.rx_bytes += skb->len;
 
index 7d3281866ffcd520d4bca543a9ad0544f2ec159f..25509bbd4a050a076dba5d8bd88b718fa1a72778 100644 (file)
@@ -1150,8 +1150,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
        unsigned long flags;
        int i;
        LIST_HEAD(remove_list);
-       struct ipoib_mcast *mcast, *tmcast;
-       struct net_device *dev = priv->dev;
 
        if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
                return;
@@ -1179,18 +1177,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
                                                          lockdep_is_held(&priv->lock))) != NULL) {
                        /* was the neigh idle for two GC periods */
                        if (time_after(neigh_obsolete, neigh->alive)) {
-                               u8 *mgid = neigh->daddr + 4;
 
-                               /* Is this multicast ? */
-                               if (*mgid == 0xff) {
-                                       mcast = __ipoib_mcast_find(dev, mgid);
-
-                                       if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
-                                               list_del(&mcast->list);
-                                               rb_erase(&mcast->rb_node, &priv->multicast_tree);
-                                               list_add_tail(&mcast->list, &remove_list);
-                                       }
-                               }
+                               ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);
 
                                rcu_assign_pointer(*np,
                                                   rcu_dereference_protected(neigh->hnext,
@@ -1207,10 +1195,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 
 out_unlock:
        spin_unlock_irqrestore(&priv->lock, flags);
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 }
 
 static void ipoib_reap_neigh(struct work_struct *work)
@@ -1777,26 +1762,7 @@ int ipoib_add_pkey_attr(struct net_device *dev)
 
 int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 {
-       struct ib_device_attr *device_attr;
-       int result = -ENOMEM;
-
-       device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
-       if (!device_attr) {
-               printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
-                      hca->name, sizeof *device_attr);
-               return result;
-       }
-
-       result = ib_query_device(hca, device_attr);
-       if (result) {
-               printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
-                      hca->name, result);
-               kfree(device_attr);
-               return result;
-       }
-       priv->hca_caps = device_attr->device_cap_flags;
-
-       kfree(device_attr);
+       priv->hca_caps = hca->attrs.device_cap_flags;
 
        if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
                priv->dev->hw_features = NETIF_F_SG |
index f357ca67a41cd859b2ff5e704621f72813b91fa8..25889311b1e9c8db412f784a77de5aa8ef0a29cb 100644 (file)
@@ -106,7 +106,7 @@ static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
                queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 }
 
-void ipoib_mcast_free(struct ipoib_mcast *mcast)
+static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 {
        struct net_device *dev = mcast->dev;
        int tx_dropped = 0;
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
        return mcast;
 }
 
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct rb_node *n = priv->multicast_tree.rb_node;
@@ -456,7 +456,10 @@ out_locked:
        return status;
 }
 
-static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
+/*
+ * Caller must hold 'priv->lock'
+ */
+static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_sa_multicast *multicast;
@@ -466,6 +469,10 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
        ib_sa_comp_mask comp_mask;
        int ret = 0;
 
+       if (!priv->broadcast ||
+           !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               return -EINVAL;
+
        ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
 
        rec.mgid     = mcast->mcmember.mgid;
@@ -525,20 +532,23 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
                        rec.join_state = 4;
 #endif
        }
+       spin_unlock_irq(&priv->lock);
 
        multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
                                         &rec, comp_mask, GFP_KERNEL,
                                         ipoib_mcast_join_complete, mcast);
+       spin_lock_irq(&priv->lock);
        if (IS_ERR(multicast)) {
                ret = PTR_ERR(multicast);
                ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
-               spin_lock_irq(&priv->lock);
                /* Requeue this join task with a backoff delay */
                __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
                clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
                spin_unlock_irq(&priv->lock);
                complete(&mcast->done);
+               spin_lock_irq(&priv->lock);
        }
+       return 0;
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -620,9 +630,10 @@ void ipoib_mcast_join_task(struct work_struct *work)
                                /* Found the next unjoined group */
                                init_completion(&mcast->done);
                                set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-                               spin_unlock_irq(&priv->lock);
-                               ipoib_mcast_join(dev, mcast);
-                               spin_lock_irq(&priv->lock);
+                               if (ipoib_mcast_join(dev, mcast)) {
+                                       spin_unlock_irq(&priv->lock);
+                                       return;
+                               }
                        } else if (!delay_until ||
                                 time_before(mcast->delay_until, delay_until))
                                delay_until = mcast->delay_until;
@@ -641,10 +652,9 @@ out:
        if (mcast) {
                init_completion(&mcast->done);
                set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+               ipoib_mcast_join(dev, mcast);
        }
        spin_unlock_irq(&priv->lock);
-       if (mcast)
-               ipoib_mcast_join(dev, mcast);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)
@@ -677,7 +687,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
        return 0;
 }
 
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret = 0;
@@ -704,6 +714,35 @@ int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
        return 0;
 }
 
+/*
+ * Check if the multicast group is sendonly. If so, remove it from the
+ * maps and add it to the remove list.
+ */
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+                               struct list_head *remove_list)
+{
+       /* Is this multicast? */
+       if (*mgid == 0xff) {
+               struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
+
+               if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+                       list_del(&mcast->list);
+                       rb_erase(&mcast->rb_node, &priv->multicast_tree);
+                       list_add_tail(&mcast->list, remove_list);
+               }
+       }
+}
+
+void ipoib_mcast_remove_list(struct list_head *remove_list)
+{
+       struct ipoib_mcast *mcast, *tmcast;
+
+       list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
+               ipoib_mcast_leave(mcast->dev, mcast);
+               ipoib_mcast_free(mcast);
+       }
+}
+
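
Taken together, the two helpers implement the usual collect-then-free idiom:
candidates are gathered under priv->lock and torn down after it is dropped,
since leave/free may sleep. A hedged usage sketch (hypothetical wrapper):

	static void example_reap_sendonly(struct ipoib_dev_priv *priv, u8 *daddr)
	{
		unsigned long flags;
		LIST_HEAD(remove_list);

		spin_lock_irqsave(&priv->lock, flags);
		ipoib_check_and_add_mcast_sendonly(priv, daddr + 4, &remove_list);
		spin_unlock_irqrestore(&priv->lock, flags);

		ipoib_mcast_remove_list(&remove_list);	/* may sleep */
	}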
 void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -810,10 +849,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
                if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        wait_for_completion(&mcast->done);
 
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 }
 
 static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
@@ -939,10 +975,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
                if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        wait_for_completion(&mcast->done);
 
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(mcast->dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 
        /*
         * Double check that we are still up
index 9080161e01af1614afa5796a134d7fd890a1be45..c827c93f46c547ce7ec810a2636723a7f3b582a8 100644 (file)
@@ -644,7 +644,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 
                ib_conn = &iser_conn->ib_conn;
                if (ib_conn->pi_support) {
-                       u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+                       u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
 
                        scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
                        scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
@@ -656,7 +656,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
                 * max fastreg page list length.
                 */
                shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
-                       ib_conn->device->dev_attr.max_fast_reg_page_list_len);
+                       ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
                shost->max_sectors = min_t(unsigned int,
                        1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
 
@@ -1059,7 +1059,8 @@ static int __init iser_init(void)
        release_wq = alloc_workqueue("release workqueue", 0, 0);
        if (!release_wq) {
                iser_err("failed to allocate release workqueue\n");
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto err_alloc_wq;
        }
 
        iscsi_iser_scsi_transport = iscsi_register_transport(
@@ -1067,12 +1068,14 @@ static int __init iser_init(void)
        if (!iscsi_iser_scsi_transport) {
                iser_err("iscsi_register_transport failed\n");
                err = -EINVAL;
-               goto register_transport_failure;
+               goto err_reg;
        }
 
        return 0;
 
-register_transport_failure:
+err_reg:
+       destroy_workqueue(release_wq);
+err_alloc_wq:
        kmem_cache_destroy(ig.desc_cache);
 
        return err;
index 8a5998e6a407997906e45864ac9799fdac468084..95f0a64e076b9addc5b01fc17e268f211c9e6f04 100644 (file)
@@ -48,6 +48,7 @@
 #include <scsi/scsi_transport_iscsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
+#include <scsi/iser.h>
 
 #include <linux/interrupt.h>
 #include <linux/wait.h>
                                         - ISER_MAX_RX_MISC_PDUS) /     \
                                         (1 + ISER_INFLIGHT_DATAOUTS))
 
-#define ISER_WC_BATCH_COUNT   16
 #define ISER_SIGNAL_CMD_COUNT 32
 
-#define ISER_VER                       0x10
-#define ISER_WSV                       0x08
-#define ISER_RSV                       0x04
-
-#define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
-#define ISER_BEACON_WRID               0xfffffffffffffffeULL
-
-/**
- * struct iser_hdr - iSER header
- *
- * @flags:        flags support (zbva, remote_inv)
- * @rsvd:         reserved
- * @write_stag:   write rkey
- * @write_va:     write virtual address
- * @read_stag:    read rkey
- * @read_va:      read virtual address
- */
-struct iser_hdr {
-       u8      flags;
-       u8      rsvd[3];
-       __be32  write_stag;
-       __be64  write_va;
-       __be32  read_stag;
-       __be64  read_va;
-} __attribute__((packed));
-
-
-#define ISER_ZBVA_NOT_SUPPORTED                0x80
-#define ISER_SEND_W_INV_NOT_SUPPORTED  0x40
-
-struct iser_cm_hdr {
-       u8      flags;
-       u8      rsvd[3];
-} __packed;
-
 /* Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN  (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+#define ISER_HEADERS_LEN       (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
 
 #define ISER_RECV_DATA_SEG_LEN 128
 #define ISER_RX_PAYLOAD_SIZE   (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
@@ -269,7 +234,7 @@ enum iser_desc_type {
 #define ISER_MAX_WRS 7
 
 /**
- * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ * struct iser_tx_desc - iSER TX descriptor
  *
  * @iser_header:   iser header
  * @iscsi_header:  iscsi header
@@ -287,12 +252,13 @@ enum iser_desc_type {
  * @sig_attrs:     Signature attributes
  */
 struct iser_tx_desc {
-       struct iser_hdr              iser_header;
+       struct iser_ctrl             iser_header;
        struct iscsi_hdr             iscsi_header;
        enum   iser_desc_type        type;
        u64                          dma_addr;
        struct ib_sge                tx_sg[2];
        int                          num_sge;
+       struct ib_cqe                cqe;
        bool                         mapped;
        u8                           wr_idx;
        union iser_wr {
@@ -306,9 +272,10 @@ struct iser_tx_desc {
 };
 
 #define ISER_RX_PAD_SIZE       (256 - (ISER_RX_PAYLOAD_SIZE + \
-                                       sizeof(u64) + sizeof(struct ib_sge)))
+                                sizeof(u64) + sizeof(struct ib_sge) + \
+                                sizeof(struct ib_cqe)))
 /**
- * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ * struct iser_rx_desc - iSER RX descriptor
  *
  * @iser_header:   iser header
  * @iscsi_header:  iscsi header
@@ -318,12 +285,32 @@ struct iser_tx_desc {
  * @pad:           for sense data TODO: Modify to maximum sense length supported
  */
 struct iser_rx_desc {
-       struct iser_hdr              iser_header;
+       struct iser_ctrl             iser_header;
        struct iscsi_hdr             iscsi_header;
        char                         data[ISER_RECV_DATA_SEG_LEN];
        u64                          dma_addr;
        struct ib_sge                rx_sg;
+       struct ib_cqe                cqe;
        char                         pad[ISER_RX_PAD_SIZE];
+} __packed;
+
+/**
+ * struct iser_login_desc - iSER login descriptor
+ *
+ * @req:           pointer to login request buffer
+ * @resp:          pointer to login response buffer
+ * @req_dma:       DMA address of login request buffer
+ * @rsp_dma:       DMA address of login response buffer
+ * @sge:           IB sge for login post recv
+ * @cqe:           completion handler
+ */
+struct iser_login_desc {
+       void                         *req;
+       void                         *rsp;
+       u64                          req_dma;
+       u64                          rsp_dma;
+       struct ib_sge                sge;
+       struct ib_cqe                cqe;
 } __attribute__((packed));
 
 struct iser_conn;
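
The ib_cqe fields added to these descriptors belong to the new CQ API:
completions dispatch through an embedded handler instead of decoding a wr_id
cookie. A minimal sketch of the pattern, assuming the 4.5-era API (handler
name hypothetical):

	static void example_rx_done(struct ib_cq *cq, struct ib_wc *wc)
	{
		struct iser_rx_desc *desc =
			container_of(wc->wr_cqe, struct iser_rx_desc, cqe);

		pr_debug("rx completion for desc %p\n", desc);
	}

	/* setup: desc->cqe.done = example_rx_done;
	 * post:  rx_wr.wr_cqe  = &desc->cqe; */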
@@ -333,18 +320,12 @@ struct iscsi_iser_task;
 /**
  * struct iser_comp - iSER completion context
  *
- * @device:     pointer to device handle
  * @cq:         completion queue
- * @wcs:        work completion array
- * @tasklet:    Tasklet handle
  * @active_qps: Number of active QPs attached
  *              to completion context
  */
 struct iser_comp {
-       struct iser_device      *device;
        struct ib_cq            *cq;
-       struct ib_wc             wcs[ISER_WC_BATCH_COUNT];
-       struct tasklet_struct    tasklet;
        int                      active_qps;
 };
 
@@ -380,7 +361,6 @@ struct iser_reg_ops {
  *
  * @ib_device:     RDMA device
  * @pd:            Protection Domain for this device
- * @dev_attr:      Device attributes container
  * @mr:            Global DMA memory region
  * @event_handler: IB events handle routine
  * @ig_list:      entry in devices list
@@ -389,18 +369,19 @@ struct iser_reg_ops {
  *                 cpus and device max completion vectors
 * @comps:         Dynamically allocated array of completion handlers
  * @reg_ops:       Registration ops
+ * @remote_inv_sup: Remote invalidate is supported on this device
  */
 struct iser_device {
        struct ib_device             *ib_device;
        struct ib_pd                 *pd;
-       struct ib_device_attr        dev_attr;
        struct ib_mr                 *mr;
        struct ib_event_handler      event_handler;
        struct list_head             ig_list;
        int                          refcount;
        int                          comps_used;
        struct iser_comp             *comps;
-       struct iser_reg_ops          *reg_ops;
+       const struct iser_reg_ops    *reg_ops;
+       bool                         remote_inv_sup;
 };
 
 #define ISER_CHECK_GUARD       0xc0
@@ -475,10 +456,11 @@ struct iser_fr_pool {
  * @rx_wr:               receive work request for batch posts
  * @device:              reference to iser device
  * @comp:                iser completion context
- * @pi_support:          Indicate device T10-PI support
- * @beacon:              beacon send wr to signal all flush errors were drained
- * @flush_comp:          completes when all connection completions consumed
 * @fr_pool:             connection fast registration pool
+ * @pi_support:          Indicate device T10-PI support
+ * @last:                last send wr to signal all flush errors were drained
+ * @last_cqe:            cqe handler for last wr
+ * @reg_cqe:             cqe handler for memory registration wrs
+ * @last_comp:           completes when all connection completions consumed
  */
 struct ib_conn {
        struct rdma_cm_id           *cma_id;
@@ -488,10 +470,12 @@ struct ib_conn {
        struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
        struct iser_device          *device;
        struct iser_comp            *comp;
-       bool                         pi_support;
-       struct ib_send_wr            beacon;
-       struct completion            flush_comp;
        struct iser_fr_pool          fr_pool;
+       bool                         pi_support;
+       struct ib_send_wr            last;
+       struct ib_cqe                last_cqe;
+       struct ib_cqe                reg_cqe;
+       struct completion            last_comp;
 };
 
 /**
@@ -514,11 +498,7 @@ struct ib_conn {
  * @up_completion:    connection establishment completed
  *                    (state is ISER_CONN_UP)
  * @conn_list:        entry in ig conn list
- * @login_buf:        login data buffer (stores login parameters)
- * @login_req_buf:    login request buffer
- * @login_req_dma:    login request buffer dma address
- * @login_resp_buf:   login response buffer
- * @login_resp_dma:   login response buffer dma address
+ * @login_desc:       login descriptor
  * @rx_desc_head:     head of rx_descs cyclic buffer
  * @rx_descs:         rx buffers array (cyclic buffer)
  * @num_rx_descs:     number of rx descriptors
@@ -541,15 +521,13 @@ struct iser_conn {
        struct completion            ib_completion;
        struct completion            up_completion;
        struct list_head             conn_list;
-
-       char                         *login_buf;
-       char                         *login_req_buf, *login_resp_buf;
-       u64                          login_req_dma, login_resp_dma;
+       struct iser_login_desc       login_desc;
        unsigned int                 rx_desc_head;
        struct iser_rx_desc          *rx_descs;
        u32                          num_rx_descs;
        unsigned short               scsi_sg_tablesize;
        unsigned int                 scsi_max_sectors;
+       bool                         snd_w_inv;
 };
 
 /**
@@ -579,9 +557,8 @@ struct iscsi_iser_task {
 
 struct iser_page_vec {
        u64 *pages;
-       int length;
-       int offset;
-       int data_size;
+       int npages;
+       struct ib_mr fake_mr;
 };
 
 /**
@@ -633,12 +610,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn);
 
 void iser_release_work(struct work_struct *work);
 
-void iser_rcv_completion(struct iser_rx_desc *desc,
-                        unsigned long dto_xfer_len,
-                        struct ib_conn *ib_conn);
-
-void iser_snd_completion(struct iser_tx_desc *desc,
-                        struct ib_conn *ib_conn);
+void iser_err_comp(struct ib_wc *wc, const char *type);
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
 
 void iser_task_rdma_init(struct iscsi_iser_task *task);
 
@@ -651,7 +630,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                     enum iser_data_dir cmd_dir);
 
 int iser_reg_rdma_mem(struct iscsi_iser_task *task,
-                     enum iser_data_dir dir);
+                     enum iser_data_dir dir,
+                     bool all_imm);
 void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
                         enum iser_data_dir dir);
 
@@ -719,4 +699,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc)
        return cur_wr;
 }
 
+static inline struct iser_conn *
+to_iser_conn(struct ib_conn *ib_conn)
+{
+       return container_of(ib_conn, struct iser_conn, ib_conn);
+}
+
+static inline struct iser_rx_desc *
+iser_rx(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_rx_desc, cqe);
+}
+
+static inline struct iser_tx_desc *
+iser_tx(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_tx_desc, cqe);
+}
+
+static inline struct iser_login_desc *
+iser_login(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_login_desc, cqe);
+}
+
 #endif
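
A note on the helpers above, which are the heart of this conversion: with the new CQ API every work request embeds a struct ib_cqe, the core invokes its ->done callback directly, and the handler recovers its descriptor with container_of() rather than decoding an opaque wr_id. A minimal sketch of the pattern, assuming only <rdma/ib_verbs.h>; my_desc, my_done and my_post are illustrative names, not part of this patch:

	struct my_desc {
		struct ib_cqe		cqe;	/* embedded completion entry */
		void			*payload;
	};

	static void my_done(struct ib_cq *cq, struct ib_wc *wc)
	{
		/* no address-range or opcode checks: wr_cqe points here */
		struct my_desc *desc = container_of(wc->wr_cqe,
						    struct my_desc, cqe);

		if (likely(wc->status == IB_WC_SUCCESS))
			pr_debug("rx %u bytes for %p\n",
				 wc->byte_len, desc->payload);
	}

	static int my_post(struct ib_qp *qp, struct my_desc *desc,
			   struct ib_sge *sge)
	{
		struct ib_recv_wr wr, *bad_wr;

		desc->cqe.done = my_done;	/* per-WR handler */
		wr.next = NULL;
		wr.wr_cqe = &desc->cqe;
		wr.sg_list = sge;
		wr.num_sge = 1;
		return ib_post_recv(qp, &wr, &bad_wr);
	}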
index ffd00c42072959ed6747b5094e159cb34532675f..ed54b388e7adca899bfaec181c2f4c2ba75b1f47 100644 (file)
@@ -51,7 +51,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_mem_reg *mem_reg;
        int err;
-       struct iser_hdr *hdr = &iser_task->desc.iser_header;
+       struct iser_ctrl *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
 
        err = iser_dma_map_task_data(iser_task,
@@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
                        return err;
        }
 
-       err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+       err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
        if (err) {
                iser_err("Failed to set up Data-IN RDMA\n");
                return err;
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_mem_reg *mem_reg;
        int err;
-       struct iser_hdr *hdr = &iser_task->desc.iser_header;
+       struct iser_ctrl *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
        struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
 
@@ -126,7 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                        return err;
        }
 
-       err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
+       err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT,
+                               buf_out->data_len == imm_sz);
        if (err != 0) {
                iser_err("Failed to register write cmd RDMA mem\n");
                return err;
@@ -166,7 +167,7 @@ static void iser_create_send_desc(struct iser_conn  *iser_conn,
        ib_dma_sync_single_for_cpu(device->ib_device,
                tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
-       memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
+       memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
        tx_desc->iser_header.flags = ISER_VER;
        tx_desc->num_sge = 1;
 }
@@ -174,73 +175,63 @@ static void iser_create_send_desc(struct iser_conn        *iser_conn,
 static void iser_free_login_buf(struct iser_conn *iser_conn)
 {
        struct iser_device *device = iser_conn->ib_conn.device;
+       struct iser_login_desc *desc = &iser_conn->login_desc;
 
-       if (!iser_conn->login_buf)
+       if (!desc->req)
                return;
 
-       if (iser_conn->login_req_dma)
-               ib_dma_unmap_single(device->ib_device,
-                                   iser_conn->login_req_dma,
-                                   ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+       ib_dma_unmap_single(device->ib_device, desc->req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
 
-       if (iser_conn->login_resp_dma)
-               ib_dma_unmap_single(device->ib_device,
-                                   iser_conn->login_resp_dma,
-                                   ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+       ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
+                           ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
 
-       kfree(iser_conn->login_buf);
+       kfree(desc->req);
+       kfree(desc->rsp);
 
        /* make sure we never redo any unmapping */
-       iser_conn->login_req_dma = 0;
-       iser_conn->login_resp_dma = 0;
-       iser_conn->login_buf = NULL;
+       desc->req = NULL;
+       desc->rsp = NULL;
 }
 
 static int iser_alloc_login_buf(struct iser_conn *iser_conn)
 {
        struct iser_device *device = iser_conn->ib_conn.device;
-       int                     req_err, resp_err;
-
-       BUG_ON(device == NULL);
-
-       iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
-                                    ISER_RX_LOGIN_SIZE, GFP_KERNEL);
-       if (!iser_conn->login_buf)
-               goto out_err;
-
-       iser_conn->login_req_buf  = iser_conn->login_buf;
-       iser_conn->login_resp_buf = iser_conn->login_buf +
-                                               ISCSI_DEF_MAX_RECV_SEG_LEN;
-
-       iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
-                                                    iser_conn->login_req_buf,
-                                                    ISCSI_DEF_MAX_RECV_SEG_LEN,
-                                                    DMA_TO_DEVICE);
-
-       iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
-                                                     iser_conn->login_resp_buf,
-                                                     ISER_RX_LOGIN_SIZE,
-                                                     DMA_FROM_DEVICE);
-
-       req_err  = ib_dma_mapping_error(device->ib_device,
-                                       iser_conn->login_req_dma);
-       resp_err = ib_dma_mapping_error(device->ib_device,
-                                       iser_conn->login_resp_dma);
-
-       if (req_err || resp_err) {
-               if (req_err)
-                       iser_conn->login_req_dma = 0;
-               if (resp_err)
-                       iser_conn->login_resp_dma = 0;
-               goto free_login_buf;
-       }
+       struct iser_login_desc *desc = &iser_conn->login_desc;
+
+       desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
+       if (!desc->req)
+               return -ENOMEM;
+
+       desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
+                                         ISCSI_DEF_MAX_RECV_SEG_LEN,
+                                         DMA_TO_DEVICE);
+       if (ib_dma_mapping_error(device->ib_device,
+                               desc->req_dma))
+               goto free_req;
+
+       desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+       if (!desc->rsp)
+               goto unmap_req;
+
+       desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
+                                          ISER_RX_LOGIN_SIZE,
+                                          DMA_FROM_DEVICE);
+       if (ib_dma_mapping_error(device->ib_device,
+                               desc->rsp_dma))
+               goto free_rsp;
+
        return 0;
 
-free_login_buf:
-       iser_free_login_buf(iser_conn);
+free_rsp:
+       kfree(desc->rsp);
+unmap_req:
+       ib_dma_unmap_single(device->ib_device, desc->req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN,
+                           DMA_TO_DEVICE);
+free_req:
+       kfree(desc->req);
 
-out_err:
-       iser_err("unable to alloc or map login buf\n");
        return -ENOMEM;
 }
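
The rewrite above also retires the old sentinel convention, where a zero DMA address meant "not mapped"; instead the error path unwinds strictly in reverse order of acquisition. The allocate/map/check ladder in isolation, as a hedged sketch (example_map_one is a made-up helper, not driver code):

	static int example_map_one(struct ib_device *dev, size_t len,
				   void **buf, u64 *dma)
	{
		*buf = kmalloc(len, GFP_KERNEL);
		if (!*buf)
			return -ENOMEM;

		*dma = ib_dma_map_single(dev, *buf, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(dev, *dma)) {
			kfree(*buf);	/* unwind only what succeeded */
			*buf = NULL;
			return -ENOMEM;
		}
		return 0;
	}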
 
@@ -280,11 +271,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
                        goto rx_desc_dma_map_failed;
 
                rx_desc->dma_addr = dma_addr;
-
+               rx_desc->cqe.done = iser_task_rsp;
                rx_sg = &rx_desc->rx_sg;
-               rx_sg->addr   = rx_desc->dma_addr;
+               rx_sg->addr = rx_desc->dma_addr;
                rx_sg->length = ISER_RX_PAYLOAD_SIZE;
-               rx_sg->lkey   = device->pd->local_dma_lkey;
+               rx_sg->lkey = device->pd->local_dma_lkey;
        }
 
        iser_conn->rx_desc_head = 0;
@@ -383,6 +374,7 @@ int iser_send_command(struct iscsi_conn *conn,
 
        /* build the tx desc regd header and add it to the tx desc dto */
        tx_desc->type = ISCSI_TX_SCSI_COMMAND;
+       tx_desc->cqe.done = iser_cmd_comp;
        iser_create_send_desc(iser_conn, tx_desc);
 
        if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -464,6 +456,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
        }
 
        tx_desc->type = ISCSI_TX_DATAOUT;
+       tx_desc->cqe.done = iser_dataout_comp;
        tx_desc->iser_header.flags = ISER_VER;
        memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
 
@@ -513,6 +506,7 @@ int iser_send_control(struct iscsi_conn *conn,
 
        /* build the tx desc regd header and add it to the tx desc dto */
        mdesc->type = ISCSI_TX_CONTROL;
+       mdesc->cqe.done = iser_ctrl_comp;
        iser_create_send_desc(iser_conn, mdesc);
 
        device = iser_conn->ib_conn.device;
@@ -520,25 +514,25 @@ int iser_send_control(struct iscsi_conn *conn,
        data_seg_len = ntoh24(task->hdr->dlength);
 
        if (data_seg_len > 0) {
+               struct iser_login_desc *desc = &iser_conn->login_desc;
                struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
+
                if (task != conn->login_task) {
                        iser_err("data present on non login task!!!\n");
                        goto send_control_error;
                }
 
-               ib_dma_sync_single_for_cpu(device->ib_device,
-                       iser_conn->login_req_dma, task->data_count,
-                       DMA_TO_DEVICE);
+               ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
+                                          task->data_count, DMA_TO_DEVICE);
 
-               memcpy(iser_conn->login_req_buf, task->data, task->data_count);
+               memcpy(desc->req, task->data, task->data_count);
 
-               ib_dma_sync_single_for_device(device->ib_device,
-                       iser_conn->login_req_dma, task->data_count,
-                       DMA_TO_DEVICE);
+               ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
+                                             task->data_count, DMA_TO_DEVICE);
 
-               tx_dsg->addr    = iser_conn->login_req_dma;
-               tx_dsg->length  = task->data_count;
-               tx_dsg->lkey    = device->pd->local_dma_lkey;
+               tx_dsg->addr = desc->req_dma;
+               tx_dsg->length = task->data_count;
+               tx_dsg->lkey = device->pd->local_dma_lkey;
                mdesc->num_sge = 2;
        }
 
@@ -562,41 +556,126 @@ send_control_error:
        return err;
 }
 
-/**
- * iser_rcv_dto_completion - recv DTO completion
- */
-void iser_rcv_completion(struct iser_rx_desc *rx_desc,
-                        unsigned long rx_xfer_len,
-                        struct ib_conn *ib_conn)
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+       struct iser_login_desc *desc = iser_login(wc->wr_cqe);
        struct iscsi_hdr *hdr;
-       u64 rx_dma;
-       int rx_buflen, outstanding, count, err;
+       char *data;
+       int length;
 
-       /* differentiate between login and all other PDUs */
-       if ((char *)rx_desc == iser_conn->login_resp_buf) {
-               rx_dma = iser_conn->login_resp_dma;
-               rx_buflen = ISER_RX_LOGIN_SIZE;
-       } else {
-               rx_dma = rx_desc->dma_addr;
-               rx_buflen = ISER_RX_PAYLOAD_SIZE;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "login_rsp");
+               return;
+       }
+
+       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+                                  desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+                                  DMA_FROM_DEVICE);
+
+       hdr = desc->rsp + sizeof(struct iser_ctrl);
+       data = desc->rsp + ISER_HEADERS_LEN;
+       length = wc->byte_len - ISER_HEADERS_LEN;
+
+       iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+                hdr->itt, length);
+
+       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
+
+       ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+                                     desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+                                     DMA_FROM_DEVICE);
+
+       ib_conn->post_recv_buf_count--;
+}
+
+static inline void
+iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
+{
+       if (likely(rkey == desc->rsc.mr->rkey))
+               desc->rsc.mr_valid = 0;
+       else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
+               desc->pi_ctx->sig_mr_valid = 0;
+}
+
+static int
+iser_check_remote_inv(struct iser_conn *iser_conn,
+                     struct ib_wc *wc,
+                     struct iscsi_hdr *hdr)
+{
+       if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+               struct iscsi_task *task;
+               u32 rkey = wc->ex.invalidate_rkey;
+
+               iser_dbg("conn %p: remote invalidation for rkey %#x\n",
+                        iser_conn, rkey);
+
+               if (unlikely(!iser_conn->snd_w_inv)) {
+                       iser_err("conn %p: unexepected remote invalidation, "
+                                "terminating connection\n", iser_conn);
+                       return -EPROTO;
+               }
+
+               task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
+               if (likely(task)) {
+                       struct iscsi_iser_task *iser_task = task->dd_data;
+                       struct iser_fr_desc *desc;
+
+                       if (iser_task->dir[ISER_DIR_IN]) {
+                               desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
+                               iser_inv_desc(desc, rkey);
+                       }
+
+                       if (iser_task->dir[ISER_DIR_OUT]) {
+                               desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
+                               iser_inv_desc(desc, rkey);
+                       }
+               } else {
+                       iser_err("failed to get task for itt=%d\n", hdr->itt);
+                       return -EINVAL;
+               }
        }
 
-       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
-                                  rx_buflen, DMA_FROM_DEVICE);
+       return 0;
+}
 
-       hdr = &rx_desc->iscsi_header;
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+       struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
+       struct iscsi_hdr *hdr;
+       int length;
+       int outstanding, count, err;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "task_rsp");
+               return;
+       }
+
+       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+                                  desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+                                  DMA_FROM_DEVICE);
+
+       hdr = &desc->iscsi_header;
+       length = wc->byte_len - ISER_HEADERS_LEN;
 
        iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
-                       hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
+                hdr->itt, length);
+
+       if (iser_check_remote_inv(iser_conn, wc, hdr)) {
+               iscsi_conn_failure(iser_conn->iscsi_conn,
+                                  ISCSI_ERR_CONN_FAILED);
+               return;
+       }
 
-       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
-                       rx_xfer_len - ISER_HEADERS_LEN);
+       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
 
-       ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
-                                     rx_buflen, DMA_FROM_DEVICE);
+       ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+                                     desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+                                     DMA_FROM_DEVICE);
 
        /* decrementing conn->post_recv_buf_count only --after-- freeing the   *
         * task eliminates the need to worry on tasks which are completed in   *
@@ -604,9 +683,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
         * for the posted rx bufs refcount to become zero handles everything   */
        ib_conn->post_recv_buf_count--;
 
-       if (rx_dma == iser_conn->login_resp_dma)
-               return;
-
        outstanding = ib_conn->post_recv_buf_count;
        if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
                count = min(iser_conn->qp_max_recv_dtos - outstanding,
@@ -617,26 +693,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
        }
 }
 
-void iser_snd_completion(struct iser_tx_desc *tx_desc,
-                       struct ib_conn *ib_conn)
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
 {
+       if (unlikely(wc->status != IB_WC_SUCCESS))
+               iser_err_comp(wc, "command");
+}
+
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
        struct iscsi_task *task;
-       struct iser_device *device = ib_conn->device;
 
-       if (tx_desc->type == ISCSI_TX_DATAOUT) {
-               ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
-                                       ISER_HEADERS_LEN, DMA_TO_DEVICE);
-               kmem_cache_free(ig.desc_cache, tx_desc);
-               tx_desc = NULL;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "control");
+               return;
        }
 
-       if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
-               /* this arithmetic is legal by libiscsi dd_data allocation */
-               task = (void *) ((long)(void *)tx_desc -
-                                 sizeof(struct iscsi_task));
-               if (task->hdr->itt == RESERVED_ITT)
-                       iscsi_put_task(task);
-       }
+       /* this arithmetic is legal by libiscsi dd_data allocation */
+       task = (void *)desc - sizeof(struct iscsi_task);
+       if (task->hdr->itt == RESERVED_ITT)
+               iscsi_put_task(task);
+}
+
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_device *device = ib_conn->device;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS))
+               iser_err_comp(wc, "dataout");
+
+       ib_dma_unmap_single(device->ib_device, desc->dma_addr,
+                           ISER_HEADERS_LEN, DMA_TO_DEVICE);
+       kmem_cache_free(ig.desc_cache, desc);
+}
+
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+
+       complete(&ib_conn->last_comp);
 }
 
 void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
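
Net effect of the hunks above: the old iser_snd_completion() type switch is gone, and each send path stamps the ->done handler matching its PDU type on the descriptor before posting, so nothing at completion time inspects the descriptor type. Roughly, as an illustrative composite (example_send is a made-up name; iser_post_send() keeps its existing bool-signal signature):

	static int example_send(struct iser_conn *iser_conn,
				struct iser_tx_desc *tx_desc)
	{
		tx_desc->type = ISCSI_TX_CONTROL;
		tx_desc->cqe.done = iser_ctrl_comp;	/* runs in CQ context */
		return iser_post_send(&iser_conn->ib_conn, tx_desc, true);
	}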
index ea765fb9664d36759ff1a90d11272c1bc13ca457..9a391cc5b9b3f45f16f0a49138aeda5b96030b01 100644 (file)
@@ -49,7 +49,7 @@ int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                     struct iser_reg_resources *rsc,
                     struct iser_mem_reg *mem_reg);
 
-static struct iser_reg_ops fastreg_ops = {
+static const struct iser_reg_ops fastreg_ops = {
        .alloc_reg_res  = iser_alloc_fastreg_pool,
        .free_reg_res   = iser_free_fastreg_pool,
        .reg_mem        = iser_fast_reg_mr,
@@ -58,7 +58,7 @@ static struct iser_reg_ops fastreg_ops = {
        .reg_desc_put   = iser_reg_desc_put_fr,
 };
 
-static struct iser_reg_ops fmr_ops = {
+static const struct iser_reg_ops fmr_ops = {
        .alloc_reg_res  = iser_alloc_fmr_pool,
        .free_reg_res   = iser_free_fmr_pool,
        .reg_mem        = iser_fast_reg_fmr,
@@ -67,19 +67,24 @@ static struct iser_reg_ops fmr_ops = {
        .reg_desc_put   = iser_reg_desc_put_fmr,
 };
 
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       iser_err_comp(wc, "memreg");
+}
+
 int iser_assign_reg_ops(struct iser_device *device)
 {
-       struct ib_device_attr *dev_attr = &device->dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
 
        /* Assign function handles  - based on FMR support */
-       if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
-           device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+       if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
+           ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
                iser_info("FMR supported, using FMR for registration\n");
                device->reg_ops = &fmr_ops;
-       } else
-       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+       } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
                iser_info("FastReg supported, using FastReg for registration\n");
                device->reg_ops = &fastreg_ops;
+               device->remote_inv_sup = iser_always_reg;
        } else {
                iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
                return -1;
@@ -131,67 +136,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
 {
 }
 
-#define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
-
-/**
- * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
- * and returns the length of resulting physical address array (may be less than
- * the original due to possible compaction).
- *
- * we build a "page vec" under the assumption that the SG meets the RDMA
- * alignment requirements. Other than the first and last SG elements, all
- * the "internal" elements can be compacted into a list whose elements are
- * dma addresses of physical pages. The code supports also the weird case
- * where --few fragments of the same page-- are present in the SG as
- * consecutive elements. Also, it handles one entry SG.
- */
-
-static int iser_sg_to_page_vec(struct iser_data_buf *data,
-                              struct ib_device *ibdev, u64 *pages,
-                              int *offset, int *data_size)
-{
-       struct scatterlist *sg, *sgl = data->sg;
-       u64 start_addr, end_addr, page, chunk_start = 0;
-       unsigned long total_sz = 0;
-       unsigned int dma_len;
-       int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
-
-       /* compute the offset of first element */
-       *offset = (u64) sgl[0].offset & ~MASK_4K;
-
-       new_chunk = 1;
-       cur_page  = 0;
-       for_each_sg(sgl, sg, data->dma_nents, i) {
-               start_addr = ib_sg_dma_address(ibdev, sg);
-               if (new_chunk)
-                       chunk_start = start_addr;
-               dma_len = ib_sg_dma_len(ibdev, sg);
-               end_addr = start_addr + dma_len;
-               total_sz += dma_len;
-
-               /* collect page fragments until aligned or end of SG list */
-               if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
-                       new_chunk = 0;
-                       continue;
-               }
-               new_chunk = 1;
-
-               /* address of the first page in the contiguous chunk;
-                  masking relevant for the very first SG entry,
-                  which might be unaligned */
-               page = chunk_start & MASK_4K;
-               do {
-                       pages[cur_page++] = page;
-                       page += SIZE_4K;
-               } while (page < end_addr);
-       }
-
-       *data_size = total_sz;
-       iser_dbg("page_vec->data_size:%d cur_page %d\n",
-                *data_size, cur_page);
-       return cur_page;
-}
-
 static void iser_data_buf_dump(struct iser_data_buf *data,
                               struct ib_device *ibdev)
 {
@@ -210,10 +154,10 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
 {
        int i;
 
-       iser_err("page vec length %d data size %d\n",
-                page_vec->length, page_vec->data_size);
-       for (i = 0; i < page_vec->length; i++)
-               iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
+       iser_err("page vec npages %d data length %d\n",
+                page_vec->npages, page_vec->fake_mr.length);
+       for (i = 0; i < page_vec->npages; i++)
+               iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
 }
 
 int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
@@ -251,7 +195,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
        struct scatterlist *sg = mem->sg;
 
        reg->sge.lkey = device->pd->local_dma_lkey;
-       reg->rkey = device->mr->rkey;
+       /*
+        * FIXME: rework the registration code path to differentiate
+        * rkey/lkey use cases
+        */
+       reg->rkey = device->mr ? device->mr->rkey : 0;
        reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
        reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
 
@@ -262,11 +210,16 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
        return 0;
 }
 
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
+static int iser_set_page(struct ib_mr *mr, u64 addr)
+{
+       struct iser_page_vec *page_vec =
+               container_of(mr, struct iser_page_vec, fake_mr);
+
+       page_vec->pages[page_vec->npages++] = addr;
+
+       return 0;
+}
+
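
iser_set_page() lets the FMR path reuse the core helper that replaced the open-coded iser_sg_to_page_vec() deleted earlier in this file: ib_sg_to_pages() walks the DMA-mapped scatterlist in mr->page_size chunks, invokes the callback once per page, and fills mr->iova and mr->length, which the FMR code reads back through fake_mr. A condensed sketch, assuming the ib_sg_to_pages() prototype of this kernel series (collect_pages is an illustrative name):

	static int collect_pages(struct iser_page_vec *pv,
				 struct scatterlist *sg, int nents)
	{
		pv->npages = 0;
		pv->fake_mr.page_size = SIZE_4K;
		/* a return value below nents means the list could not be
		 * fully mapped (misaligned gap or page array exhausted) */
		return ib_sg_to_pages(&pv->fake_mr, sg, nents, iser_set_page);
	}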
 static
 int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
                      struct iser_data_buf *mem,
@@ -280,22 +233,19 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
        struct ib_pool_fmr *fmr;
        int ret, plen;
 
-       plen = iser_sg_to_page_vec(mem, device->ib_device,
-                                  page_vec->pages,
-                                  &page_vec->offset,
-                                  &page_vec->data_size);
-       page_vec->length = plen;
-       if (plen * SIZE_4K < page_vec->data_size) {
+       page_vec->npages = 0;
+       page_vec->fake_mr.page_size = SIZE_4K;
+       plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
+                             mem->size, iser_set_page);
+       if (unlikely(plen < mem->size)) {
                iser_err("page vec too short to hold this SG\n");
                iser_data_buf_dump(mem, device->ib_device);
                iser_dump_page_vec(page_vec);
                return -EINVAL;
        }
 
-       fmr  = ib_fmr_pool_map_phys(fmr_pool,
-                                   page_vec->pages,
-                                   page_vec->length,
-                                   page_vec->pages[0]);
+       fmr  = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
+                                   page_vec->npages, page_vec->pages[0]);
        if (IS_ERR(fmr)) {
                ret = PTR_ERR(fmr);
                iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
@@ -304,8 +254,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
 
        reg->sge.lkey = fmr->fmr->lkey;
        reg->rkey = fmr->fmr->rkey;
-       reg->sge.addr = page_vec->pages[0] + page_vec->offset;
-       reg->sge.length = page_vec->data_size;
+       reg->sge.addr = page_vec->fake_mr.iova;
+       reg->sge.length = page_vec->fake_mr.length;
        reg->mem_h = fmr;
 
        iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
@@ -413,19 +363,16 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
                *mask |= ISER_CHECK_GUARD;
 }
 
-static void
-iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+static inline void
+iser_inv_rkey(struct ib_send_wr *inv_wr,
+             struct ib_mr *mr,
+             struct ib_cqe *cqe)
 {
-       u32 rkey;
-
        inv_wr->opcode = IB_WR_LOCAL_INV;
-       inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+       inv_wr->wr_cqe = cqe;
        inv_wr->ex.invalidate_rkey = mr->rkey;
        inv_wr->send_flags = 0;
        inv_wr->num_sge = 0;
-
-       rkey = ib_inc_rkey(mr->rkey);
-       ib_update_fast_reg_key(mr, rkey);
 }
 
 static int
@@ -437,7 +384,9 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 {
        struct iser_tx_desc *tx_desc = &iser_task->desc;
        struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+       struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
        struct ib_sig_handover_wr *wr;
+       struct ib_mr *mr = pi_ctx->sig_mr;
        int ret;
 
        memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -447,17 +396,19 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 
        iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
 
-       if (!pi_ctx->sig_mr_valid)
-               iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+       if (pi_ctx->sig_mr_valid)
+               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+       ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 
        wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
        wr->wr.opcode = IB_WR_REG_SIG_MR;
-       wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+       wr->wr.wr_cqe = cqe;
        wr->wr.sg_list = &data_reg->sge;
        wr->wr.num_sge = 1;
        wr->wr.send_flags = 0;
        wr->sig_attrs = sig_attrs;
-       wr->sig_mr = pi_ctx->sig_mr;
+       wr->sig_mr = mr;
        if (scsi_prot_sg_count(iser_task->sc))
                wr->prot = &prot_reg->sge;
        else
@@ -465,10 +416,10 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
        wr->access_flags = IB_ACCESS_LOCAL_WRITE |
                           IB_ACCESS_REMOTE_READ |
                           IB_ACCESS_REMOTE_WRITE;
-       pi_ctx->sig_mr_valid = 0;
+       pi_ctx->sig_mr_valid = 1;
 
-       sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
-       sig_reg->rkey = pi_ctx->sig_mr->rkey;
+       sig_reg->sge.lkey = mr->lkey;
+       sig_reg->rkey = mr->rkey;
        sig_reg->sge.addr = 0;
        sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
 
@@ -485,12 +436,15 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                            struct iser_mem_reg *reg)
 {
        struct iser_tx_desc *tx_desc = &iser_task->desc;
+       struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
        struct ib_mr *mr = rsc->mr;
        struct ib_reg_wr *wr;
        int n;
 
-       if (!rsc->mr_valid)
-               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
+       if (rsc->mr_valid)
+               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+       ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 
        n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
        if (unlikely(n != mem->size)) {
@@ -501,7 +455,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 
        wr = reg_wr(iser_tx_next_wr(tx_desc));
        wr->wr.opcode = IB_WR_REG_MR;
-       wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+       wr->wr.wr_cqe = cqe;
        wr->wr.send_flags = 0;
        wr->wr.num_sge = 0;
        wr->mr = mr;
@@ -510,7 +464,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                     IB_ACCESS_REMOTE_WRITE |
                     IB_ACCESS_REMOTE_READ;
 
-       rsc->mr_valid = 0;
+       rsc->mr_valid = 1;
 
        reg->sge.lkey = mr->lkey;
        reg->rkey = mr->rkey;
@@ -554,7 +508,8 @@ iser_reg_data_sg(struct iscsi_iser_task *task,
 }
 
 int iser_reg_rdma_mem(struct iscsi_iser_task *task,
-                     enum iser_data_dir dir)
+                     enum iser_data_dir dir,
+                     bool all_imm)
 {
        struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
@@ -565,8 +520,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
        bool use_dma_key;
        int err;
 
-       use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
-                      scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+       use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
+                     scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
 
        if (!use_dma_key) {
                desc = device->reg_ops->reg_desc_get(ib_conn);
index 42f4da620f2e9f97062b9183b6ebf3ed54e56cfe..40c0f4978e2f00b5173001f54ec41a4bc651b839 100644 (file)
 #define ISER_MAX_CQ_LEN                (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
                                 ISCSI_ISER_MAX_CONN)
 
-static int iser_cq_poll_limit = 512;
-
-static void iser_cq_tasklet_fn(unsigned long data);
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-
-static void iser_cq_event_callback(struct ib_event *cause, void *context)
-{
-       iser_err("cq event %s (%d)\n",
-                ib_event_msg(cause->event), cause->event);
-}
-
 static void iser_qp_event_callback(struct ib_event *cause, void *context)
 {
        iser_err("qp event %s (%d)\n",
@@ -78,59 +67,40 @@ static void iser_event_handler(struct ib_event_handler *handler,
  */
 static int iser_create_device_ib_res(struct iser_device *device)
 {
-       struct ib_device_attr *dev_attr = &device->dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
        int ret, i, max_cqe;
 
-       ret = ib_query_device(device->ib_device, dev_attr);
-       if (ret) {
-               pr_warn("Query device failed for %s\n", device->ib_device->name);
-               return ret;
-       }
-
        ret = iser_assign_reg_ops(device);
        if (ret)
                return ret;
 
        device->comps_used = min_t(int, num_online_cpus(),
-                                device->ib_device->num_comp_vectors);
+                                ib_dev->num_comp_vectors);
 
        device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
                                GFP_KERNEL);
        if (!device->comps)
                goto comps_err;
 
-       max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+       max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
 
        iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
-                 device->comps_used, device->ib_device->name,
-                 device->ib_device->num_comp_vectors, max_cqe);
+                 device->comps_used, ib_dev->name,
+                 ib_dev->num_comp_vectors, max_cqe);
 
-       device->pd = ib_alloc_pd(device->ib_device);
+       device->pd = ib_alloc_pd(ib_dev);
        if (IS_ERR(device->pd))
                goto pd_err;
 
        for (i = 0; i < device->comps_used; i++) {
-               struct ib_cq_init_attr cq_attr = {};
                struct iser_comp *comp = &device->comps[i];
 
-               comp->device = device;
-               cq_attr.cqe = max_cqe;
-               cq_attr.comp_vector = i;
-               comp->cq = ib_create_cq(device->ib_device,
-                                       iser_cq_callback,
-                                       iser_cq_event_callback,
-                                       (void *)comp,
-                                       &cq_attr);
+               comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
+                                      IB_POLL_SOFTIRQ);
                if (IS_ERR(comp->cq)) {
                        comp->cq = NULL;
                        goto cq_err;
                }
-
-               if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
-                       goto cq_err;
-
-               tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
-                            (unsigned long)comp);
        }
 
        if (!iser_always_reg) {
@@ -140,11 +110,11 @@ static int iser_create_device_ib_res(struct iser_device *device)
 
                device->mr = ib_get_dma_mr(device->pd, access);
                if (IS_ERR(device->mr))
-                       goto dma_mr_err;
+                       goto cq_err;
        }
 
-       INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
-                               iser_event_handler);
+       INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
+                             iser_event_handler);
        if (ib_register_event_handler(&device->event_handler))
                goto handler_err;
 
@@ -153,15 +123,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
 handler_err:
        if (device->mr)
                ib_dereg_mr(device->mr);
-dma_mr_err:
-       for (i = 0; i < device->comps_used; i++)
-               tasklet_kill(&device->comps[i].tasklet);
 cq_err:
        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];
 
                if (comp->cq)
-                       ib_destroy_cq(comp->cq);
+                       ib_free_cq(comp->cq);
        }
        ib_dealloc_pd(device->pd);
 pd_err:
@@ -182,8 +149,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];
 
-               tasklet_kill(&comp->tasklet);
-               ib_destroy_cq(comp->cq);
+               ib_free_cq(comp->cq);
                comp->cq = NULL;
        }
 
@@ -299,7 +265,7 @@ iser_alloc_reg_res(struct ib_device *ib_device,
                iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
                return ret;
        }
-       res->mr_valid = 1;
+       res->mr_valid = 0;
 
        return 0;
 }
@@ -336,7 +302,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
                ret = PTR_ERR(pi_ctx->sig_mr);
                goto sig_mr_failure;
        }
-       pi_ctx->sig_mr_valid = 1;
+       pi_ctx->sig_mr_valid = 0;
        desc->pi_ctx->sig_protected = 0;
 
        return 0;
@@ -461,10 +427,9 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
  */
 static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 {
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
        struct iser_device      *device;
-       struct ib_device_attr *dev_attr;
+       struct ib_device        *ib_dev;
        struct ib_qp_init_attr  init_attr;
        int                     ret = -ENOMEM;
        int index, min_index = 0;
@@ -472,7 +437,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
        BUG_ON(ib_conn->device == NULL);
 
        device = ib_conn->device;
-       dev_attr = &device->dev_attr;
+       ib_dev = device->ib_device;
 
        memset(&init_attr, 0, sizeof init_attr);
 
@@ -503,16 +468,16 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
                iser_conn->max_cmds =
                        ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
        } else {
-               if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+               if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
                        init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
                        iser_conn->max_cmds =
                                ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
                } else {
-                       init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+                       init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
                        iser_conn->max_cmds =
-                               ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+                               ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
                        iser_dbg("device %s supports max_send_wr %d\n",
-                                device->ib_device->name, dev_attr->max_qp_wr);
+                                device->ib_device->name, ib_dev->attrs.max_qp_wr);
                }
        }
 
@@ -724,13 +689,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
                                 iser_conn, err);
 
                /* post an indication that all flush errors were consumed */
-               err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+               err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
                if (err) {
-                       iser_err("conn %p failed to post beacon", ib_conn);
+                       iser_err("conn %p failed to post last wr", ib_conn);
                        return 1;
                }
 
-               wait_for_completion(&ib_conn->flush_comp);
+               wait_for_completion(&ib_conn->last_comp);
        }
 
        return 1;
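
The beacon-to-last rename tracks the mechanism here: a QP in the error state flushes its posted work requests in order, so once this final (effectively zero-length) IB_WR_SEND completes through iser_last_comp(), every WR posted before it is guaranteed to have produced its flush completion already. The drain step in isolation, with an illustrative wrapper name (last, last_cqe and last_comp are initialized in iser_conn_init() below):

	static int example_drain(struct ib_conn *ib_conn)
	{
		struct ib_send_wr *bad_wr;
		int err;

		err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
		if (err)
			return err;
		wait_for_completion(&ib_conn->last_comp);
		return 0;	/* all earlier WRs have flushed by now */
	}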
@@ -756,7 +721,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
 
        sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
        sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
-                                device->dev_attr.max_fast_reg_page_list_len);
+                                device->ib_device->attrs.max_fast_reg_page_list_len);
 
        if (sg_tablesize > sup_sg_tablesize) {
                sg_tablesize = sup_sg_tablesize;
@@ -799,7 +764,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
 
        /* connection T10-PI support */
        if (iser_pi_enable) {
-               if (!(device->dev_attr.device_cap_flags &
+               if (!(device->ib_device->attrs.device_cap_flags &
                      IB_DEVICE_SIGNATURE_HANDOVER)) {
                        iser_warn("T10-PI requested but not supported on %s, "
                                  "continue without T10-PI\n",
@@ -841,16 +806,17 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
                goto failure;
 
        memset(&conn_param, 0, sizeof conn_param);
-       conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
+       conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom;
        conn_param.initiator_depth     = 1;
        conn_param.retry_count         = 7;
        conn_param.rnr_retry_count     = 6;
 
        memset(&req_hdr, 0, sizeof(req_hdr));
-       req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
-                       ISER_SEND_W_INV_NOT_SUPPORTED);
-       conn_param.private_data         = (void *)&req_hdr;
-       conn_param.private_data_len     = sizeof(struct iser_cm_hdr);
+       req_hdr.flags = ISER_ZBVA_NOT_SUP;
+       if (!device->remote_inv_sup)
+               req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
+       conn_param.private_data = (void *)&req_hdr;
+       conn_param.private_data_len = sizeof(struct iser_cm_hdr);
 
        ret = rdma_connect(cma_id, &conn_param);
        if (ret) {
@@ -863,7 +829,8 @@ failure:
        iser_connect_error(cma_id);
 }
 
-static void iser_connected_handler(struct rdma_cm_id *cma_id)
+static void iser_connected_handler(struct rdma_cm_id *cma_id,
+                                  const void *private_data)
 {
        struct iser_conn *iser_conn;
        struct ib_qp_attr attr;
@@ -877,6 +844,15 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
        (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
        iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
+       if (private_data) {
+               u8 flags = *(u8 *)private_data;
+
+               iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
+       }
+
+       iser_info("conn %p: negotiated %s invalidation\n",
+                 iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
+
        iser_conn->state = ISER_CONN_UP;
        complete(&iser_conn->up_completion);
 }
@@ -928,7 +904,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
                iser_route_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
-               iser_connected_handler(cma_id);
+               iser_connected_handler(cma_id, event->param.conn.private_data);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -967,14 +943,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
 
 void iser_conn_init(struct iser_conn *iser_conn)
 {
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
+
        iser_conn->state = ISER_CONN_INIT;
-       iser_conn->ib_conn.post_recv_buf_count = 0;
-       init_completion(&iser_conn->ib_conn.flush_comp);
        init_completion(&iser_conn->stop_completion);
        init_completion(&iser_conn->ib_completion);
        init_completion(&iser_conn->up_completion);
        INIT_LIST_HEAD(&iser_conn->conn_list);
        mutex_init(&iser_conn->state_mutex);
+
+       ib_conn->post_recv_buf_count = 0;
+       ib_conn->reg_cqe.done = iser_reg_comp;
+       ib_conn->last_cqe.done = iser_last_comp;
+       ib_conn->last.wr_cqe = &ib_conn->last_cqe;
+       ib_conn->last.opcode = IB_WR_SEND;
+       init_completion(&ib_conn->last_comp);
 }
 
  /**
@@ -1000,9 +983,6 @@ int iser_connect(struct iser_conn   *iser_conn,
 
        iser_conn->state = ISER_CONN_PENDING;
 
-       ib_conn->beacon.wr_id = ISER_BEACON_WRID;
-       ib_conn->beacon.opcode = IB_WR_SEND;
-
        ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
                                         (void *)iser_conn,
                                         RDMA_PS_TCP, IB_QPT_RC);
@@ -1045,56 +1025,60 @@ connect_failure:
 
 int iser_post_recvl(struct iser_conn *iser_conn)
 {
-       struct ib_recv_wr rx_wr, *rx_wr_failed;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
-       struct ib_sge     sge;
+       struct iser_login_desc *desc = &iser_conn->login_desc;
+       struct ib_recv_wr wr, *wr_failed;
        int ib_ret;
 
-       sge.addr   = iser_conn->login_resp_dma;
-       sge.length = ISER_RX_LOGIN_SIZE;
-       sge.lkey   = ib_conn->device->pd->local_dma_lkey;
+       desc->sge.addr = desc->rsp_dma;
+       desc->sge.length = ISER_RX_LOGIN_SIZE;
+       desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
 
-       rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
-       rx_wr.sg_list = &sge;
-       rx_wr.num_sge = 1;
-       rx_wr.next    = NULL;
+       desc->cqe.done = iser_login_rsp;
+       wr.wr_cqe = &desc->cqe;
+       wr.sg_list = &desc->sge;
+       wr.num_sge = 1;
+       wr.next = NULL;
 
        ib_conn->post_recv_buf_count++;
-       ib_ret  = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+       ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count--;
        }
+
        return ib_ret;
 }
 
 int iser_post_recvm(struct iser_conn *iser_conn, int count)
 {
-       struct ib_recv_wr *rx_wr, *rx_wr_failed;
-       int i, ib_ret;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        unsigned int my_rx_head = iser_conn->rx_desc_head;
        struct iser_rx_desc *rx_desc;
+       struct ib_recv_wr *wr, *wr_failed;
+       int i, ib_ret;
 
-       for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
-               rx_desc         = &iser_conn->rx_descs[my_rx_head];
-               rx_wr->wr_id    = (uintptr_t)rx_desc;
-               rx_wr->sg_list  = &rx_desc->rx_sg;
-               rx_wr->num_sge  = 1;
-               rx_wr->next     = rx_wr + 1;
+       for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
+               rx_desc = &iser_conn->rx_descs[my_rx_head];
+               rx_desc->cqe.done = iser_task_rsp;
+               wr->wr_cqe = &rx_desc->cqe;
+               wr->sg_list = &rx_desc->rx_sg;
+               wr->num_sge = 1;
+               wr->next = wr + 1;
                my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
        }
 
-       rx_wr--;
-       rx_wr->next = NULL; /* mark end of work requests list */
+       wr--;
+       wr->next = NULL; /* mark end of work requests list */
 
        ib_conn->post_recv_buf_count += count;
-       ib_ret  = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+       ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count -= count;
        } else
                iser_conn->rx_desc_head = my_rx_head;
+
        return ib_ret;
 }
 
@@ -1115,7 +1099,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
                                      DMA_TO_DEVICE);
 
        wr->next = NULL;
-       wr->wr_id = (uintptr_t)tx_desc;
+       wr->wr_cqe = &tx_desc->cqe;
        wr->sg_list = tx_desc->tx_sg;
        wr->num_sge = tx_desc->num_sge;
        wr->opcode = IB_WR_SEND;
@@ -1129,149 +1113,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
        return ib_ret;
 }
 
-/**
- * is_iser_tx_desc - Indicate if the completion wr_id
- *     is a TX descriptor or not.
- * @iser_conn: iser connection
- * @wr_id: completion WR identifier
- *
- * Since we cannot rely on wc opcode in FLUSH errors
- * we must work around it by checking if the wr_id address
- * falls in the iser connection rx_descs buffer. If so
- * it is an RX descriptor, otherwise it is a TX.
- */
-static inline bool
-is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
-{
-       void *start = iser_conn->rx_descs;
-       int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
-
-       if (wr_id >= start && wr_id < start + len)
-               return false;
-
-       return true;
-}
-
-/**
- * iser_handle_comp_error() - Handle error completion
- * @ib_conn:   connection RDMA resources
- * @wc:        work completion
- *
- * Notes: We may handle a FLUSH error completion and in this case
- *        we only cleanup in case TX type was DATAOUT. For non-FLUSH
- *        error completion we should also notify iscsi layer that
- *        connection is failed (in case we passed bind stage).
- */
-static void
-iser_handle_comp_error(struct ib_conn *ib_conn,
-                      struct ib_wc *wc)
-{
-       void *wr_id = (void *)(uintptr_t)wc->wr_id;
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
-
-       if (wc->status != IB_WC_WR_FLUSH_ERR)
-               if (iser_conn->iscsi_conn)
-                       iscsi_conn_failure(iser_conn->iscsi_conn,
-                                          ISCSI_ERR_CONN_FAILED);
-
-       if (wc->wr_id == ISER_FASTREG_LI_WRID)
-               return;
-
-       if (is_iser_tx_desc(iser_conn, wr_id)) {
-               struct iser_tx_desc *desc = wr_id;
-
-               if (desc->type == ISCSI_TX_DATAOUT)
-                       kmem_cache_free(ig.desc_cache, desc);
-       } else {
-               ib_conn->post_recv_buf_count--;
-       }
-}
-
-/**
- * iser_handle_wc - handle a single work completion
- * @wc: work completion
- *
- * Soft-IRQ context, work completion can be either
- * SEND or RECV, and can turn out successful or
- * with error (or flush error).
- */
-static void iser_handle_wc(struct ib_wc *wc)
-{
-       struct ib_conn *ib_conn;
-       struct iser_tx_desc *tx_desc;
-       struct iser_rx_desc *rx_desc;
-
-       ib_conn = wc->qp->qp_context;
-       if (likely(wc->status == IB_WC_SUCCESS)) {
-               if (wc->opcode == IB_WC_RECV) {
-                       rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
-                       iser_rcv_completion(rx_desc, wc->byte_len,
-                                           ib_conn);
-               } else
-               if (wc->opcode == IB_WC_SEND) {
-                       tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
-                       iser_snd_completion(tx_desc, ib_conn);
-               } else {
-                       iser_err("Unknown wc opcode %d\n", wc->opcode);
-               }
-       } else {
-               if (wc->status != IB_WC_WR_FLUSH_ERR)
-                       iser_err("%s (%d): wr id %llx vend_err %x\n",
-                                ib_wc_status_msg(wc->status), wc->status,
-                                wc->wr_id, wc->vendor_err);
-               else
-                       iser_dbg("%s (%d): wr id %llx\n",
-                                ib_wc_status_msg(wc->status), wc->status,
-                                wc->wr_id);
-
-               if (wc->wr_id == ISER_BEACON_WRID)
-                       /* all flush errors were consumed */
-                       complete(&ib_conn->flush_comp);
-               else
-                       iser_handle_comp_error(ib_conn, wc);
-       }
-}
-
-/**
- * iser_cq_tasklet_fn - iSER completion polling loop
- * @data: iSER completion context
- *
- * Soft-IRQ context, polling connection CQ until
- * either CQ was empty or we exausted polling budget
- */
-static void iser_cq_tasklet_fn(unsigned long data)
-{
-       struct iser_comp *comp = (struct iser_comp *)data;
-       struct ib_cq *cq = comp->cq;
-       struct ib_wc *const wcs = comp->wcs;
-       int i, n, completed = 0;
-
-       while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
-               for (i = 0; i < n; i++)
-                       iser_handle_wc(&wcs[i]);
-
-               completed += n;
-               if (completed >= iser_cq_poll_limit)
-                       break;
-       }
-
-       /*
-        * It is assumed here that arming CQ only once its empty
-        * would not cause interrupts to be missed.
-        */
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
-       iser_dbg("got %d completions\n", completed);
-}
-
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
-{
-       struct iser_comp *comp = cq_context;
-
-       tasklet_schedule(&comp->tasklet);
-}
-
 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                             enum iser_data_dir cmd_dir, sector_t *sector)
 {
@@ -1319,3 +1160,21 @@ err:
        /* Not a lot we can do here, return ambiguous guard error */
        return 0x1;
 }
+
+void iser_err_comp(struct ib_wc *wc, const char *type)
+{
+       if (wc->status != IB_WC_WR_FLUSH_ERR) {
+               struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
+
+               iser_err("%s failure: %s (%d) vend_err %x\n", type,
+                        ib_wc_status_msg(wc->status), wc->status,
+                        wc->vendor_err);
+
+               if (iser_conn->iscsi_conn)
+                       iscsi_conn_failure(iser_conn->iscsi_conn,
+                                          ISCSI_ERR_CONN_FAILED);
+       } else {
+               iser_dbg("%s failure: %s (%d)\n", type,
+                        ib_wc_status_msg(wc->status), wc->status);
+       }
+}
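
For context, a sketch of how a completion handler consumes the new iser_err_comp() helper: the CQE embedded in a descriptor is recovered with container_of(), successful completions take the fast path, and everything else funnels through the helper, which separates flush errors from real failures. The my_desc and my_send_done names are illustrative, not part of the patch:

    #include <rdma/ib_verbs.h>

    struct my_desc {
            struct ib_cqe   cqe;    /* wr->wr_cqe points here when posting */
            /* ... payload, DMA address, ... */
    };

    static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
    {
            struct my_desc *desc = container_of(wc->wr_cqe, struct my_desc, cqe);

            if (unlikely(wc->status != IB_WC_SUCCESS)) {
                    iser_err_comp(wc, "send"); /* logs and fails the iSCSI conn */
                    return;
            }

            pr_debug("send completion for desc %p\n", desc);
            /* ... normal send-completion processing ... */
    }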
index 468c5e13256368c8b23b5ab25c903ead808f490f..f121e612933913dc9b24ccdbb41f63a5d8f09b84 100644 (file)
@@ -29,7 +29,6 @@
 #include <target/iscsi/iscsi_transport.h>
 #include <linux/semaphore.h>
 
-#include "isert_proto.h"
 #include "ib_isert.h"
 
 #define        ISERT_MAX_CONN          8
@@ -95,22 +94,6 @@ isert_qp_event_callback(struct ib_event *e, void *context)
        }
 }
 
-static int
-isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
-{
-       int ret;
-
-       ret = ib_query_device(ib_dev, devattr);
-       if (ret) {
-               isert_err("ib_query_device() failed: %d\n", ret);
-               return ret;
-       }
-       isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
-       isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
-
-       return 0;
-}
-
 static struct isert_comp *
 isert_comp_get(struct isert_conn *isert_conn)
 {
@@ -157,9 +140,9 @@ isert_create_qp(struct isert_conn *isert_conn,
        attr.recv_cq = comp->cq;
        attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
        attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
-       attr.cap.max_send_sge = device->dev_attr.max_sge;
-       isert_conn->max_sge = min(device->dev_attr.max_sge,
-                                 device->dev_attr.max_sge_rd);
+       attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+       isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
+                                 device->ib_device->attrs.max_sge_rd);
        attr.cap.max_recv_sge = 1;
        attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        attr.qp_type = IB_QPT_RC;
@@ -287,8 +270,7 @@ isert_free_comps(struct isert_device *device)
 }
 
 static int
-isert_alloc_comps(struct isert_device *device,
-                 struct ib_device_attr *attr)
+isert_alloc_comps(struct isert_device *device)
 {
        int i, max_cqe, ret = 0;
 
@@ -308,7 +290,7 @@ isert_alloc_comps(struct isert_device *device,
                return -ENOMEM;
        }
 
-       max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+       max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
 
        for (i = 0; i < device->comps_used; i++) {
                struct ib_cq_init_attr cq_attr = {};
@@ -344,17 +326,15 @@ out_cq:
 static int
 isert_create_device_ib_res(struct isert_device *device)
 {
-       struct ib_device_attr *dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
        int ret;
 
-       dev_attr = &device->dev_attr;
-       ret = isert_query_device(device->ib_device, dev_attr);
-       if (ret)
-               goto out;
+       isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+       isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
 
        /* assign function handlers */
-       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
-           dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+       if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+           ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
                device->use_fastreg = 1;
                device->reg_rdma_mem = isert_reg_rdma;
                device->unreg_rdma_mem = isert_unreg_rdma;
@@ -364,11 +344,11 @@ isert_create_device_ib_res(struct isert_device *device)
                device->unreg_rdma_mem = isert_unmap_cmd;
        }
 
-       ret = isert_alloc_comps(device, dev_attr);
+       ret = isert_alloc_comps(device);
        if (ret)
                goto out;
 
-       device->pd = ib_alloc_pd(device->ib_device);
+       device->pd = ib_alloc_pd(ib_dev);
        if (IS_ERR(device->pd)) {
                ret = PTR_ERR(device->pd);
                isert_err("failed to allocate pd, device %p, ret=%d\n",
@@ -377,7 +357,7 @@ isert_create_device_ib_res(struct isert_device *device)
        }
 
        /* Check signature cap */
-       device->pi_capable = dev_attr->device_cap_flags &
+       device->pi_capable = ib_dev->attrs.device_cap_flags &
                             IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
 
        return 0;
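
The hunk above is one instance of a conversion running through this whole patch: ib_query_device() is gone because the IB core now caches device attributes in ib_device->attrs at registration time, so ULPs read them directly. A minimal sketch of the new access pattern (function name illustrative):

    #include <rdma/ib_verbs.h>

    static bool supports_fastreg_and_sig(struct ib_device *ibdev)
    {
            /* attrs is filled in by the core; no query round-trip needed */
            return (ibdev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) &&
                   (ibdev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER);
    }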
@@ -676,6 +656,32 @@ out_login_buf:
        return ret;
 }
 
+static void
+isert_set_nego_params(struct isert_conn *isert_conn,
+                     struct rdma_conn_param *param)
+{
+       struct ib_device_attr *attr = &isert_conn->device->ib_device->attrs;
+
+       /* Set max inflight RDMA READ requests */
+       isert_conn->initiator_depth = min_t(u8, param->initiator_depth,
+                               attr->max_qp_init_rd_atom);
+       isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+
+       if (param->private_data) {
+               u8 flags = *(u8 *)param->private_data;
+
+               /*
+                * use remote invalidation if both the initiator
+                * and the HCA support it
+                */
+               isert_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP) &&
+                                         (attr->device_cap_flags &
+                                          IB_DEVICE_MEM_MGT_EXTENSIONS);
+               if (isert_conn->snd_w_inv)
+                       isert_info("Using remote invalidation\n");
+       }
+}
+
 static int
 isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
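
A condensed sketch of the rule isert_set_nego_params() (added above) applies, assuming, as the patch does, that the first private_data byte carries the iSER CM flags from <scsi/iser.h>: remote invalidation is used only when the initiator did not veto it and the local HCA supports memory-management extensions.

    #include <rdma/ib_verbs.h>
    #include <rdma/rdma_cm.h>
    #include <scsi/iser.h>

    static bool use_remote_inv(const struct rdma_conn_param *param,
                               const struct ib_device_attr *attr)
    {
            u8 flags;

            if (!param->private_data)
                    return false;

            flags = *(const u8 *)param->private_data;
            return !(flags & ISER_SEND_W_INV_NOT_SUP) &&
                   (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS);
    }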
@@ -714,11 +720,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
        }
        isert_conn->device = device;
 
-       /* Set max inflight RDMA READ requests */
-       isert_conn->initiator_depth = min_t(u8,
-                               event->param.conn.initiator_depth,
-                               device->dev_attr.max_qp_init_rd_atom);
-       isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+       isert_set_nego_params(isert_conn, &event->param.conn);
 
        ret = isert_conn_setup_qp(isert_conn, cma_id);
        if (ret)
@@ -1050,8 +1052,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
        ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
                                   ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
-       memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
-       tx_desc->iser_header.flags = ISER_VER;
+       memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
+       tx_desc->iser_header.flags = ISCSI_CTRL;
 
        tx_desc->num_sge = 1;
        tx_desc->isert_cmd = isert_cmd;
@@ -1097,7 +1099,14 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 
        isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
        send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
-       send_wr->opcode = IB_WR_SEND;
+
+       if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
+               send_wr->opcode  = IB_WR_SEND_WITH_INV;
+               send_wr->ex.invalidate_rkey = isert_cmd->inv_rkey;
+       } else {
+               send_wr->opcode = IB_WR_SEND;
+       }
+
        send_wr->sg_list = &tx_desc->tx_sg[0];
        send_wr->num_sge = isert_cmd->tx_desc.num_sge;
        send_wr->send_flags = IB_SEND_SIGNALED;
@@ -1486,6 +1495,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
                isert_cmd->read_va = read_va;
                isert_cmd->write_stag = write_stag;
                isert_cmd->write_va = write_va;
+               isert_cmd->inv_rkey = read_stag ? read_stag : write_stag;
 
                ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
                                        rx_desc, (unsigned char *)hdr);
@@ -1543,21 +1553,21 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 static void
 isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
 {
-       struct iser_hdr *iser_hdr = &rx_desc->iser_header;
+       struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
        uint64_t read_va = 0, write_va = 0;
        uint32_t read_stag = 0, write_stag = 0;
 
-       switch (iser_hdr->flags & 0xF0) {
+       switch (iser_ctrl->flags & 0xF0) {
        case ISCSI_CTRL:
-               if (iser_hdr->flags & ISER_RSV) {
-                       read_stag = be32_to_cpu(iser_hdr->read_stag);
-                       read_va = be64_to_cpu(iser_hdr->read_va);
+               if (iser_ctrl->flags & ISER_RSV) {
+                       read_stag = be32_to_cpu(iser_ctrl->read_stag);
+                       read_va = be64_to_cpu(iser_ctrl->read_va);
                        isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
                                  read_stag, (unsigned long long)read_va);
                }
-               if (iser_hdr->flags & ISER_WSV) {
-                       write_stag = be32_to_cpu(iser_hdr->write_stag);
-                       write_va = be64_to_cpu(iser_hdr->write_va);
+               if (iser_ctrl->flags & ISER_WSV) {
+                       write_stag = be32_to_cpu(iser_ctrl->write_stag);
+                       write_va = be64_to_cpu(iser_ctrl->write_va);
                        isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
                                  write_stag, (unsigned long long)write_va);
                }
@@ -1568,7 +1578,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
                isert_err("iSER Hello message\n");
                break;
        default:
-               isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+               isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_ctrl->flags);
                break;
        }
 
@@ -3095,12 +3105,20 @@ isert_rdma_accept(struct isert_conn *isert_conn)
        struct rdma_cm_id *cm_id = isert_conn->cm_id;
        struct rdma_conn_param cp;
        int ret;
+       struct iser_cm_hdr rsp_hdr;
 
        memset(&cp, 0, sizeof(struct rdma_conn_param));
        cp.initiator_depth = isert_conn->initiator_depth;
        cp.retry_count = 7;
        cp.rnr_retry_count = 7;
 
+       memset(&rsp_hdr, 0, sizeof(rsp_hdr));
+       rsp_hdr.flags = ISERT_ZBVA_NOT_USED;
+       if (!isert_conn->snd_w_inv)
+               rsp_hdr.flags = rsp_hdr.flags | ISERT_SEND_W_INV_NOT_USED;
+       cp.private_data = (void *)&rsp_hdr;
+       cp.private_data_len = sizeof(rsp_hdr);
+
        ret = rdma_accept(cm_id, &cp);
        if (ret) {
                isert_err("rdma_accept() failed with: %d\n", ret);
index 3d7fbc47c3434c32308136b7faec65590b74d439..8d50453eef66ce11d3cf321a535be0abe6b16d43 100644 (file)
@@ -3,6 +3,8 @@
 #include <linux/in6.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+#include <scsi/iser.h>
+
 
 #define DRV_NAME       "isert"
 #define PFX            DRV_NAME ": "
 #define isert_err(fmt, arg...) \
        pr_err(PFX "%s: " fmt, __func__ , ## arg)
 
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN       (sizeof(struct iser_ctrl) + \
+                                sizeof(struct iscsi_hdr))
+#define ISER_RECV_DATA_SEG_LEN 8192
+#define ISER_RX_PAYLOAD_SIZE   (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE     (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+
+/* QP settings */
+/* Maximal bounds on received asynchronous PDUs */
+#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2), ASYNC_EVENT(2) */
+
+#define ISERT_MAX_RX_MISC_PDUS 6 /*
+                                  * NOOP_OUT(2), TEXT(1),
+                                  * SCSI_TMFUNC(2), LOGOUT(1)
+                                  */
+
+#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
+
+#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISERT_MIN_POSTED_RX    (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+
+#define ISERT_INFLIGHT_DATAOUTS        8
+
+#define ISERT_QP_MAX_REQ_DTOS  (ISCSI_DEF_XMIT_CMDS_MAX *    \
+                               (1 + ISERT_INFLIGHT_DATAOUTS) + \
+                               ISERT_MAX_TX_MISC_PDUS  + \
+                               ISERT_MAX_RX_MISC_PDUS)
+
+#define ISER_RX_PAD_SIZE       (ISER_RECV_DATA_SEG_LEN + 4096 - \
+               (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
+
 #define ISCSI_ISER_SG_TABLESIZE                256
 #define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
 #define ISER_BEACON_WRID               0xfffffffffffffffeULL
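
The queue-depth macros above migrate verbatim from the deleted isert_proto.h (see below); a quick worked example of what they size the queues to with the defaults they define:

    /* With ISCSI_DEF_XMIT_CMDS_MAX = 128 and ISERT_INFLIGHT_DATAOUTS = 8:
     *
     *   ISERT_QP_MAX_REQ_DTOS  = 128 * (1 + 8) + 4 + 6 = 1162
     *   ISERT_QP_MAX_RECV_DTOS = 128
     *   ISERT_MIN_POSTED_RX    = 128 >> 2 = 32
     *
     * i.e. the send queue leaves room for every outstanding command plus its
     * in-flight DATA-OUTs and a small budget of miscellaneous PDUs.
     */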
@@ -56,7 +90,7 @@ enum iser_conn_state {
 };
 
 struct iser_rx_desc {
-       struct iser_hdr iser_header;
+       struct iser_ctrl iser_header;
        struct iscsi_hdr iscsi_header;
        char            data[ISER_RECV_DATA_SEG_LEN];
        u64             dma_addr;
@@ -65,7 +99,7 @@ struct iser_rx_desc {
 } __packed;
 
 struct iser_tx_desc {
-       struct iser_hdr iser_header;
+       struct iser_ctrl iser_header;
        struct iscsi_hdr iscsi_header;
        enum isert_desc_type type;
        u64             dma_addr;
@@ -129,6 +163,7 @@ struct isert_cmd {
        uint32_t                write_stag;
        uint64_t                read_va;
        uint64_t                write_va;
+       uint32_t                inv_rkey;
        u64                     pdu_buf_dma;
        u32                     pdu_buf_len;
        struct isert_conn       *conn;
@@ -176,6 +211,7 @@ struct isert_conn {
        struct work_struct      release_work;
        struct ib_recv_wr       beacon;
        bool                    logout_posted;
+       bool                    snd_w_inv;
 };
 
 #define ISERT_MAX_CQ 64
@@ -207,7 +243,6 @@ struct isert_device {
        struct isert_comp       *comps;
        int                     comps_used;
        struct list_head        dev_node;
-       struct ib_device_attr   dev_attr;
        int                     (*reg_rdma_mem)(struct iscsi_conn *conn,
                                                    struct iscsi_cmd *cmd,
                                                    struct isert_rdma_wr *wr);
diff --git a/drivers/infiniband/ulp/isert/isert_proto.h b/drivers/infiniband/ulp/isert/isert_proto.h
deleted file mode 100644 (file)
index 4dccd31..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/* From iscsi_iser.h */
-
-struct iser_hdr {
-       u8      flags;
-       u8      rsvd[3];
-       __be32  write_stag; /* write rkey */
-       __be64  write_va;
-       __be32  read_stag;  /* read rkey */
-       __be64  read_va;
-} __packed;
-
-/*Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN  (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
-
-#define ISER_RECV_DATA_SEG_LEN  8192
-#define ISER_RX_PAYLOAD_SIZE    (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
-#define ISER_RX_LOGIN_SIZE      (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
-
-/* QP settings */
-/* Maximal bounds on received asynchronous PDUs */
-#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2)   */
-
-#define ISERT_MAX_RX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1),         *
-                                  * SCSI_TMFUNC(2), LOGOUT(1) */
-
-#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
-
-#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
-
-#define ISERT_MIN_POSTED_RX    (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-
-#define ISERT_INFLIGHT_DATAOUTS        8
-
-#define ISERT_QP_MAX_REQ_DTOS  (ISCSI_DEF_XMIT_CMDS_MAX *    \
-                               (1 + ISERT_INFLIGHT_DATAOUTS) + \
-                               ISERT_MAX_TX_MISC_PDUS  + \
-                               ISERT_MAX_RX_MISC_PDUS)
-
-#define ISER_RX_PAD_SIZE       (ISER_RECV_DATA_SEG_LEN + 4096 - \
-               (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
-
-#define ISER_VER       0x10
-#define ISER_WSV       0x08
-#define ISER_RSV       0x04
-#define ISCSI_CTRL     0x10
-#define ISER_HELLO     0x20
-#define ISER_HELLORPLY 0x30
index 3db9a659719b0f6283af610bf01b8f65d27292a9..03022f6420d770a469d818ee07fb7a4a4babda3b 100644 (file)
@@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count,
 
 static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device, void *client_data);
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+               const char *opname);
 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
 
 static struct scsi_transport_template *ib_srp_transport_template;
@@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
                                  dev->max_pages_per_mr);
 }
 
+static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct srp_rdma_ch *ch = cq->cq_context;
+
+       complete(&ch->done);
+}
+
+static struct ib_cqe srp_drain_cqe = {
+       .done           = srp_drain_done,
+};
+
 /**
  * srp_destroy_qp() - destroy an RDMA queue pair
  * @ch: SRP RDMA channel.
@@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
 static void srp_destroy_qp(struct srp_rdma_ch *ch)
 {
        static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
-       static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
+       static struct ib_recv_wr wr = { 0 };
        struct ib_recv_wr *bad_wr;
        int ret;
 
+       wr.wr_cqe = &srp_drain_cqe;
        /* Destroying a QP and reusing ch->done is only safe if not connected */
        WARN_ON_ONCE(ch->connected);
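
The drain trick being wired up here is worth spelling out: once the QP is in the error state, a freshly posted receive WR can only complete with a flush status, and since receive completions are delivered in order, seeing it come back guarantees every receive WR posted before it has flushed too. A condensed sketch under hypothetical names, assuming the CQ's context is the completion to signal:

    #include <linux/completion.h>
    #include <rdma/ib_verbs.h>

    static void drain_done(struct ib_cq *cq, struct ib_wc *wc)
    {
            struct completion *done = cq->cq_context;

            complete(done);
    }

    static struct ib_cqe drain_cqe = { .done = drain_done };

    static void drain_qp(struct ib_qp *qp, struct completion *done)
    {
            struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
            struct ib_recv_wr wr = { .wr_cqe = &drain_cqe }, *bad_wr;

            init_completion(done);
            if (ib_modify_qp(qp, &attr, IB_QP_STATE))
                    return;
            if (ib_post_recv(qp, &wr, &bad_wr))
                    return;
            wait_for_completion(done);      /* all earlier recv WRs flushed */
    }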
 
@@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        struct ib_fmr_pool *fmr_pool = NULL;
        struct srp_fr_pool *fr_pool = NULL;
        const int m = dev->use_fast_reg ? 3 : 1;
-       struct ib_cq_init_attr cq_attr = {};
        int ret;
 
        init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
        if (!init_attr)
                return -ENOMEM;
 
-       /* + 1 for SRP_LAST_WR_ID */
-       cq_attr.cqe = target->queue_size + 1;
-       cq_attr.comp_vector = ch->comp_vector;
-       recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
-                              &cq_attr);
+       /* queue_size + 1 for ib_drain_qp */
+       recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
+                               ch->comp_vector, IB_POLL_SOFTIRQ);
        if (IS_ERR(recv_cq)) {
                ret = PTR_ERR(recv_cq);
                goto err;
        }
 
-       cq_attr.cqe = m * target->queue_size;
-       cq_attr.comp_vector = ch->comp_vector;
-       send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
-                              &cq_attr);
+       send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
+                               ch->comp_vector, IB_POLL_DIRECT);
        if (IS_ERR(send_cq)) {
                ret = PTR_ERR(send_cq);
                goto err_recv_cq;
        }
 
-       ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
-
        init_attr->event_handler       = srp_qp_event;
        init_attr->cap.max_send_wr     = m * target->queue_size;
        init_attr->cap.max_recv_wr     = target->queue_size + 1;
@@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        if (ch->qp)
                srp_destroy_qp(ch);
        if (ch->recv_cq)
-               ib_destroy_cq(ch->recv_cq);
+               ib_free_cq(ch->recv_cq);
        if (ch->send_cq)
-               ib_destroy_cq(ch->send_cq);
+               ib_free_cq(ch->send_cq);
 
        ch->qp = qp;
        ch->recv_cq = recv_cq;
@@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        return 0;
 
 err_qp:
-       ib_destroy_qp(qp);
+       srp_destroy_qp(ch);
 
 err_send_cq:
-       ib_destroy_cq(send_cq);
+       ib_free_cq(send_cq);
 
 err_recv_cq:
-       ib_destroy_cq(recv_cq);
+       ib_free_cq(recv_cq);
 
 err:
        kfree(init_attr);
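
ib_alloc_cq(), used above in place of ib_create_cq() plus manual ib_req_notify_cq(), takes ownership of arming and polling and only asks the caller for a poll context. A minimal sketch of allocating a CQ pair the way this hunk does (names illustrative): the receive CQ completes from soft-IRQ, while the send CQ is IB_POLL_DIRECT and must be reaped explicitly, e.g. via ib_process_cq_direct(send_cq, -1) as __srp_get_tx_iu() now does in a hunk further down.

    #include <rdma/ib_verbs.h>

    static int alloc_cq_pair(struct ib_device *ibdev, void *ctx, int vector,
                             int recv_cqe, int send_cqe,
                             struct ib_cq **recv_cq, struct ib_cq **send_cq)
    {
            *recv_cq = ib_alloc_cq(ibdev, ctx, recv_cqe, vector, IB_POLL_SOFTIRQ);
            if (IS_ERR(*recv_cq))
                    return PTR_ERR(*recv_cq);

            *send_cq = ib_alloc_cq(ibdev, ctx, send_cqe, vector, IB_POLL_DIRECT);
            if (IS_ERR(*send_cq)) {
                    ib_free_cq(*recv_cq);
                    return PTR_ERR(*send_cq);
            }

            return 0;
    }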
@@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target,
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }
+
        srp_destroy_qp(ch);
-       ib_destroy_cq(ch->send_cq);
-       ib_destroy_cq(ch->recv_cq);
+       ib_free_cq(ch->send_cq);
+       ib_free_cq(ch->recv_cq);
 
        /*
         * Avoid that the SCSI error handler tries to use this channel after
@@ -1041,18 +1048,25 @@ out:
        return ret <= 0 ? ret : -ENODEV;
 }
 
-static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
+static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       srp_handle_qp_err(cq, wc, "INV RKEY");
+}
+
+static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
+               u32 rkey)
 {
        struct ib_send_wr *bad_wr;
        struct ib_send_wr wr = {
                .opcode             = IB_WR_LOCAL_INV,
-               .wr_id              = LOCAL_INV_WR_ID_MASK,
                .next               = NULL,
                .num_sge            = 0,
                .send_flags         = 0,
                .ex.invalidate_rkey = rkey,
        };
 
+       wr.wr_cqe = &req->reg_cqe;
+       req->reg_cqe.done = srp_inv_rkey_err_done;
        return ib_post_send(ch->qp, &wr, &bad_wr);
 }
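
For reference, a generic sketch of the local-invalidate posting pattern srp_inv_rkey() now follows (names hypothetical): the work request carries a pointer to a caller-owned CQE instead of a magic wr_id bit, so its error completion can be dispatched without decoding masks.

    #include <rdma/ib_verbs.h>

    static int post_local_inv(struct ib_qp *qp, struct ib_cqe *cqe, u32 rkey)
    {
            struct ib_send_wr *bad_wr;
            struct ib_send_wr wr = {
                    .opcode             = IB_WR_LOCAL_INV,
                    .wr_cqe             = cqe,      /* cqe->done set by caller */
                    .ex.invalidate_rkey = rkey,
            };

            return ib_post_send(qp, &wr, &bad_wr);
    }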
 
@@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
                struct srp_fr_desc **pfr;
 
                for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
-                       res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
+                       res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
                        if (res < 0) {
                                shost_printk(KERN_ERR, target->scsi_host, PFX
                                  "Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1312,7 +1326,13 @@ reset_state:
        return 0;
 }
 
+static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       srp_handle_qp_err(cq, wc, "FAST REG");
+}
+
 static int srp_map_finish_fr(struct srp_map_state *state,
+                            struct srp_request *req,
                             struct srp_rdma_ch *ch, int sg_nents)
 {
        struct srp_target_port *target = ch->target;
@@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state,
        if (unlikely(n < 0))
                return n;
 
+       req->reg_cqe.done = srp_reg_mr_err_done;
+
        wr.wr.next = NULL;
        wr.wr.opcode = IB_WR_REG_MR;
-       wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+       wr.wr.wr_cqe = &req->reg_cqe;
        wr.wr.num_sge = 0;
        wr.wr.send_flags = 0;
        wr.mr = desc->mr;
@@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
        while (count) {
                int i, n;
 
-               n = srp_map_finish_fr(state, ch, count);
+               n = srp_map_finish_fr(state, req, ch, count);
                if (unlikely(n < 0))
                        return n;
 
@@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
                idb_sg->dma_length = idb_sg->length;          /* hack^2 */
 #endif
-               ret = srp_map_finish_fr(&state, ch, 1);
+               ret = srp_map_finish_fr(&state, req, ch, 1);
                if (ret < 0)
                        return ret;
        } else if (dev->use_fmr) {
@@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
        s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
        struct srp_iu *iu;
 
-       srp_send_completion(ch->send_cq, ch);
+       ib_process_cq_direct(ch->send_cq, -1);
 
        if (list_empty(&ch->free_tx))
                return NULL;
@@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
        return iu;
 }
 
+static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+       struct srp_rdma_ch *ch = cq->cq_context;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               srp_handle_qp_err(cq, wc, "SEND");
+               return;
+       }
+
+       list_add(&iu->list, &ch->free_tx);
+}
+
 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
 {
        struct srp_target_port *target = ch->target;
@@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
        list.length = len;
        list.lkey   = target->lkey;
 
+       iu->cqe.done = srp_send_done;
+
        wr.next       = NULL;
-       wr.wr_id      = (uintptr_t) iu;
+       wr.wr_cqe     = &iu->cqe;
        wr.sg_list    = &list;
        wr.num_sge    = 1;
        wr.opcode     = IB_WR_SEND;
@@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
        list.length = iu->size;
        list.lkey   = target->lkey;
 
+       iu->cqe.done = srp_recv_done;
+
        wr.next     = NULL;
-       wr.wr_id    = (uintptr_t) iu;
+       wr.wr_cqe   = &iu->cqe;
        wr.sg_list  = &list;
        wr.num_sge  = 1;
 
@@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch,
                             "problems processing SRP_AER_REQ\n");
 }
 
-static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+       struct srp_rdma_ch *ch = cq->cq_context;
        struct srp_target_port *target = ch->target;
        struct ib_device *dev = target->srp_host->srp_dev->dev;
-       struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
        int res;
        u8 opcode;
 
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               srp_handle_qp_err(cq, wc, "RECV");
+               return;
+       }
+
        ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
                                   DMA_FROM_DEVICE);
 
@@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work)
                srp_start_tl_fail_timers(target->rport);
 }
 
-static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
-                             bool send_err, struct srp_rdma_ch *ch)
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+               const char *opname)
 {
+       struct srp_rdma_ch *ch = cq->cq_context;
        struct srp_target_port *target = ch->target;
 
-       if (wr_id == SRP_LAST_WR_ID) {
-               complete(&ch->done);
-               return;
-       }
-
        if (ch->connected && !target->qp_in_error) {
-               if (wr_id & LOCAL_INV_WR_ID_MASK) {
-                       shost_printk(KERN_ERR, target->scsi_host, PFX
-                                    "LOCAL_INV failed with status %s (%d)\n",
-                                    ib_wc_status_msg(wc_status), wc_status);
-               } else if (wr_id & FAST_REG_WR_ID_MASK) {
-                       shost_printk(KERN_ERR, target->scsi_host, PFX
-                                    "FAST_REG_MR failed status %s (%d)\n",
-                                    ib_wc_status_msg(wc_status), wc_status);
-               } else {
-                       shost_printk(KERN_ERR, target->scsi_host,
-                                    PFX "failed %s status %s (%d) for iu %p\n",
-                                    send_err ? "send" : "receive",
-                                    ib_wc_status_msg(wc_status), wc_status,
-                                    (void *)(uintptr_t)wr_id);
-               }
+               shost_printk(KERN_ERR, target->scsi_host,
+                            PFX "failed %s status %s (%d) for CQE %p\n",
+                            opname, ib_wc_status_msg(wc->status), wc->status,
+                            wc->wr_cqe);
                queue_work(system_long_wq, &target->tl_err_work);
        }
        target->qp_in_error = true;
 }
 
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
-{
-       struct srp_rdma_ch *ch = ch_ptr;
-       struct ib_wc wc;
-
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-       while (ib_poll_cq(cq, 1, &wc) > 0) {
-               if (likely(wc.status == IB_WC_SUCCESS)) {
-                       srp_handle_recv(ch, &wc);
-               } else {
-                       srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
-               }
-       }
-}
-
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
-{
-       struct srp_rdma_ch *ch = ch_ptr;
-       struct ib_wc wc;
-       struct srp_iu *iu;
-
-       while (ib_poll_cq(cq, 1, &wc) > 0) {
-               if (likely(wc.status == IB_WC_SUCCESS)) {
-                       iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
-                       list_add(&iu->list, &ch->free_tx);
-               } else {
-                       srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
-               }
-       }
-}
-
 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 {
        struct srp_target_port *target = host_to_target(shost);
@@ -3439,27 +3438,17 @@ free_host:
 static void srp_add_one(struct ib_device *device)
 {
        struct srp_device *srp_dev;
-       struct ib_device_attr *dev_attr;
        struct srp_host *host;
        int mr_page_shift, p;
        u64 max_pages_per_mr;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               pr_warn("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
        if (!srp_dev)
-               goto free_attr;
+               return;
 
        srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
                            device->map_phys_fmr && device->unmap_fmr);
-       srp_dev->has_fr = (dev_attr->device_cap_flags &
+       srp_dev->has_fr = (device->attrs.device_cap_flags &
                           IB_DEVICE_MEM_MGT_EXTENSIONS);
        if (!srp_dev->has_fmr && !srp_dev->has_fr)
                dev_warn(&device->dev, "neither FMR nor FR is supported\n");
@@ -3473,23 +3462,23 @@ static void srp_add_one(struct ib_device *device)
         * minimum of 4096 bytes. We're unlikely to build large sglists
         * out of smaller entries.
         */
-       mr_page_shift           = max(12, ffs(dev_attr->page_size_cap) - 1);
+       mr_page_shift           = max(12, ffs(device->attrs.page_size_cap) - 1);
        srp_dev->mr_page_size   = 1 << mr_page_shift;
        srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
-       max_pages_per_mr        = dev_attr->max_mr_size;
+       max_pages_per_mr        = device->attrs.max_mr_size;
        do_div(max_pages_per_mr, srp_dev->mr_page_size);
        srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
                                          max_pages_per_mr);
        if (srp_dev->use_fast_reg) {
                srp_dev->max_pages_per_mr =
                        min_t(u32, srp_dev->max_pages_per_mr,
-                             dev_attr->max_fast_reg_page_list_len);
+                             device->attrs.max_fast_reg_page_list_len);
        }
        srp_dev->mr_max_size    = srp_dev->mr_page_size *
                                   srp_dev->max_pages_per_mr;
-       pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
-                device->name, mr_page_shift, dev_attr->max_mr_size,
-                dev_attr->max_fast_reg_page_list_len,
+       pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+                device->name, mr_page_shift, device->attrs.max_mr_size,
+                device->attrs.max_fast_reg_page_list_len,
                 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
        INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -3517,17 +3506,13 @@ static void srp_add_one(struct ib_device *device)
        }
 
        ib_set_client_data(device, &srp_client, srp_dev);
-
-       goto free_attr;
+       return;
 
 err_pd:
        ib_dealloc_pd(srp_dev->pd);
 
 free_dev:
        kfree(srp_dev);
-
-free_attr:
-       kfree(dev_attr);
 }
 
 static void srp_remove_one(struct ib_device *device, void *client_data)
@@ -3587,8 +3572,6 @@ static int __init srp_init_module(void)
 {
        int ret;
 
-       BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
-
        if (srp_sg_tablesize) {
                pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
                if (!cmd_sg_entries)
index f6af531f9f32c990cd30c7139d247c643144e55d..9e05ce4a04fd08b0a450f5a546b7fb51901c5afe 100644 (file)
@@ -66,11 +66,6 @@ enum {
        SRP_TAG_TSK_MGMT        = 1U << 31,
 
        SRP_MAX_PAGES_PER_MR    = 512,
-
-       LOCAL_INV_WR_ID_MASK    = 1,
-       FAST_REG_WR_ID_MASK     = 2,
-
-       SRP_LAST_WR_ID          = 0xfffffffcU,
 };
 
 enum srp_target_state {
@@ -128,6 +123,7 @@ struct srp_request {
        struct srp_direct_buf  *indirect_desc;
        dma_addr_t              indirect_dma_addr;
        short                   nmdesc;
+       struct ib_cqe           reg_cqe;
 };
 
 /**
@@ -231,6 +227,7 @@ struct srp_iu {
        void                   *buf;
        size_t                  size;
        enum dma_data_direction direction;
+       struct ib_cqe           cqe;
 };
 
 /**
index bc5470c43d26080c5fa0f9d0e6427e848ae2771a..0c37fee363b1d60eb3dd63ebfdc6330b2e14b562 100644 (file)
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
 static struct ib_client srpt_client;
 static void srpt_release_channel(struct srpt_rdma_ch *ch);
 static int srpt_queue_status(struct se_cmd *cmd);
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
 
 /**
  * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
@@ -341,10 +343,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
-       iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
-       iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
-       iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
-       iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
+       iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
+       iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id);
+       iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver);
+       iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
@@ -453,6 +455,7 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
  * srpt_mad_recv_handler() - MAD reception callback function.
  */
 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
+                                 struct ib_mad_send_buf *send_buf,
                                  struct ib_mad_recv_wc *mad_wc)
 {
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
@@ -778,12 +781,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
        struct ib_recv_wr wr, *bad_wr;
 
        BUG_ON(!sdev);
-       wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
-
        list.addr = ioctx->ioctx.dma;
        list.length = srp_max_req_size;
        list.lkey = sdev->pd->local_dma_lkey;
 
+       ioctx->ioctx.cqe.done = srpt_recv_done;
+       wr.wr_cqe = &ioctx->ioctx.cqe;
        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;
@@ -819,8 +822,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
        list.length = len;
        list.lkey = sdev->pd->local_dma_lkey;
 
+       ioctx->ioctx.cqe.done = srpt_send_done;
        wr.next = NULL;
-       wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
+       wr.wr_cqe = &ioctx->ioctx.cqe;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
@@ -1052,13 +1056,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 
        BUG_ON(!ch);
        BUG_ON(!ioctx);
-       BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
+       BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
 
        while (ioctx->n_rdma)
-               kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
+               kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
 
-       kfree(ioctx->rdma_ius);
-       ioctx->rdma_ius = NULL;
+       kfree(ioctx->rdma_wrs);
+       ioctx->rdma_wrs = NULL;
 
        if (ioctx->mapped_sg_count) {
                sg = ioctx->sg;
@@ -1082,7 +1086,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
        struct scatterlist *sg, *sg_orig;
        int sg_cnt;
        enum dma_data_direction dir;
-       struct rdma_iu *riu;
+       struct ib_rdma_wr *riu;
        struct srp_direct_buf *db;
        dma_addr_t dma_addr;
        struct ib_sge *sge;
@@ -1109,23 +1113,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 
        ioctx->mapped_sg_count = count;
 
-       if (ioctx->rdma_ius && ioctx->n_rdma_ius)
-               nrdma = ioctx->n_rdma_ius;
+       if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
+               nrdma = ioctx->n_rdma_wrs;
        else {
                nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
                        + ioctx->n_rbuf;
 
-               ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
-               if (!ioctx->rdma_ius)
+               ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
+                               GFP_KERNEL);
+               if (!ioctx->rdma_wrs)
                        goto free_mem;
 
-               ioctx->n_rdma_ius = nrdma;
+               ioctx->n_rdma_wrs = nrdma;
        }
 
        db = ioctx->rbufs;
        tsize = cmd->data_length;
        dma_len = ib_sg_dma_len(dev, &sg[0]);
-       riu = ioctx->rdma_ius;
+       riu = ioctx->rdma_wrs;
 
        /*
         * For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1144,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
             j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
                rsize = be32_to_cpu(db->len);
                raddr = be64_to_cpu(db->va);
-               riu->raddr = raddr;
+               riu->remote_addr = raddr;
                riu->rkey = be32_to_cpu(db->key);
-               riu->sge_cnt = 0;
+               riu->wr.num_sge = 0;
 
                /* calculate how many sge required for this remote_buf */
                while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1170,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                                rsize = 0;
                        }
 
-                       ++riu->sge_cnt;
+                       ++riu->wr.num_sge;
 
-                       if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
+                       if (rsize > 0 &&
+                           riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
                                ++ioctx->n_rdma;
-                               riu->sge =
-                                   kmalloc(riu->sge_cnt * sizeof *riu->sge,
-                                           GFP_KERNEL);
-                               if (!riu->sge)
+                               riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+                                               sizeof(*riu->wr.sg_list),
+                                               GFP_KERNEL);
+                               if (!riu->wr.sg_list)
                                        goto free_mem;
 
                                ++riu;
-                               riu->sge_cnt = 0;
-                               riu->raddr = raddr;
+                               riu->wr.num_sge = 0;
+                               riu->remote_addr = raddr;
                                riu->rkey = be32_to_cpu(db->key);
                        }
                }
 
                ++ioctx->n_rdma;
-               riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
-                                  GFP_KERNEL);
-               if (!riu->sge)
+               riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+                                       sizeof(*riu->wr.sg_list),
+                                       GFP_KERNEL);
+               if (!riu->wr.sg_list)
                        goto free_mem;
        }
 
        db = ioctx->rbufs;
        tsize = cmd->data_length;
-       riu = ioctx->rdma_ius;
+       riu = ioctx->rdma_wrs;
        sg = sg_orig;
        dma_len = ib_sg_dma_len(dev, &sg[0]);
        dma_addr = ib_sg_dma_address(dev, &sg[0]);
@@ -1200,7 +1207,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
        for (i = 0, j = 0;
             j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
                rsize = be32_to_cpu(db->len);
-               sge = riu->sge;
+               sge = riu->wr.sg_list;
                k = 0;
 
                while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1239,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                        }
 
                        ++k;
-                       if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
+                       if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
                                ++riu;
-                               sge = riu->sge;
+                               sge = riu->wr.sg_list;
                                k = 0;
                        } else if (rsize > 0 && tsize > 0)
                                ++sge;
@@ -1277,8 +1284,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
-       ioctx->n_rdma_ius = 0;
-       ioctx->rdma_ius = NULL;
+       ioctx->n_rdma_wrs = 0;
+       ioctx->rdma_wrs = NULL;
        ioctx->mapped_sg_count = 0;
        init_completion(&ioctx->tx_done);
        ioctx->queue_status_only = false;
@@ -1380,118 +1387,44 @@ out:
 }
 
 /**
- * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
- */
-static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
-{
-       struct srpt_send_ioctx *ioctx;
-       enum srpt_command_state state;
-       u32 index;
-
-       atomic_inc(&ch->sq_wr_avail);
-
-       index = idx_from_wr_id(wr_id);
-       ioctx = ch->ioctx_ring[index];
-       state = srpt_get_cmd_state(ioctx);
-
-       WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
-               && state != SRPT_STATE_MGMT_RSP_SENT
-               && state != SRPT_STATE_NEED_DATA
-               && state != SRPT_STATE_DONE);
-
-       /* If SRP_RSP sending failed, undo the ch->req_lim change. */
-       if (state == SRPT_STATE_CMD_RSP_SENT
-           || state == SRPT_STATE_MGMT_RSP_SENT)
-               atomic_dec(&ch->req_lim);
-
-       srpt_abort_cmd(ioctx);
-}
-
-/**
- * srpt_handle_send_comp() - Process an IB send completion notification.
- */
-static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
-                                 struct srpt_send_ioctx *ioctx)
-{
-       enum srpt_command_state state;
-
-       atomic_inc(&ch->sq_wr_avail);
-
-       state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-
-       if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
-                   && state != SRPT_STATE_MGMT_RSP_SENT
-                   && state != SRPT_STATE_DONE))
-               pr_debug("state = %d\n", state);
-
-       if (state != SRPT_STATE_DONE) {
-               srpt_unmap_sg_to_ib_sge(ch, ioctx);
-               transport_generic_free_cmd(&ioctx->cmd, 0);
-       } else {
-               pr_err("IB completion has been received too late for"
-                      " wr_id = %u.\n", ioctx->ioctx.index);
-       }
-}
-
-/**
- * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
- *
  * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
  * the data that has been transferred via IB RDMA had to be postponed until the
  * check_stop_free() callback.  None of this is necessary anymore and needs to
  * be cleaned up.
  */
-static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
-                                 struct srpt_send_ioctx *ioctx,
-                                 enum srpt_opcode opcode)
+static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
+
        WARN_ON(ioctx->n_rdma <= 0);
        atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
 
-       if (opcode == SRPT_RDMA_READ_LAST) {
-               if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
-                                               SRPT_STATE_DATA_IN))
-                       target_execute_cmd(&ioctx->cmd);
-               else
-                       pr_err("%s[%d]: wrong state = %d\n", __func__,
-                              __LINE__, srpt_get_cmd_state(ioctx));
-       } else if (opcode == SRPT_RDMA_ABORT) {
-               ioctx->rdma_aborted = true;
-       } else {
-               WARN(true, "unexpected opcode %d\n", opcode);
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
+                       ioctx, wc->status);
+               srpt_abort_cmd(ioctx);
+               return;
        }
+
+       if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+                                       SRPT_STATE_DATA_IN))
+               target_execute_cmd(&ioctx->cmd);
+       else
+               pr_err("%s[%d]: wrong state = %d\n", __func__,
+                      __LINE__, srpt_get_cmd_state(ioctx));
 }
 
-/**
- * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
- */
-static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
-                                     struct srpt_send_ioctx *ioctx,
-                                     enum srpt_opcode opcode)
+static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       enum srpt_command_state state;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
 
-       state = srpt_get_cmd_state(ioctx);
-       switch (opcode) {
-       case SRPT_RDMA_READ_LAST:
-               if (ioctx->n_rdma <= 0) {
-                       pr_err("Received invalid RDMA read"
-                              " error completion with idx %d\n",
-                              ioctx->ioctx.index);
-                       break;
-               }
-               atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
-               if (state == SRPT_STATE_NEED_DATA)
-                       srpt_abort_cmd(ioctx);
-               else
-                       pr_err("%s[%d]: wrong state = %d\n",
-                              __func__, __LINE__, state);
-               break;
-       case SRPT_RDMA_WRITE_LAST:
-               break;
-       default:
-               pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
-               break;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
+                       ioctx, wc->status);
+               srpt_abort_cmd(ioctx);
        }
 }
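
Why the two "done" handlers above are asymmetric (a summary inferred from the surrounding code, not text from the patch):

    /* An RDMA_READ completion means the target now holds the initiator's
     * data, so it can move the command into execution via
     * target_execute_cmd(). A successful RDMA_WRITE needs no action here,
     * because the SRP response that follows it carries the command to
     * completion. Both handlers abort the command on a genuine error.
     */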
 
@@ -1926,32 +1859,26 @@ out:
        return;
 }
 
-static void srpt_process_rcv_completion(struct ib_cq *cq,
-                                       struct srpt_rdma_ch *ch,
-                                       struct ib_wc *wc)
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct srpt_device *sdev = ch->sport->sdev;
-       struct srpt_recv_ioctx *ioctx;
-       u32 index;
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_recv_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
 
-       index = idx_from_wr_id(wc->wr_id);
        if (wc->status == IB_WC_SUCCESS) {
                int req_lim;
 
                req_lim = atomic_dec_return(&ch->req_lim);
                if (unlikely(req_lim < 0))
                        pr_err("req_lim = %d < 0\n", req_lim);
-               ioctx = sdev->ioctx_ring[index];
                srpt_handle_new_iu(ch, ioctx, NULL);
        } else {
-               pr_info("receiving failed for idx %u with status %d\n",
-                       index, wc->status);
+               pr_info("receiving failed for ioctx %p with status %d\n",
+                       ioctx, wc->status);
        }
 }
 
 /**
- * srpt_process_send_completion() - Process an IB send completion.
- *
  * Note: Although this has not yet been observed during tests, at least in
  * theory it is possible that the srpt_get_send_ioctx() call invoked by
  * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,108 +1891,51 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
  * are queued on cmd_wait_list. The code below processes these delayed
  * requests one at a time.
  */
-static void srpt_process_send_completion(struct ib_cq *cq,
-                                        struct srpt_rdma_ch *ch,
-                                        struct ib_wc *wc)
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct srpt_send_ioctx *send_ioctx;
-       uint32_t index;
-       enum srpt_opcode opcode;
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+       enum srpt_command_state state;
 
-       index = idx_from_wr_id(wc->wr_id);
-       opcode = opcode_from_wr_id(wc->wr_id);
-       send_ioctx = ch->ioctx_ring[index];
-       if (wc->status == IB_WC_SUCCESS) {
-               if (opcode == SRPT_SEND)
-                       srpt_handle_send_comp(ch, send_ioctx);
-               else {
-                       WARN_ON(opcode != SRPT_RDMA_ABORT &&
-                               wc->opcode != IB_WC_RDMA_READ);
-                       srpt_handle_rdma_comp(ch, send_ioctx, opcode);
-               }
+       state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+
+       WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
+               state != SRPT_STATE_MGMT_RSP_SENT);
+
+       atomic_inc(&ch->sq_wr_avail);
+
+       if (wc->status != IB_WC_SUCCESS) {
+               pr_info("sending response for ioctx 0x%p failed"
+                       " with status %d\n", ioctx, wc->status);
+
+               atomic_dec(&ch->req_lim);
+               srpt_abort_cmd(ioctx);
+               goto out;
+       }
+
+       if (state != SRPT_STATE_DONE) {
+               srpt_unmap_sg_to_ib_sge(ch, ioctx);
+               transport_generic_free_cmd(&ioctx->cmd, 0);
        } else {
-               if (opcode == SRPT_SEND) {
-                       pr_info("sending response for idx %u failed"
-                               " with status %d\n", index, wc->status);
-                       srpt_handle_send_err_comp(ch, wc->wr_id);
-               } else if (opcode != SRPT_RDMA_MID) {
-                       pr_info("RDMA t %d for idx %u failed with"
-                               " status %d\n", opcode, index, wc->status);
-                       srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
-               }
+               pr_err("IB completion has been received too late for"
+                      " ioctx index %u\n", ioctx->ioctx.index);
        }
 
-       while (unlikely(opcode == SRPT_SEND
-                       && !list_empty(&ch->cmd_wait_list)
-                       && srpt_get_ch_state(ch) == CH_LIVE
-                       && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
+out:
+       while (!list_empty(&ch->cmd_wait_list) &&
+              srpt_get_ch_state(ch) == CH_LIVE &&
+              (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
                struct srpt_recv_ioctx *recv_ioctx;
 
                recv_ioctx = list_first_entry(&ch->cmd_wait_list,
                                              struct srpt_recv_ioctx,
                                              wait_list);
                list_del(&recv_ioctx->wait_list);
-               srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
-       }
-}
-
-static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
-{
-       struct ib_wc *const wc = ch->wc;
-       int i, n;
-
-       WARN_ON(cq != ch->cq);
-
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-       while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
-               for (i = 0; i < n; i++) {
-                       if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
-                               srpt_process_rcv_completion(cq, ch, &wc[i]);
-                       else
-                               srpt_process_send_completion(cq, ch, &wc[i]);
-               }
+               srpt_handle_new_iu(ch, recv_ioctx, ioctx);
        }
 }
 
-/**
- * srpt_completion() - IB completion queue callback function.
- *
- * Notes:
- * - It is guaranteed that a completion handler will never be invoked
- *   concurrently on two different CPUs for the same completion queue. See also
- *   Documentation/infiniband/core_locking.txt and the implementation of
- *   handle_edge_irq() in kernel/irq/chip.c.
- * - When threaded IRQs are enabled, completion handlers are invoked in thread
- *   context instead of interrupt context.
- */
-static void srpt_completion(struct ib_cq *cq, void *ctx)
-{
-       struct srpt_rdma_ch *ch = ctx;
-
-       wake_up_interruptible(&ch->wait_queue);
-}
-
-static int srpt_compl_thread(void *arg)
-{
-       struct srpt_rdma_ch *ch;
-
-       /* Hibernation / freezing of the SRPT kernel thread is not supported. */
-       current->flags |= PF_NOFREEZE;
-
-       ch = arg;
-       BUG_ON(!ch);
-       pr_info("Session %s: kernel thread %s (PID %d) started\n",
-               ch->sess_name, ch->thread->comm, current->pid);
-       while (!kthread_should_stop()) {
-               wait_event_interruptible(ch->wait_queue,
-                       (srpt_process_completion(ch->cq, ch),
-                        kthread_should_stop()));
-       }
-       pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
-               ch->sess_name, ch->thread->comm, current->pid);
-       return 0;
-}
-
 /**
  * srpt_create_ch_ib() - Create receive and send completion queues.
  */
@@ -2075,7 +1945,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
        struct srpt_port *sport = ch->sport;
        struct srpt_device *sdev = sport->sdev;
        u32 srp_sq_size = sport->port_attrib.srp_sq_size;
-       struct ib_cq_init_attr cq_attr = {};
        int ret;
 
        WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1955,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
                goto out;
 
 retry:
-       cq_attr.cqe = ch->rq_size + srp_sq_size;
-       ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
-                             &cq_attr);
+       ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+                       0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -2131,18 +1999,6 @@ retry:
        if (ret)
                goto err_destroy_qp;
 
-       init_waitqueue_head(&ch->wait_queue);
-
-       pr_debug("creating thread for session %s\n", ch->sess_name);
-
-       ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
-       if (IS_ERR(ch->thread)) {
-               pr_err("failed to create kernel thread %ld\n",
-                      PTR_ERR(ch->thread));
-               ch->thread = NULL;
-               goto err_destroy_qp;
-       }
-
 out:
        kfree(qp_init);
        return ret;
@@ -2150,17 +2006,14 @@ out:
 err_destroy_qp:
        ib_destroy_qp(ch->qp);
 err_destroy_cq:
-       ib_destroy_cq(ch->cq);
+       ib_free_cq(ch->cq);
        goto out;
 }
 
 static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
 {
-       if (ch->thread)
-               kthread_stop(ch->thread);
-
        ib_destroy_qp(ch->qp);
-       ib_destroy_cq(ch->cq);
+       ib_free_cq(ch->cq);
 }
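
Allocating the CQ with ib_alloc_cq(..., IB_POLL_WORKQUEUE) makes the RDMA core poll the queue and run the wr_cqe .done callbacks from workqueue context, which is what lets the driver-private wait queue, srpt_completion() handler and srpt_compl_thread() above be deleted outright. A sketch of the allocation pairing, with placeholder names (ibdev, nr_cqe):

        struct ib_cq *cq;

        /* nr_cqe entries, completion vector 0, callbacks run from a workqueue */
        cq = ib_alloc_cq(ibdev, ch, nr_cqe, 0, IB_POLL_WORKQUEUE);
        if (IS_ERR(cq))
                return PTR_ERR(cq);
        /* ... */
        ib_free_cq(cq);         /* pairs with ib_alloc_cq(), not ib_destroy_cq() */
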
 
 /**
@@ -2808,12 +2661,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                              struct srpt_send_ioctx *ioctx)
 {
-       struct ib_rdma_wr wr;
        struct ib_send_wr *bad_wr;
-       struct rdma_iu *riu;
-       int i;
-       int ret;
-       int sq_wr_avail;
+       int sq_wr_avail, ret, i;
        enum dma_data_direction dir;
        const int n_rdma = ioctx->n_rdma;
 
@@ -2829,59 +2678,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                }
        }
 
-       ioctx->rdma_aborted = false;
-       ret = 0;
-       riu = ioctx->rdma_ius;
-       memset(&wr, 0, sizeof wr);
-
-       for (i = 0; i < n_rdma; ++i, ++riu) {
-               if (dir == DMA_FROM_DEVICE) {
-                       wr.wr.opcode = IB_WR_RDMA_WRITE;
-                       wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
-                                               SRPT_RDMA_WRITE_LAST :
-                                               SRPT_RDMA_MID,
-                                               ioctx->ioctx.index);
-               } else {
-                       wr.wr.opcode = IB_WR_RDMA_READ;
-                       wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
-                                               SRPT_RDMA_READ_LAST :
-                                               SRPT_RDMA_MID,
-                                               ioctx->ioctx.index);
-               }
-               wr.wr.next = NULL;
-               wr.remote_addr = riu->raddr;
-               wr.rkey = riu->rkey;
-               wr.wr.num_sge = riu->sge_cnt;
-               wr.wr.sg_list = riu->sge;
+       for (i = 0; i < n_rdma; i++) {
+               struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
 
-               /* only get completion event for the last rdma write */
-               if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
-                       wr.wr.send_flags = IB_SEND_SIGNALED;
+               wr->opcode = (dir == DMA_FROM_DEVICE) ?
+                               IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
 
-               ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
-               if (ret)
-                       break;
+               if (i == n_rdma - 1) {
+                       /* only get completion event for the last rdma read */
+                       if (dir == DMA_TO_DEVICE) {
+                               wr->send_flags = IB_SEND_SIGNALED;
+                               ioctx->rdma_cqe.done = srpt_rdma_read_done;
+                       } else {
+                               ioctx->rdma_cqe.done = srpt_rdma_write_done;
+                       }
+                       wr->wr_cqe = &ioctx->rdma_cqe;
+                       wr->next = NULL;
+               } else {
+                       wr->wr_cqe = NULL;
+                       wr->next = &ioctx->rdma_wrs[i + 1].wr;
+               }
        }
 
+       ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
        if (ret)
                pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
                                 __func__, __LINE__, ret, i, n_rdma);
-       if (ret && i > 0) {
-               wr.wr.num_sge = 0;
-               wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
-               wr.wr.send_flags = IB_SEND_SIGNALED;
-               while (ch->state == CH_LIVE &&
-                       ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
-                       pr_info("Trying to abort failed RDMA transfer [%d]\n",
-                               ioctx->ioctx.index);
-                       msleep(1000);
-               }
-               while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
-                       pr_info("Waiting until RDMA abort finished [%d]\n",
-                               ioctx->ioctx.index);
-                       msleep(1000);
-               }
-       }
 out:
        if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
                atomic_add(n_rdma, &ch->sq_wr_avail);
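
The rewritten posting loop leans on ordinary send-queue chaining: the pre-built rdma_wrs[] array is linked through ->next and handed to a single ib_post_send() call, and only the tail element carries a wr_cqe (plus IB_SEND_SIGNALED for the RDMA-read direction), so one completion covers the whole transfer and the removed multi-step SRPT_RDMA_ABORT machinery is no longer needed. Schematically, with hypothetical names:

        /* link n work requests into one chain; only the tail reports back */
        for (i = 0; i < n - 1; i++) {
                wrs[i].wr.next = &wrs[i + 1].wr;
                wrs[i].wr.wr_cqe = NULL;        /* unsignaled middle element */
        }
        wrs[n - 1].wr.next = NULL;
        wrs[n - 1].wr.wr_cqe = &ctx->cqe;       /* single completion for the chain */

        ret = ib_post_send(qp, &wrs[0].wr, &bad_wr);
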
@@ -3190,14 +3012,11 @@ static void srpt_add_one(struct ib_device *device)
        init_waitqueue_head(&sdev->ch_releaseQ);
        spin_lock_init(&sdev->spinlock);
 
-       if (ib_query_device(device, &sdev->dev_attr))
-               goto free_dev;
-
        sdev->pd = ib_alloc_pd(device);
        if (IS_ERR(sdev->pd))
                goto free_dev;
 
-       sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
+       sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr);
 
        srq_attr.event_handler = srpt_srq_event;
        srq_attr.srq_context = (void *)sdev;
@@ -3211,7 +3030,7 @@ static void srpt_add_one(struct ib_device *device)
                goto err_pd;
 
        pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
-                __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
+                __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
                 device->name);
 
        if (!srpt_service_guid)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 5366e0a9fd6d42d298cf296ed70dd70c618e8de0..09037f2b0b51430d568e847e7ebd5f24e7fd906e 100644
@@ -128,36 +128,6 @@ enum {
        DEFAULT_MAX_RDMA_SIZE = 65536,
 };
 
-enum srpt_opcode {
-       SRPT_RECV,
-       SRPT_SEND,
-       SRPT_RDMA_MID,
-       SRPT_RDMA_ABORT,
-       SRPT_RDMA_READ_LAST,
-       SRPT_RDMA_WRITE_LAST,
-};
-
-static inline u64 encode_wr_id(u8 opcode, u32 idx)
-{
-       return ((u64)opcode << 32) | idx;
-}
-static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
-{
-       return wr_id >> 32;
-}
-static inline u32 idx_from_wr_id(u64 wr_id)
-{
-       return (u32)wr_id;
-}
-
-struct rdma_iu {
-       u64             raddr;
-       u32             rkey;
-       struct ib_sge   *sge;
-       u32             sge_cnt;
-       int             mem_id;
-};
-
 /**
  * enum srpt_command_state - SCSI command state managed by SRPT.
  * @SRPT_STATE_NEW:           New command arrived and is being processed.
@@ -189,6 +159,7 @@ enum srpt_command_state {
  * @index: Index of the I/O context in its ioctx_ring array.
  */
 struct srpt_ioctx {
+       struct ib_cqe           cqe;
        void                    *buf;
        dma_addr_t              dma;
        uint32_t                index;
@@ -215,32 +186,30 @@ struct srpt_recv_ioctx {
  * @sg:          Pointer to sg-list associated with this I/O context.
  * @sg_cnt:      SG-list size.
  * @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_ius:  Number of elements in the rdma_ius array.
- * @rdma_ius:    Array with information about the RDMA mapping.
+ * @n_rdma_wrs:  Number of elements in the rdma_wrs array.
+ * @rdma_wrs:    Array with information about the RDMA mapping.
  * @tag:         Tag of the received SRP information unit.
  * @spinlock:    Protects 'state'.
  * @state:       I/O context state.
- * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
- *              the already initiated transfers have finished.
  * @cmd:         Target core command data structure.
  * @sense_data:  SCSI sense data.
  */
 struct srpt_send_ioctx {
        struct srpt_ioctx       ioctx;
        struct srpt_rdma_ch     *ch;
-       struct rdma_iu          *rdma_ius;
+       struct ib_rdma_wr       *rdma_wrs;
+       struct ib_cqe           rdma_cqe;
        struct srp_direct_buf   *rbufs;
        struct srp_direct_buf   single_rbuf;
        struct scatterlist      *sg;
        struct list_head        free_list;
        spinlock_t              spinlock;
        enum srpt_command_state state;
-       bool                    rdma_aborted;
        struct se_cmd           cmd;
        struct completion       tx_done;
        int                     sg_cnt;
        int                     mapped_sg_count;
-       u16                     n_rdma_ius;
+       u16                     n_rdma_wrs;
        u8                      n_rdma;
        u8                      n_rbuf;
        bool                    queue_status_only;
@@ -267,9 +236,6 @@ enum rdma_ch_state {
 
 /**
  * struct srpt_rdma_ch - RDMA channel.
- * @wait_queue:    Allows the kernel thread to wait for more work.
- * @thread:        Kernel thread that processes the IB queues associated with
- *                 the channel.
  * @cm_id:         IB CM ID associated with the channel.
  * @qp:            IB queue pair used for communicating over this channel.
  * @cq:            IB completion queue for this channel.
@@ -288,7 +254,6 @@ enum rdma_ch_state {
  * @free_list:     Head of list with free send I/O contexts.
  * @state:         channel state. See also enum rdma_ch_state.
  * @ioctx_ring:    Send ring.
- * @wc:            IB work completion array for srpt_process_completion().
  * @list:          Node for insertion in the srpt_device.rch_list list.
  * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
  *                 list contains struct srpt_ioctx elements and is protected
@@ -299,8 +264,6 @@ enum rdma_ch_state {
  * @release_done:  Enables waiting for srpt_release_channel() completion.
  */
 struct srpt_rdma_ch {
-       wait_queue_head_t       wait_queue;
-       struct task_struct      *thread;
        struct ib_cm_id         *cm_id;
        struct ib_qp            *qp;
        struct ib_cq            *cq;
@@ -317,7 +280,6 @@ struct srpt_rdma_ch {
        struct list_head        free_list;
        enum rdma_ch_state      state;
        struct srpt_send_ioctx  **ioctx_ring;
-       struct ib_wc            wc[16];
        struct list_head        list;
        struct list_head        cmd_wait_list;
        struct se_session       *sess;
@@ -377,8 +339,6 @@ struct srpt_port {
  * @mr:            L_Key (local key) with write access to all local memory.
  * @srq:           Per-HCA SRQ (shared receive queue).
  * @cm_id:         Connection identifier.
- * @dev_attr:      Attributes of the InfiniBand device as obtained during the
- *                 ib_client.add() callback.
  * @srq_size:      SRQ size.
  * @ioctx_ring:    Per-HCA SRQ.
  * @rch_list:      Per-device channel list -- see also srpt_rdma_ch.list.
@@ -393,7 +353,6 @@ struct srpt_device {
        struct ib_pd            *pd;
        struct ib_srq           *srq;
        struct ib_cm_id         *cm_id;
-       struct ib_device_attr   dev_attr;
        int                     srq_size;
        struct srpt_recv_ioctx  **ioctx_ring;
        struct list_head        rch_list;
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index fd4100d56d8c5509986be23e40d2af137cd840b1..e8a84d12b7fffe812cd329a88da26f6922c219af 100644
  */
 
 #include <linux/kernel.h>
+#include <linux/input.h>
+#include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/module.h>
 #include <linux/usb/input.h>
+#include <linux/usb/quirks.h>
 
 #define DRIVER_AUTHOR "Marko Friedemann <mfr@bmx-chemnitz.de>"
 #define DRIVER_DESC "X-Box pad driver"
@@ -125,7 +128,7 @@ static const struct xpad_device {
        { 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
        { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
        { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE },
-       { 0x045e, 0x02dd, "Microsoft X-Box One pad (Covert Forces)", 0, XTYPE_XBOXONE },
+       { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE },
        { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
        { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
        { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
@@ -317,21 +320,42 @@ static struct usb_device_id xpad_table[] = {
 
 MODULE_DEVICE_TABLE(usb, xpad_table);
 
+struct xpad_output_packet {
+       u8 data[XPAD_PKT_LEN];
+       u8 len;
+       bool pending;
+};
+
+#define XPAD_OUT_CMD_IDX       0
+#define XPAD_OUT_FF_IDX                1
+#define XPAD_OUT_LED_IDX       (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF))
+#define XPAD_NUM_OUT_PACKETS   (1 + \
+                                IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \
+                                IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS))
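
The index macros rely on IS_ENABLED() evaluating to 0 or 1 at compile time, so each optional packet type only occupies a slot in out_packets[] when the corresponding option is built in. For example:

        /*
         * CONFIG_JOYSTICK_XPAD_FF=y, CONFIG_JOYSTICK_XPAD_LEDS=y:
         *      XPAD_OUT_CMD_IDX = 0, XPAD_OUT_FF_IDX = 1,
         *      XPAD_OUT_LED_IDX = 2, XPAD_NUM_OUT_PACKETS = 3
         *
         * CONFIG_JOYSTICK_XPAD_FF=n, CONFIG_JOYSTICK_XPAD_LEDS=y:
         *      XPAD_OUT_LED_IDX collapses to 1 and XPAD_NUM_OUT_PACKETS to 2;
         *      the unused FF slot disappears along with the code that fills it.
         */
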
+
 struct usb_xpad {
        struct input_dev *dev;          /* input device interface */
+       struct input_dev __rcu *x360w_dev;
        struct usb_device *udev;        /* usb device */
        struct usb_interface *intf;     /* usb interface */
 
-       int pad_present;
+       bool pad_present;
+       bool input_created;
 
        struct urb *irq_in;             /* urb for interrupt in report */
        unsigned char *idata;           /* input data */
        dma_addr_t idata_dma;
 
        struct urb *irq_out;            /* urb for interrupt out report */
+       struct usb_anchor irq_out_anchor;
+       bool irq_out_active;            /* we must not use an active URB */
+       u8 odata_serial;                /* serial number for xbox one protocol */
        unsigned char *odata;           /* output data */
        dma_addr_t odata_dma;
-       struct mutex odata_mutex;
+       spinlock_t odata_lock;
+
+       struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS];
+       int last_out_packet;
 
 #if defined(CONFIG_JOYSTICK_XPAD_LEDS)
        struct xpad_led *led;
@@ -343,8 +367,12 @@ struct usb_xpad {
        int xtype;                      /* type of xbox device */
        int pad_nr;                     /* the order x360 pads were attached */
        const char *name;               /* name of the device */
+       struct work_struct work;        /* init/remove device from callback */
 };
 
+static int xpad_init_input(struct usb_xpad *xpad);
+static void xpad_deinit_input(struct usb_xpad *xpad);
+
 /*
  *     xpad_process_packet
  *
@@ -424,11 +452,9 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d
  *             http://www.free60.org/wiki/Gamepad
  */
 
-static void xpad360_process_packet(struct usb_xpad *xpad,
+static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev,
                                   u16 cmd, unsigned char *data)
 {
-       struct input_dev *dev = xpad->dev;
-
        /* digital pad */
        if (xpad->mapping & MAP_DPAD_TO_BUTTONS) {
                /* dpad as buttons (left, right, up, down) */
@@ -495,7 +521,30 @@ static void xpad360_process_packet(struct usb_xpad *xpad,
        input_sync(dev);
 }
 
-static void xpad_identify_controller(struct usb_xpad *xpad);
+static void xpad_presence_work(struct work_struct *work)
+{
+       struct usb_xpad *xpad = container_of(work, struct usb_xpad, work);
+       int error;
+
+       if (xpad->pad_present) {
+               error = xpad_init_input(xpad);
+               if (error) {
+                       /* complain only, not much else we can do here */
+                       dev_err(&xpad->dev->dev,
+                               "unable to init device: %d\n", error);
+               } else {
+                       rcu_assign_pointer(xpad->x360w_dev, xpad->dev);
+               }
+       } else {
+               RCU_INIT_POINTER(xpad->x360w_dev, NULL);
+               synchronize_rcu();
+               /*
+                * Now that we are sure xpad360w_process_packet is not
+                * using input device we can get rid of it.
+                */
+               xpad_deinit_input(xpad);
+       }
+}
 
 /*
  * xpad360w_process_packet
@@ -513,24 +562,28 @@ static void xpad_identify_controller(struct usb_xpad *xpad);
  */
 static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data)
 {
+       struct input_dev *dev;
+       bool present;
+
        /* Presence change */
        if (data[0] & 0x08) {
-               if (data[1] & 0x80) {
-                       xpad->pad_present = 1;
-                       /*
-                        * Light up the segment corresponding to
-                        * controller number.
-                        */
-                       xpad_identify_controller(xpad);
-               } else
-                       xpad->pad_present = 0;
+               present = (data[1] & 0x80) != 0;
+
+               if (xpad->pad_present != present) {
+                       xpad->pad_present = present;
+                       schedule_work(&xpad->work);
+               }
        }
 
        /* Valid pad data */
-       if (!(data[1] & 0x1))
+       if (data[1] != 0x1)
                return;
 
-       xpad360_process_packet(xpad, cmd, &data[4]);
+       rcu_read_lock();
+       dev = rcu_dereference(xpad->x360w_dev);
+       if (dev)
+               xpad360_process_packet(xpad, dev, cmd, &data[4]);
+       rcu_read_unlock();
 }
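
Presence handling is now an RCU-managed handoff: xpad_presence_work() publishes the freshly created input device with rcu_assign_pointer() and tears it down with RCU_INIT_POINTER(NULL) followed by synchronize_rcu(), while the URB completion path above only touches the device under rcu_read_lock(). The teardown ordering is the subtle part:

        /* writer side: hide the device, wait out readers, then free it */
        RCU_INIT_POINTER(xpad->x360w_dev, NULL);
        synchronize_rcu();              /* no CPU still holds the old pointer */
        xpad_deinit_input(xpad);        /* now safe to unregister */
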
 
 /*
@@ -659,7 +712,7 @@ static void xpad_irq_in(struct urb *urb)
 
        switch (xpad->xtype) {
        case XTYPE_XBOX360:
-               xpad360_process_packet(xpad, 0, xpad->idata);
+               xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata);
                break;
        case XTYPE_XBOX360W:
                xpad360w_process_packet(xpad, 0, xpad->idata);
@@ -678,18 +731,73 @@ exit:
                        __func__, retval);
 }
 
+/* Callers must hold xpad->odata_lock spinlock */
+static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad)
+{
+       struct xpad_output_packet *pkt, *packet = NULL;
+       int i;
+
+       for (i = 0; i < XPAD_NUM_OUT_PACKETS; i++) {
+               if (++xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS)
+                       xpad->last_out_packet = 0;
+
+               pkt = &xpad->out_packets[xpad->last_out_packet];
+               if (pkt->pending) {
+                       dev_dbg(&xpad->intf->dev,
+                               "%s - found pending output packet %d\n",
+                               __func__, xpad->last_out_packet);
+                       packet = pkt;
+                       break;
+               }
+       }
+
+       if (packet) {
+               memcpy(xpad->odata, packet->data, packet->len);
+               xpad->irq_out->transfer_buffer_length = packet->len;
+               return true;
+       }
+
+       return false;
+}
+
+/* Callers must hold xpad->odata_lock spinlock */
+static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad)
+{
+       int error;
+
+       if (!xpad->irq_out_active && xpad_prepare_next_out_packet(xpad)) {
+               usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor);
+               error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+               if (error) {
+                       dev_err(&xpad->intf->dev,
+                               "%s - usb_submit_urb failed with result %d\n",
+                               __func__, error);
+                       usb_unanchor_urb(xpad->irq_out);
+                       return -EIO;
+               }
+
+               xpad->irq_out_active = true;
+       }
+
+       return 0;
+}
+
 static void xpad_irq_out(struct urb *urb)
 {
        struct usb_xpad *xpad = urb->context;
        struct device *dev = &xpad->intf->dev;
-       int retval, status;
+       int status = urb->status;
+       int error;
+       unsigned long flags;
 
-       status = urb->status;
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
        switch (status) {
        case 0:
                /* success */
-               return;
+               xpad->out_packets[xpad->last_out_packet].pending = false;
+               xpad->irq_out_active = xpad_prepare_next_out_packet(xpad);
+               break;
 
        case -ECONNRESET:
        case -ENOENT:
@@ -697,19 +805,28 @@ static void xpad_irq_out(struct urb *urb)
                /* this urb is terminated, clean up */
                dev_dbg(dev, "%s - urb shutting down with status: %d\n",
                        __func__, status);
-               return;
+               xpad->irq_out_active = false;
+               break;
 
        default:
                dev_dbg(dev, "%s - nonzero urb status received: %d\n",
                        __func__, status);
-               goto exit;
+               break;
        }
 
-exit:
-       retval = usb_submit_urb(urb, GFP_ATOMIC);
-       if (retval)
-               dev_err(dev, "%s - usb_submit_urb failed with result %d\n",
-                       __func__, retval);
+       if (xpad->irq_out_active) {
+               usb_anchor_urb(urb, &xpad->irq_out_anchor);
+               error = usb_submit_urb(urb, GFP_ATOMIC);
+               if (error) {
+                       dev_err(dev,
+                               "%s - usb_submit_urb failed with result %d\n",
+                               __func__, error);
+                       usb_unanchor_urb(urb);
+                       xpad->irq_out_active = false;
+               }
+       }
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 }
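
All output paths now follow one staging pattern instead of touching the shared URB directly: fill the slot owned by the caller under odata_lock, mark it pending, and let xpad_try_sending_next_out_packet() and xpad_irq_out() drain the slots one URB at a time. Roughly (payload and len are placeholders):

        struct xpad_output_packet *pkt = &xpad->out_packets[XPAD_OUT_FF_IDX];
        unsigned long flags;

        spin_lock_irqsave(&xpad->odata_lock, flags);
        memcpy(pkt->data, payload, len);        /* stage the request in its slot */
        pkt->len = len;
        pkt->pending = true;
        retval = xpad_try_sending_next_out_packet(xpad); /* no-op while a URB is in flight */
        spin_unlock_irqrestore(&xpad->odata_lock, flags);
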
 
 static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
@@ -721,6 +838,8 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
        if (xpad->xtype == XTYPE_UNKNOWN)
                return 0;
 
+       init_usb_anchor(&xpad->irq_out_anchor);
+
        xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN,
                                         GFP_KERNEL, &xpad->odata_dma);
        if (!xpad->odata) {
@@ -728,7 +847,7 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
                goto fail1;
        }
 
-       mutex_init(&xpad->odata_mutex);
+       spin_lock_init(&xpad->odata_lock);
 
        xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL);
        if (!xpad->irq_out) {
@@ -755,8 +874,14 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
 
 static void xpad_stop_output(struct usb_xpad *xpad)
 {
-       if (xpad->xtype != XTYPE_UNKNOWN)
-               usb_kill_urb(xpad->irq_out);
+       if (xpad->xtype != XTYPE_UNKNOWN) {
+               if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor,
+                                                  5000)) {
+                       dev_warn(&xpad->intf->dev,
+                                "timed out waiting for output URB to complete, killing\n");
+                       usb_kill_anchored_urbs(&xpad->irq_out_anchor);
+               }
+       }
 }
 
 static void xpad_deinit_output(struct usb_xpad *xpad)
@@ -770,27 +895,60 @@ static void xpad_deinit_output(struct usb_xpad *xpad)
 
 static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
 {
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_CMD_IDX];
+       unsigned long flags;
        int retval;
 
-       mutex_lock(&xpad->odata_mutex);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
+
+       packet->data[0] = 0x08;
+       packet->data[1] = 0x00;
+       packet->data[2] = 0x0F;
+       packet->data[3] = 0xC0;
+       packet->data[4] = 0x00;
+       packet->data[5] = 0x00;
+       packet->data[6] = 0x00;
+       packet->data[7] = 0x00;
+       packet->data[8] = 0x00;
+       packet->data[9] = 0x00;
+       packet->data[10] = 0x00;
+       packet->data[11] = 0x00;
+       packet->len = 12;
+       packet->pending = true;
+
+       /* Reset the sequence so we send out presence first */
+       xpad->last_out_packet = -1;
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 
-       xpad->odata[0] = 0x08;
-       xpad->odata[1] = 0x00;
-       xpad->odata[2] = 0x0F;
-       xpad->odata[3] = 0xC0;
-       xpad->odata[4] = 0x00;
-       xpad->odata[5] = 0x00;
-       xpad->odata[6] = 0x00;
-       xpad->odata[7] = 0x00;
-       xpad->odata[8] = 0x00;
-       xpad->odata[9] = 0x00;
-       xpad->odata[10] = 0x00;
-       xpad->odata[11] = 0x00;
-       xpad->irq_out->transfer_buffer_length = 12;
+       return retval;
+}
+
+static int xpad_start_xbox_one(struct usb_xpad *xpad)
+{
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_CMD_IDX];
+       unsigned long flags;
+       int retval;
 
-       retval = usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
-       mutex_unlock(&xpad->odata_mutex);
+       /* Xbox one controller needs to be initialized. */
+       packet->data[0] = 0x05;
+       packet->data[1] = 0x20;
+       packet->data[2] = xpad->odata_serial++; /* packet serial */
+       packet->data[3] = 0x01; /* rumble bit enable?  */
+       packet->data[4] = 0x00;
+       packet->len = 5;
+       packet->pending = true;
+
+       /* Reset the sequence so we send out start packet first */
+       xpad->last_out_packet = -1;
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 
        return retval;
 }
@@ -799,8 +957,11 @@ static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
 static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect)
 {
        struct usb_xpad *xpad = input_get_drvdata(dev);
+       struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX];
        __u16 strong;
        __u16 weak;
+       int retval;
+       unsigned long flags;
 
        if (effect->type != FF_RUMBLE)
                return 0;
@@ -808,69 +969,81 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
        strong = effect->u.rumble.strong_magnitude;
        weak = effect->u.rumble.weak_magnitude;
 
+       spin_lock_irqsave(&xpad->odata_lock, flags);
+
        switch (xpad->xtype) {
        case XTYPE_XBOX:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x06;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = strong / 256;  /* left actuator */
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = weak / 256;    /* right actuator */
-               xpad->irq_out->transfer_buffer_length = 6;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x06;
+               packet->data[2] = 0x00;
+               packet->data[3] = strong / 256; /* left actuator */
+               packet->data[4] = 0x00;
+               packet->data[5] = weak / 256;   /* right actuator */
+               packet->len = 6;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOX360:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x08;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = strong / 256;  /* left actuator? */
-               xpad->odata[4] = weak / 256;    /* right actuator? */
-               xpad->odata[5] = 0x00;
-               xpad->odata[6] = 0x00;
-               xpad->odata[7] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 8;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x08;
+               packet->data[2] = 0x00;
+               packet->data[3] = strong / 256;  /* left actuator? */
+               packet->data[4] = weak / 256;   /* right actuator? */
+               packet->data[5] = 0x00;
+               packet->data[6] = 0x00;
+               packet->data[7] = 0x00;
+               packet->len = 8;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOX360W:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x01;
-               xpad->odata[2] = 0x0F;
-               xpad->odata[3] = 0xC0;
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = strong / 256;
-               xpad->odata[6] = weak / 256;
-               xpad->odata[7] = 0x00;
-               xpad->odata[8] = 0x00;
-               xpad->odata[9] = 0x00;
-               xpad->odata[10] = 0x00;
-               xpad->odata[11] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x01;
+               packet->data[2] = 0x0F;
+               packet->data[3] = 0xC0;
+               packet->data[4] = 0x00;
+               packet->data[5] = strong / 256;
+               packet->data[6] = weak / 256;
+               packet->data[7] = 0x00;
+               packet->data[8] = 0x00;
+               packet->data[9] = 0x00;
+               packet->data[10] = 0x00;
+               packet->data[11] = 0x00;
+               packet->len = 12;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOXONE:
-               xpad->odata[0] = 0x09; /* activate rumble */
-               xpad->odata[1] = 0x08;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = 0x08; /* continuous effect */
-               xpad->odata[4] = 0x00; /* simple rumble mode */
-               xpad->odata[5] = 0x03; /* L and R actuator only */
-               xpad->odata[6] = 0x00; /* TODO: LT actuator */
-               xpad->odata[7] = 0x00; /* TODO: RT actuator */
-               xpad->odata[8] = strong / 256;  /* left actuator */
-               xpad->odata[9] = weak / 256;    /* right actuator */
-               xpad->odata[10] = 0x80; /* length of pulse */
-               xpad->odata[11] = 0x00; /* stop period of pulse */
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x09; /* activate rumble */
+               packet->data[1] = 0x08;
+               packet->data[2] = xpad->odata_serial++;
+               packet->data[3] = 0x08; /* continuous effect */
+               packet->data[4] = 0x00; /* simple rumble mode */
+               packet->data[5] = 0x03; /* L and R actuator only */
+               packet->data[6] = 0x00; /* TODO: LT actuator */
+               packet->data[7] = 0x00; /* TODO: RT actuator */
+               packet->data[8] = strong / 512; /* left actuator */
+               packet->data[9] = weak / 512;   /* right actuator */
+               packet->data[10] = 0x80;        /* length of pulse */
+               packet->data[11] = 0x00;        /* stop period of pulse */
+               packet->data[12] = 0x00;
+               packet->len = 13;
+               packet->pending = true;
                break;
 
        default:
                dev_dbg(&xpad->dev->dev,
                        "%s - rumble command sent to unsupported xpad type: %d\n",
                        __func__, xpad->xtype);
-               return -EINVAL;
+               retval = -EINVAL;
+               goto out;
        }
 
-       return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+out:
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
+       return retval;
 }
 
 static int xpad_init_ff(struct usb_xpad *xpad)
@@ -921,36 +1094,44 @@ struct xpad_led {
  */
 static void xpad_send_led_command(struct usb_xpad *xpad, int command)
 {
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_LED_IDX];
+       unsigned long flags;
+
        command %= 16;
 
-       mutex_lock(&xpad->odata_mutex);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
        switch (xpad->xtype) {
        case XTYPE_XBOX360:
-               xpad->odata[0] = 0x01;
-               xpad->odata[1] = 0x03;
-               xpad->odata[2] = command;
-               xpad->irq_out->transfer_buffer_length = 3;
+               packet->data[0] = 0x01;
+               packet->data[1] = 0x03;
+               packet->data[2] = command;
+               packet->len = 3;
+               packet->pending = true;
                break;
+
        case XTYPE_XBOX360W:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x00;
-               xpad->odata[2] = 0x08;
-               xpad->odata[3] = 0x40 + command;
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = 0x00;
-               xpad->odata[6] = 0x00;
-               xpad->odata[7] = 0x00;
-               xpad->odata[8] = 0x00;
-               xpad->odata[9] = 0x00;
-               xpad->odata[10] = 0x00;
-               xpad->odata[11] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x00;
+               packet->data[2] = 0x08;
+               packet->data[3] = 0x40 + command;
+               packet->data[4] = 0x00;
+               packet->data[5] = 0x00;
+               packet->data[6] = 0x00;
+               packet->data[7] = 0x00;
+               packet->data[8] = 0x00;
+               packet->data[9] = 0x00;
+               packet->data[10] = 0x00;
+               packet->data[11] = 0x00;
+               packet->len = 12;
+               packet->pending = true;
                break;
        }
 
-       usb_submit_urb(xpad->irq_out, GFP_KERNEL);
-       mutex_unlock(&xpad->odata_mutex);
+       xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 }
 
 /*
@@ -959,7 +1140,7 @@ static void xpad_send_led_command(struct usb_xpad *xpad, int command)
  */
 static void xpad_identify_controller(struct usb_xpad *xpad)
 {
-       xpad_send_led_command(xpad, (xpad->pad_nr % 4) + 2);
+       led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2);
 }
 
 static void xpad_led_set(struct led_classdev *led_cdev,
@@ -1001,14 +1182,7 @@ static int xpad_led_probe(struct usb_xpad *xpad)
        if (error)
                goto err_free_id;
 
-       if (xpad->xtype == XTYPE_XBOX360) {
-               /*
-                * Light up the segment corresponding to controller
-                * number on wired devices. On wireless we'll do that
-                * when they respond to "presence" packet.
-                */
-               xpad_identify_controller(xpad);
-       }
+       xpad_identify_controller(xpad);
 
        return 0;
 
@@ -1033,40 +1207,75 @@ static void xpad_led_disconnect(struct usb_xpad *xpad)
 #else
 static int xpad_led_probe(struct usb_xpad *xpad) { return 0; }
 static void xpad_led_disconnect(struct usb_xpad *xpad) { }
-static void xpad_identify_controller(struct usb_xpad *xpad) { }
 #endif
 
-static int xpad_open(struct input_dev *dev)
+static int xpad_start_input(struct usb_xpad *xpad)
 {
-       struct usb_xpad *xpad = input_get_drvdata(dev);
-
-       /* URB was submitted in probe */
-       if (xpad->xtype == XTYPE_XBOX360W)
-               return 0;
+       int error;
 
-       xpad->irq_in->dev = xpad->udev;
        if (usb_submit_urb(xpad->irq_in, GFP_KERNEL))
                return -EIO;
 
        if (xpad->xtype == XTYPE_XBOXONE) {
-               /* Xbox one controller needs to be initialized. */
-               xpad->odata[0] = 0x05;
-               xpad->odata[1] = 0x20;
-               xpad->irq_out->transfer_buffer_length = 2;
-               return usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+               error = xpad_start_xbox_one(xpad);
+               if (error) {
+                       usb_kill_urb(xpad->irq_in);
+                       return error;
+               }
        }
 
        return 0;
 }
 
-static void xpad_close(struct input_dev *dev)
+static void xpad_stop_input(struct usb_xpad *xpad)
 {
-       struct usb_xpad *xpad = input_get_drvdata(dev);
+       usb_kill_urb(xpad->irq_in);
+}
+
+static int xpad360w_start_input(struct usb_xpad *xpad)
+{
+       int error;
 
-       if (xpad->xtype != XTYPE_XBOX360W)
+       error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+       if (error)
+               return -EIO;
+
+       /*
+        * Send presence packet.
+        * This will force the controller to resend connection packets.
+        * This is useful in the case we activate the module after the
+        * adapter has been plugged in, as it won't automatically
+        * send us info about the controllers.
+        */
+       error = xpad_inquiry_pad_presence(xpad);
+       if (error) {
                usb_kill_urb(xpad->irq_in);
+               return error;
+       }
 
-       xpad_stop_output(xpad);
+       return 0;
+}
+
+static void xpad360w_stop_input(struct usb_xpad *xpad)
+{
+       usb_kill_urb(xpad->irq_in);
+
+       /* Make sure we are done with presence work if it was scheduled */
+       flush_work(&xpad->work);
+}
+
+static int xpad_open(struct input_dev *dev)
+{
+       struct usb_xpad *xpad = input_get_drvdata(dev);
+
+       return xpad_start_input(xpad);
+}
+
+static void xpad_close(struct input_dev *dev)
+{
+       struct usb_xpad *xpad = input_get_drvdata(dev);
+
+       xpad_stop_input(xpad);
 }
 
 static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
@@ -1097,8 +1306,11 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
 
 static void xpad_deinit_input(struct usb_xpad *xpad)
 {
-       xpad_led_disconnect(xpad);
-       input_unregister_device(xpad->dev);
+       if (xpad->input_created) {
+               xpad->input_created = false;
+               xpad_led_disconnect(xpad);
+               input_unregister_device(xpad->dev);
+       }
 }
 
 static int xpad_init_input(struct usb_xpad *xpad)
@@ -1118,8 +1330,10 @@ static int xpad_init_input(struct usb_xpad *xpad)
 
        input_set_drvdata(input_dev, xpad);
 
-       input_dev->open = xpad_open;
-       input_dev->close = xpad_close;
+       if (xpad->xtype != XTYPE_XBOX360W) {
+               input_dev->open = xpad_open;
+               input_dev->close = xpad_close;
+       }
 
        __set_bit(EV_KEY, input_dev->evbit);
 
@@ -1181,6 +1395,7 @@ static int xpad_init_input(struct usb_xpad *xpad)
        if (error)
                goto err_disconnect_led;
 
+       xpad->input_created = true;
        return 0;
 
 err_disconnect_led:
@@ -1241,6 +1456,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
        xpad->mapping = xpad_device[i].mapping;
        xpad->xtype = xpad_device[i].xtype;
        xpad->name = xpad_device[i].name;
+       INIT_WORK(&xpad->work, xpad_presence_work);
 
        if (xpad->xtype == XTYPE_UNKNOWN) {
                if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
@@ -1277,10 +1493,6 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 
        usb_set_intfdata(intf, xpad);
 
-       error = xpad_init_input(xpad);
-       if (error)
-               goto err_deinit_output;
-
        if (xpad->xtype == XTYPE_XBOX360W) {
                /*
                 * Submit the int URB immediately rather than waiting for open
@@ -1289,28 +1501,24 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
                 * exactly the message that a controller has arrived that
                 * we're waiting for.
                 */
-               xpad->irq_in->dev = xpad->udev;
-               error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+               error = xpad360w_start_input(xpad);
                if (error)
-                       goto err_deinit_input;
-
+                       goto err_deinit_output;
                /*
-                * Send presence packet.
-                * This will force the controller to resend connection packets.
-                * This is useful in the case we activate the module after the
-                * adapter has been plugged in, as it won't automatically
-                * send us info about the controllers.
+                * Wireless controllers require RESET_RESUME to work properly
+                * after suspend. Ideally this quirk should be in usb core
+                * quirk list, but we have too many vendors producing these
+                * controllers and we'd need to maintain 2 identical lists
+                * here in this driver and in usb core.
                 */
-               error = xpad_inquiry_pad_presence(xpad);
+               udev->quirks |= USB_QUIRK_RESET_RESUME;
+       } else {
+               error = xpad_init_input(xpad);
                if (error)
-                       goto err_kill_in_urb;
+                       goto err_deinit_output;
        }
        return 0;
 
-err_kill_in_urb:
-       usb_kill_urb(xpad->irq_in);
-err_deinit_input:
-       xpad_deinit_input(xpad);
 err_deinit_output:
        xpad_deinit_output(xpad);
 err_free_in_urb:
@@ -1320,19 +1528,24 @@ err_free_idata:
 err_free_mem:
        kfree(xpad);
        return error;
-
 }
 
 static void xpad_disconnect(struct usb_interface *intf)
 {
-       struct usb_xpad *xpad = usb_get_intfdata (intf);
+       struct usb_xpad *xpad = usb_get_intfdata(intf);
+
+       if (xpad->xtype == XTYPE_XBOX360W)
+               xpad360w_stop_input(xpad);
 
        xpad_deinit_input(xpad);
-       xpad_deinit_output(xpad);
 
-       if (xpad->xtype == XTYPE_XBOX360W) {
-               usb_kill_urb(xpad->irq_in);
-       }
+       /*
+        * Now that both input device and LED device are gone we can
+        * stop output URB.
+        */
+       xpad_stop_output(xpad);
+
+       xpad_deinit_output(xpad);
 
        usb_free_urb(xpad->irq_in);
        usb_free_coherent(xpad->udev, XPAD_PKT_LEN,
@@ -1343,10 +1556,55 @@ static void xpad_disconnect(struct usb_interface *intf)
        usb_set_intfdata(intf, NULL);
 }
 
+static int xpad_suspend(struct usb_interface *intf, pm_message_t message)
+{
+       struct usb_xpad *xpad = usb_get_intfdata(intf);
+       struct input_dev *input = xpad->dev;
+
+       if (xpad->xtype == XTYPE_XBOX360W) {
+               /*
+                * Wireless controllers always listen to input so
+                * they are notified when controller shows up
+                * or goes away.
+                */
+               xpad360w_stop_input(xpad);
+       } else {
+               mutex_lock(&input->mutex);
+               if (input->users)
+                       xpad_stop_input(xpad);
+               mutex_unlock(&input->mutex);
+       }
+
+       xpad_stop_output(xpad);
+
+       return 0;
+}
+
+static int xpad_resume(struct usb_interface *intf)
+{
+       struct usb_xpad *xpad = usb_get_intfdata(intf);
+       struct input_dev *input = xpad->dev;
+       int retval = 0;
+
+       if (xpad->xtype == XTYPE_XBOX360W) {
+               retval = xpad360w_start_input(xpad);
+       } else {
+               mutex_lock(&input->mutex);
+               if (input->users)
+                       retval = xpad_start_input(xpad);
+               mutex_unlock(&input->mutex);
+       }
+
+       return retval;
+}
+
 static struct usb_driver xpad_driver = {
        .name           = "xpad",
        .probe          = xpad_probe,
        .disconnect     = xpad_disconnect,
+       .suspend        = xpad_suspend,
+       .resume         = xpad_resume,
+       .reset_resume   = xpad_resume,
        .id_table       = xpad_table,
 };
 
diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c
index 4d446d5085aad9b110ca7ecb05a3167f859ef12c..c01a1d648f9f087df57aafedf7eb3e56b4df2b8e 100644
@@ -235,7 +235,7 @@ struct adp5589_kpad {
        unsigned short gpimapsize;
        unsigned extend_cfg;
        bool is_adp5585;
-       bool adp5585_support_row5;
+       bool support_row5;
 #ifdef CONFIG_GPIOLIB
        unsigned char gpiomap[ADP5589_MAXGPIO];
        bool export_gpio;
@@ -485,7 +485,7 @@ static int adp5589_build_gpiomap(struct adp5589_kpad *kpad,
        if (kpad->extend_cfg & C4_EXTEND_CFG)
                pin_used[kpad->var->c4_extend_cfg] = true;
 
-       if (!kpad->adp5585_support_row5)
+       if (!kpad->support_row5)
                pin_used[5] = true;
 
        for (i = 0; i < kpad->var->maxgpio; i++)
@@ -884,12 +884,13 @@ static int adp5589_probe(struct i2c_client *client,
 
        switch (id->driver_data) {
        case ADP5585_02:
-               kpad->adp5585_support_row5 = true;
+               kpad->support_row5 = true;
        case ADP5585_01:
                kpad->is_adp5585 = true;
                kpad->var = &const_adp5585;
                break;
        case ADP5589:
+               kpad->support_row5 = true;
                kpad->var = &const_adp5589;
                break;
        }
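
Note the deliberate fallthrough: ADP5585_02 sets support_row5 and then shares the ADP5585_01 setup, and the ADP5589 case now sets the flag too. Without it, the earlier hunk in adp5589_build_gpiomap() reserved pin 5 unconditionally on the ADP5589:

        /* earlier hunk: with support_row5 false, R5 can never be used as GPIO */
        if (!kpad->support_row5)
                pin_used[5] = true;
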
diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c
index 378db10001df5067adcf4fe5bfc2b38e42098574..4401be225d64b28b8ffa798ba71cce07c8a81282 100644
@@ -304,8 +304,10 @@ static int cap11xx_init_leds(struct device *dev,
                led->cdev.brightness = LED_OFF;
 
                error = of_property_read_u32(child, "reg", &reg);
-               if (error != 0 || reg >= num_leds)
+               if (error != 0 || reg >= num_leds) {
+                       of_node_put(child);
                        return -EINVAL;
+               }
 
                led->reg = reg;
                led->priv = priv;
@@ -313,8 +315,10 @@ static int cap11xx_init_leds(struct device *dev,
                INIT_WORK(&led->work, cap11xx_led_work);
 
                error = devm_led_classdev_register(dev, &led->cdev);
-               if (error)
+               if (error) {
+                       of_node_put(child);
                        return error;
+               }
 
                priv->num_leds++;
                led++;
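
The added of_node_put() calls fix a device-node refcount leak: for_each_child_of_node() takes a reference on each child and drops it when advancing to the next one, so any early return from the loop body must drop the current child's reference itself. The general shape of the fix:

        struct device_node *child;

        for_each_child_of_node(node, child) {
                if (setup_failed) {             /* hypothetical error condition */
                        of_node_put(child);     /* drop the ref the iterator took */
                        return -EINVAL;
                }
        }
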
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index b9f01bd1b7ef1b76a7b4c0d46f71fc3e4ace1818..29093657f2ef8233c667aace8406790a7d859f1d 100644
@@ -630,7 +630,7 @@ gpio_keys_get_devtree_pdata(struct device *dev)
        if (!node)
                return ERR_PTR(-ENODEV);
 
-       nbuttons = of_get_child_count(node);
+       nbuttons = of_get_available_child_count(node);
        if (nbuttons == 0)
                return ERR_PTR(-ENODEV);
 
@@ -645,8 +645,10 @@ gpio_keys_get_devtree_pdata(struct device *dev)
 
        pdata->rep = !!of_get_property(node, "autorepeat", NULL);
 
+       of_property_read_string(node, "label", &pdata->name);
+
        i = 0;
-       for_each_child_of_node(node, pp) {
+       for_each_available_child_of_node(node, pp) {
                enum of_gpio_flags flags;
 
                button = &pdata->buttons[i++];
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index d6d16fa782815481e04609771b09b32e862ce679..1f2337abcf2f333de7b10cc449aaad56aadd5369 100644
@@ -733,7 +733,7 @@ config INPUT_XEN_KBDDEV_FRONTEND
          module will be called xen-kbdfront.
 
 config INPUT_SIRFSOC_ONKEY
-       bool "CSR SiRFSoC power on/off/suspend key support"
+       tristate "CSR SiRFSoC power on/off/suspend key support"
        depends on ARCH_SIRF && OF
        default y
        help
diff --git a/drivers/input/misc/sirfsoc-onkey.c b/drivers/input/misc/sirfsoc-onkey.c
index 9d5b89befe6fb059e593c6fe1e6265f0b3819199..ed7237f1953966c378c3822faa9a40f7c6a044dd 100644
@@ -101,7 +101,7 @@ static void sirfsoc_pwrc_close(struct input_dev *input)
 static const struct of_device_id sirfsoc_pwrc_of_match[] = {
        { .compatible = "sirf,prima2-pwrc" },
        {},
-}
+};
 MODULE_DEVICE_TABLE(of, sirfsoc_pwrc_of_match);
 
 static int sirfsoc_pwrc_probe(struct platform_device *pdev)
diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
index e272f06258cefb3c2119058298ad10f2cb77b7ce..a3f0f5a47490e936e31b45594861d692503429b1 100644
@@ -458,8 +458,6 @@ int vmmouse_init(struct psmouse *psmouse)
        priv->abs_dev = abs_dev;
        psmouse->private = priv;
 
-       input_set_capability(rel_dev, EV_REL, REL_WHEEL);
-
        /* Set up and register absolute device */
        snprintf(priv->phys, sizeof(priv->phys), "%s/input1",
                 psmouse->ps2dev.serio->phys);
@@ -475,10 +473,6 @@ int vmmouse_init(struct psmouse *psmouse)
        abs_dev->id.version = psmouse->model;
        abs_dev->dev.parent = &psmouse->ps2dev.serio->dev;
 
-       error = input_register_device(priv->abs_dev);
-       if (error)
-               goto init_fail;
-
        /* Set absolute device capabilities */
        input_set_capability(abs_dev, EV_KEY, BTN_LEFT);
        input_set_capability(abs_dev, EV_KEY, BTN_RIGHT);
@@ -488,6 +482,13 @@ int vmmouse_init(struct psmouse *psmouse)
        input_set_abs_params(abs_dev, ABS_X, 0, VMMOUSE_MAX_X, 0, 0);
        input_set_abs_params(abs_dev, ABS_Y, 0, VMMOUSE_MAX_Y, 0, 0);
 
+       error = input_register_device(priv->abs_dev);
+       if (error)
+               goto init_fail;
+
+       /* Add wheel capability to the relative device */
+       input_set_capability(rel_dev, EV_REL, REL_WHEEL);
+
        psmouse->protocol_handler = vmmouse_process_byte;
        psmouse->disconnect = vmmouse_disconnect;
        psmouse->reconnect = vmmouse_reconnect;
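
The reordering matters because input_register_device() makes the device visible to userspace and input handlers immediately; capabilities declared afterwards can be missed by handlers that connect in between. Hence the fixed sequence:

        /* declare every capability first ... */
        input_set_capability(abs_dev, EV_KEY, BTN_LEFT);
        input_set_abs_params(abs_dev, ABS_X, 0, VMMOUSE_MAX_X, 0, 0);

        /* ... and only then go live */
        error = input_register_device(priv->abs_dev);
        if (error)
                goto init_fail;
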
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 8f828975ab10b03746e700dd26dce1cb03d2c17e..1ca7f551e2dabe73896f780ec53e4c6735d13019 100644
@@ -134,7 +134,7 @@ static void serio_find_driver(struct serio *serio)
        int error;
 
        error = device_attach(&serio->dev);
-       if (error < 0)
+       if (error < 0 && error != -EPROBE_DEFER)
                dev_warn(&serio->dev,
                         "device_attach() failed for %s (%s), error: %d\n",
                         serio->phys, serio->name, error);
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 2d5794ec338b72a1cea0367bccdaf9654db025d4..2160512e861af57d289fb5873ef80b81c2fbf7c9 100644
@@ -113,8 +113,8 @@ struct t7_config {
 #define MXT_T9_DETECT          (1 << 7)
 
 struct t9_range {
-       u16 x;
-       u16 y;
+       __le16 x;
+       __le16 y;
 } __packed;
 
 /* MXT_TOUCH_MULTI_T9 orient */
@@ -216,6 +216,7 @@ struct mxt_data {
        unsigned int irq;
        unsigned int max_x;
        unsigned int max_y;
+       bool xy_switch;
        bool in_bootloader;
        u16 mem_size;
        u8 t100_aux_ampl;
@@ -1665,8 +1666,8 @@ static int mxt_read_t9_resolution(struct mxt_data *data)
        if (error)
                return error;
 
-       le16_to_cpus(&range.x);
-       le16_to_cpus(&range.y);
+       data->max_x = get_unaligned_le16(&range.x);
+       data->max_y = get_unaligned_le16(&range.y);
 
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T9_ORIENT,
@@ -1674,23 +1675,7 @@ static int mxt_read_t9_resolution(struct mxt_data *data)
        if (error)
                return error;
 
-       /* Handle default values */
-       if (range.x == 0)
-               range.x = 1023;
-
-       if (range.y == 0)
-               range.y = 1023;
-
-       if (orient & MXT_T9_ORIENT_SWITCH) {
-               data->max_x = range.y;
-               data->max_y = range.x;
-       } else {
-               data->max_x = range.x;
-               data->max_y = range.y;
-       }
-
-       dev_dbg(&client->dev,
-               "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+       data->xy_switch = orient & MXT_T9_ORIENT_SWITCH;
 
        return 0;
 }
@@ -1708,13 +1693,14 @@ static int mxt_read_t100_config(struct mxt_data *data)
        if (!object)
                return -EINVAL;
 
+       /* read touchscreen dimensions */
        error = __mxt_read_reg(client,
                               object->start_address + MXT_T100_XRANGE,
                               sizeof(range_x), &range_x);
        if (error)
                return error;
 
-       le16_to_cpus(&range_x);
+       data->max_x = get_unaligned_le16(&range_x);
 
        error = __mxt_read_reg(client,
                               object->start_address + MXT_T100_YRANGE,
@@ -1722,36 +1708,24 @@ static int mxt_read_t100_config(struct mxt_data *data)
        if (error)
                return error;
 
-       le16_to_cpus(&range_y);
+       data->max_y = get_unaligned_le16(&range_y);
 
+       /* read orientation config */
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T100_CFG1,
                                1, &cfg);
        if (error)
                return error;
 
+       data->xy_switch = cfg & MXT_T100_CFG_SWITCHXY;
+
+       /* allocate aux bytes */
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T100_TCHAUX,
                                1, &tchaux);
        if (error)
                return error;
 
-       /* Handle default values */
-       if (range_x == 0)
-               range_x = 1023;
-
-       if (range_y == 0)
-               range_y = 1023;
-
-       if (cfg & MXT_T100_CFG_SWITCHXY) {
-               data->max_x = range_y;
-               data->max_y = range_x;
-       } else {
-               data->max_x = range_x;
-               data->max_y = range_y;
-       }
-
-       /* allocate aux bytes */
        aux = 6;
 
        if (tchaux & MXT_T100_TCHAUX_VECT)
@@ -1767,9 +1741,6 @@ static int mxt_read_t100_config(struct mxt_data *data)
                "T100 aux mappings vect:%u ampl:%u area:%u\n",
                data->t100_aux_vect, data->t100_aux_ampl, data->t100_aux_area);
 
-       dev_info(&client->dev,
-                "T100 Touchscreen size X%uY%u\n", data->max_x, data->max_y);
-
        return 0;
 }
 
@@ -1828,6 +1799,19 @@ static int mxt_initialize_input_device(struct mxt_data *data)
                return -EINVAL;
        }
 
+       /* Handle default values and orientation switch */
+       if (data->max_x == 0)
+               data->max_x = 1023;
+
+       if (data->max_y == 0)
+               data->max_y = 1023;
+
+       if (data->xy_switch)
+               swap(data->max_x, data->max_y);
+
+       dev_info(dev, "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+
+       /* Register input device */
        input_dev = input_allocate_device();
        if (!input_dev) {
                dev_err(dev, "Failed to allocate memory\n");
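
Typing the fields __le16 and reading them with get_unaligned_le16() improves two things at once: sparse can now verify the endianness conversions, and members of the __packed structs are read safely even when unaligned, whereas le16_to_cpus() converted the buffer in place and assumed natural alignment. The conversion idiom:

        #include <asm/unaligned.h>

        __le16 raw;                             /* little-endian on the wire, any alignment */
        u16 val = get_unaligned_le16(&raw);     /* one read, one conversion, no in-place mutation */
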
index 5d4903a402cc6a5f183010ded6e7af17c1136a49..69828d015d45ffa4747368e79f6fa5f47108c0c9 100644
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 0b0f8c17f3f7e0f4df1e69144d693b46741f4025..23fbe382da8b420791a1fec28962454132d4e7ef 100644
@@ -822,16 +822,22 @@ static void edt_ft5x06_ts_get_defaults(struct device *dev,
        int error;
 
        error = device_property_read_u32(dev, "threshold", &val);
-       if (!error)
-               reg_addr->reg_threshold = val;
+       if (!error) {
+               edt_ft5x06_register_write(tsdata, reg_addr->reg_threshold, val);
+               tsdata->threshold = val;
+       }
 
        error = device_property_read_u32(dev, "gain", &val);
-       if (!error)
-               reg_addr->reg_gain = val;
+       if (!error) {
+               edt_ft5x06_register_write(tsdata, reg_addr->reg_gain, val);
+               tsdata->gain = val;
+       }
 
        error = device_property_read_u32(dev, "offset", &val);
-       if (!error)
-               reg_addr->reg_offset = val;
+       if (!error) {
+               edt_ft5x06_register_write(tsdata, reg_addr->reg_offset, val);
+               tsdata->offset = val;
+       }
 }
 
 static void
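The rewritten edt_ft5x06_ts_get_defaults() treats the DT values as settings to push into the chip rather than as register addresses (the old code clobbered reg_addr->* with them). A sketch of the corrected optional-property pattern; apply_optional_u32() and write_reg are illustrative stand-ins, not driver API:

/* Read an optional u32 device property; when present, write it to the
 * hardware and cache it for later reads. Names are hypothetical. */
static void apply_optional_u32(struct device *dev, const char *prop,
			       u32 *cache, void (*write_reg)(u32 val))
{
	u32 val;

	if (!device_property_read_u32(dev, prop, &val)) {
		write_reg(val);		/* push the value to the chip */
		*cache = val;		/* remember what we set */
	}
}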
index 539b0dea8034b5d5b61e93f5927b353448197c67..e5e223938eecc9f013e33977b1de50bbb3cfae3d 100644 (file)
@@ -2049,7 +2049,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
        /* Update device table */
        set_dte_entry(dev_data->devid, domain, ats);
        if (alias != dev_data->devid)
-               set_dte_entry(dev_data->devid, domain, ats);
+               set_dte_entry(alias, domain, ats);
 
        device_flush_dte(dev_data);
 }
index 62a400c5ba0614fde00d18ad2207b430d99433ee..fb092f3f11cb3e32ec8462925587e9778f862a25 100644 (file)
@@ -1353,7 +1353,7 @@ void dmar_disable_qi(struct intel_iommu *iommu)
 
        raw_spin_lock_irqsave(&iommu->register_lock, flags);
 
-       sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
+       sts =  readl(iommu->reg + DMAR_GSTS_REG);
        if (!(sts & DMA_GSTS_QIES))
                goto end;
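This hunk (and the identical one in the interrupt-remapping path further down) corrects the access width: DMAR_GSTS_REG is a 32-bit register, so the 64-bit dmar_readq() read beyond it. A minimal sketch of width-matched status checking; dmar_status_bit_set() is a made-up helper name:

/* Check a status bit with an accessor that matches the register width;
 * GSTS is 32 bits wide, so readl(), not a 64-bit read. */
static bool dmar_status_bit_set(void __iomem *reg_base, unsigned long off,
				u32 bit)
{
	u32 sts = readl(reg_base + off);

	return sts & bit;
}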
 
index ac7387686ddc7b2a7c7757f2cb3fbd003c8a23af..986a53e3eb96b4bb56faedfdb36b4bd658aacb99 100644 (file)
@@ -1489,7 +1489,7 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info)
 {
        struct pci_dev *pdev;
 
-       if (dev_is_pci(info->dev))
+       if (!dev_is_pci(info->dev))
                return;
 
        pdev = to_pci_dev(info->dev);
index 50464833d0b84732a4d397cee8b73ed43a721712..d9939fa9b58887567d1fe8a7a94c7e5f1452cdf0 100644 (file)
@@ -249,12 +249,30 @@ static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *s
 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+       struct intel_svm_dev *sdev;
 
+       /* This might end up being called from exit_mmap(), *before* the page
+        * tables are cleared. And __mmu_notifier_release() will delete us from
+        * the list of notifiers so that our invalidate_range() callback doesn't
+        * get called when the page tables are cleared. So we need to protect
+        * against hardware accessing those page tables.
+        *
+        * We do it by clearing the entry in the PASID table and then flushing
+        * the IOTLB and the PASID table caches. This might upset hardware;
+        * perhaps we'll want to point the PASID to a dummy PGD (like the zero
+        * page) so that we end up taking a fault that the hardware really
+        * *has* to handle gracefully without affecting other processes.
+        */
        svm->iommu->pasid_table[svm->pasid].val = 0;
+       wmb();
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(sdev, &svm->devs, list) {
+               intel_flush_pasid_dev(svm, sdev, svm->pasid);
+               intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
+       }
+       rcu_read_unlock();
 
-       /* There's no need to do any flush because we can't get here if there
-        * are any devices left anyway. */
-       WARN_ON(!list_empty(&svm->devs));
 }
 
 static const struct mmu_notifier_ops intel_mmuops = {
@@ -379,7 +397,6 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
                                goto out;
                        }
                        iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
-                       mm = NULL;
                } else
                        iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
                wmb();
@@ -442,11 +459,11 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
                                kfree_rcu(sdev, rcu);
 
                                if (list_empty(&svm->devs)) {
-                                       mmu_notifier_unregister(&svm->notifier, svm->mm);
 
                                        idr_remove(&svm->iommu->pasid_idr, svm->pasid);
                                        if (svm->mm)
-                                               mmput(svm->mm);
+                                               mmu_notifier_unregister(&svm->notifier, svm->mm);
+
                                        /* We mandate that no page faults may be outstanding
                                         * for the PASID when intel_svm_unbind_mm() is called.
                                         * If that is not obeyed, subtle errors will happen.
@@ -507,6 +524,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
        struct intel_svm *svm = NULL;
        int head, tail, handled = 0;
 
+       /* Clear PPR bit before reading head/tail registers, to
+        * ensure that we get a new interrupt if needed. */
+       writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
+
        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        while (head != tail) {
@@ -551,6 +572,9 @@ static irqreturn_t prq_event_thread(int irq, void *d)
                 * any faults on kernel addresses. */
                if (!svm->mm)
                        goto bad_req;
+               /* If the mm is already defunct, don't handle faults. */
+               if (!atomic_inc_not_zero(&svm->mm->mm_users))
+                       goto bad_req;
                down_read(&svm->mm->mmap_sem);
                vma = find_extend_vma(svm->mm, address);
                if (!vma || address < vma->vm_start)
@@ -567,6 +591,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
                result = QI_RESP_SUCCESS;
        invalid:
                up_read(&svm->mm->mmap_sem);
+               mmput(svm->mm);
        bad_req:
                /* Accounting for major/minor faults? */
                rcu_read_lock();
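The prq_event_thread() hunks add a take-a-reference-or-bail step before the fault handler walks the mm, paired with mmput() on the way out; without it the mm can be torn down underneath the page-request thread. A sketch of the pattern in isolation, with a hypothetical helper (later kernels spell the increment mmget_not_zero()):

/* Only touch an mm whose user count is still nonzero, and drop the
 * reference when done. handle_one_fault() is illustrative. */
static int handle_one_fault(struct mm_struct *mm, unsigned long address)
{
	int result = -EINVAL;

	if (!atomic_inc_not_zero(&mm->mm_users))
		return result;		/* mm is already defunct */

	down_read(&mm->mmap_sem);
	/* ... find_extend_vma() and handle_mm_fault() as in the hunk ... */
	up_read(&mm->mmap_sem);

	mmput(mm);			/* pairs with the increment above */
	return result;
}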
index c12ba4516df25b7201731b44ef4a175c78f2d0e0..ac596928f6b40af32e9c44ecf388ebf7ed4b10ed 100644 (file)
@@ -629,7 +629,7 @@ static void iommu_disable_irq_remapping(struct intel_iommu *iommu)
 
        raw_spin_lock_irqsave(&iommu->register_lock, flags);
 
-       sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
+       sts = readl(iommu->reg + DMAR_GSTS_REG);
        if (!(sts & DMA_GSTS_IRES))
                goto end;
 
index 8bbcbfe7695cf82aabda359b7a9036527e4951b3..381ca5a37a7b7de9b338236b28202c6c46f306df 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/barrier.h>
 
index 11fc2a27fa2ea90da2b65ef282a5db139c594c71..fb50911b3940c0bd7234a1312e320ccff020f492 100644 (file)
@@ -130,6 +130,11 @@ config ORION_IRQCHIP
        select IRQ_DOMAIN
        select MULTI_IRQ_HANDLER
 
+config PIC32_EVIC
+       bool
+       select GENERIC_IRQ_CHIP
+       select IRQ_DOMAIN
+
 config RENESAS_INTC_IRQPIN
        bool
        select IRQ_DOMAIN
@@ -154,6 +159,7 @@ config TB10X_IRQC
 config TS4800_IRQ
        tristate "TS-4800 IRQ controller"
        select IRQ_DOMAIN
+       depends on HAS_IOMEM
        help
          Support for the TS-4800 FPGA IRQ controller
 
index d4c2e4ebc30809fcb21b9f705f416e9c8dbb7ee3..18caacb60d581e953407ac6cae347c97bf7c4ce7 100644 (file)
@@ -58,3 +58,4 @@ obj-$(CONFIG_RENESAS_H8S_INTC)                += irq-renesas-h8s.o
 obj-$(CONFIG_ARCH_SA1100)              += irq-sa11x0.o
 obj-$(CONFIG_INGENIC_IRQ)              += irq-ingenic.o
 obj-$(CONFIG_IMX_GPCV2)                        += irq-imx-gpcv2.o
+obj-$(CONFIG_PIC32_EVIC)               += irq-pic32-evic.o
index b12a5d58546f922b460d6aac34c073ff363ef1b8..37199b9b2cfa260659a98da3eb1a48053e7bddfb 100644 (file)
@@ -86,7 +86,7 @@ int aic_common_set_priority(int priority, unsigned *val)
            priority > AT91_AIC_IRQ_MAX_PRIORITY)
                return -EINVAL;
 
-       *val &= AT91_AIC_PRIOR;
+       *val &= ~AT91_AIC_PRIOR;
        *val |= priority;
 
        return 0;
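The one-character fix above restores the usual read-modify-write idiom for a register field: clear the field with the inverted mask, then OR in the new value. With the un-inverted mask, the old code instead wiped every bit outside AT91_AIC_PRIOR. The idiom in isolation:

/* Update only the bits covered by mask, leaving the rest of reg intact. */
static inline u32 set_field(u32 reg, u32 mask, u32 val)
{
	return (reg & ~mask) | (val & mask);
}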
index e23d1d18f9d6a39731bdd34633e2bbc9bc473525..43dfd15c1dd22e4e18b1dbb37e1b3da811b0ceaf 100644 (file)
@@ -66,7 +66,10 @@ struct its_node {
        unsigned long           phys_base;
        struct its_cmd_block    *cmd_base;
        struct its_cmd_block    *cmd_write;
-       void                    *tables[GITS_BASER_NR_REGS];
+       struct {
+               void            *base;
+               u32             order;
+       } tables[GITS_BASER_NR_REGS];
        struct its_collection   *collections;
        struct list_head        its_device_list;
        u64                     flags;
@@ -75,6 +78,9 @@ struct its_node {
 
 #define ITS_ITT_ALIGN          SZ_256
 
+/* Convert page order to size in bytes */
+#define PAGE_ORDER_TO_SIZE(o)  (PAGE_SIZE << (o))
+
 struct event_lpi_map {
        unsigned long           *lpi_map;
        u16                     *col_map;
@@ -597,11 +603,6 @@ static void its_unmask_irq(struct irq_data *d)
        lpi_set_config(d, true);
 }
 
-static void its_eoi_irq(struct irq_data *d)
-{
-       gic_write_eoir(d->hwirq);
-}
-
 static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
                            bool force)
 {
@@ -638,7 +639,7 @@ static struct irq_chip its_irq_chip = {
        .name                   = "ITS",
        .irq_mask               = its_mask_irq,
        .irq_unmask             = its_unmask_irq,
-       .irq_eoi                = its_eoi_irq,
+       .irq_eoi                = irq_chip_eoi_parent,
        .irq_set_affinity       = its_set_affinity,
        .irq_compose_msi_msg    = its_irq_compose_msi_msg,
 };
@@ -807,9 +808,10 @@ static void its_free_tables(struct its_node *its)
        int i;
 
        for (i = 0; i < GITS_BASER_NR_REGS; i++) {
-               if (its->tables[i]) {
-                       free_page((unsigned long)its->tables[i]);
-                       its->tables[i] = NULL;
+               if (its->tables[i].base) {
+                       free_pages((unsigned long)its->tables[i].base,
+                                  its->tables[i].order);
+                       its->tables[i].base = NULL;
                }
        }
 }
@@ -842,7 +844,6 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
                u64 type = GITS_BASER_TYPE(val);
                u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
                int order = get_order(psz);
-               int alloc_size;
                int alloc_pages;
                u64 tmp;
                void *base;
@@ -874,8 +875,8 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
                        }
                }
 
-               alloc_size = (1 << order) * PAGE_SIZE;
-               alloc_pages = (alloc_size / psz);
+retry_alloc_baser:
+               alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
                if (alloc_pages > GITS_BASER_PAGES_MAX) {
                        alloc_pages = GITS_BASER_PAGES_MAX;
                        order = get_order(GITS_BASER_PAGES_MAX * psz);
@@ -889,7 +890,8 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
                        goto out_free;
                }
 
-               its->tables[i] = base;
+               its->tables[i].base = base;
+               its->tables[i].order = order;
 
 retry_baser:
                val = (virt_to_phys(base)                                |
@@ -927,7 +929,7 @@ retry_baser:
                        shr = tmp & GITS_BASER_SHAREABILITY_MASK;
                        if (!shr) {
                                cache = GITS_BASER_nC;
-                               __flush_dcache_area(base, alloc_size);
+                               __flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
                        }
                        goto retry_baser;
                }
@@ -938,13 +940,16 @@ retry_baser:
                         * size and retry. If we reach 4K, then
                         * something is horribly wrong...
                         */
+                       free_pages((unsigned long)base, order);
+                       its->tables[i].base = NULL;
+
                        switch (psz) {
                        case SZ_16K:
                                psz = SZ_4K;
-                               goto retry_baser;
+                               goto retry_alloc_baser;
                        case SZ_64K:
                                psz = SZ_16K;
-                               goto retry_baser;
+                               goto retry_alloc_baser;
                        }
                }
 
@@ -957,7 +962,7 @@ retry_baser:
                }
 
                pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
-                       (int)(alloc_size / entry_size),
+                       (int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
                        its_base_type_string[type],
                        (unsigned long)virt_to_phys(base),
                        psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
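The ITS changes above keep the allocation order next to each table's base pointer so that teardown mirrors the allocation exactly: the old free_page() call on a multi-page table leaked everything past the first page, and the retry path now frees the old table before reallocating at a smaller page size. A minimal sketch of that bookkeeping, with illustrative names:

/* Pair multi-page allocations with frees of the same order. */
struct table_alloc {
	void *base;
	u32 order;			/* size is PAGE_SIZE << order */
};

static void table_free(struct table_alloc *t)
{
	if (t->base) {
		free_pages((unsigned long)t->base, t->order);
		t->base = NULL;
	}
}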
index 911758c056c14152d5a248110c14f35bd48e9b78..8f9ebf714e2bbc154d7a045d27920f6c95f7cce6 100644 (file)
@@ -384,9 +384,6 @@ static struct irq_chip gic_chip = {
        .irq_unmask             = gic_unmask_irq,
        .irq_eoi                = gic_eoi_irq,
        .irq_set_type           = gic_set_type,
-#ifdef CONFIG_SMP
-       .irq_set_affinity       = gic_set_affinity,
-#endif
        .irq_get_irqchip_state  = gic_irq_get_irqchip_state,
        .irq_set_irqchip_state  = gic_irq_set_irqchip_state,
        .flags                  = IRQCHIP_SET_TYPE_MASKED |
@@ -400,9 +397,6 @@ static struct irq_chip gic_eoimode1_chip = {
        .irq_unmask             = gic_unmask_irq,
        .irq_eoi                = gic_eoimode1_eoi_irq,
        .irq_set_type           = gic_set_type,
-#ifdef CONFIG_SMP
-       .irq_set_affinity       = gic_set_affinity,
-#endif
        .irq_get_irqchip_state  = gic_irq_get_irqchip_state,
        .irq_set_irqchip_state  = gic_irq_set_irqchip_state,
        .irq_set_vcpu_affinity  = gic_irq_set_vcpu_affinity,
@@ -443,7 +437,7 @@ static void gic_cpu_if_up(struct gic_chip_data *gic)
        u32 bypass = 0;
        u32 mode = 0;
 
-       if (static_key_true(&supports_deactivate))
+       if (gic == &gic_data[0] && static_key_true(&supports_deactivate))
                mode = GIC_CPU_CTRL_EOImodeNS;
 
        /*
@@ -1039,6 +1033,11 @@ static void __init __gic_init_bases(unsigned int gic_nr, int irq_start,
                gic->chip.name = kasprintf(GFP_KERNEL, "GIC-%d", gic_nr);
        }
 
+#ifdef CONFIG_SMP
+       if (gic_nr == 0)
+               gic->chip.irq_set_affinity = gic_set_affinity;
+#endif
+
 #ifdef CONFIG_GIC_NON_BANKED
        if (percpu_offset) { /* Franken-GIC without banked registers... */
                unsigned int cpu;
index c22e2d40cb302452624c29efd44c483e577f5333..efe50845939d91fee149acb085912697031dd9a0 100644 (file)
@@ -241,6 +241,7 @@ static int __init asm9260_of_init(struct device_node *np,
                writel(0, icoll_priv.intr + i);
 
        icoll_add_domain(np, ASM9260_NUM_IRQS);
+       set_handle_irq(icoll_handle_irq);
 
        return 0;
 }
diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c
new file mode 100644 (file)
index 0000000..e7155db
--- /dev/null
@@ -0,0 +1,324 @@
+/*
+ * Cristian Birsan <cristian.birsan@microchip.com>
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2016 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irq.h>
+
+#include <asm/irq.h>
+#include <asm/traps.h>
+#include <asm/mach-pic32/pic32.h>
+
+#define REG_INTCON     0x0000
+#define REG_INTSTAT    0x0020
+#define REG_IFS_OFFSET 0x0040
+#define REG_IEC_OFFSET 0x00C0
+#define REG_IPC_OFFSET 0x0140
+#define REG_OFF_OFFSET 0x0540
+
+#define MAJPRI_MASK    0x07
+#define SUBPRI_MASK    0x03
+#define PRIORITY_MASK  0x1F
+
+#define PIC32_INT_PRI(pri, subpri)                             \
+       ((((pri) & MAJPRI_MASK) << 2) | ((subpri) & SUBPRI_MASK))
+
+struct evic_chip_data {
+       u32 irq_types[NR_IRQS];
+       u32 ext_irqs[8];
+};
+
+static struct irq_domain *evic_irq_domain;
+static void __iomem *evic_base;
+
+asmlinkage void __weak plat_irq_dispatch(void)
+{
+       unsigned int irq, hwirq;
+
+       hwirq = readl(evic_base + REG_INTSTAT) & 0xFF;
+       irq = irq_linear_revmap(evic_irq_domain, hwirq);
+       do_IRQ(irq);
+}
+
+static struct evic_chip_data *irqd_to_priv(struct irq_data *data)
+{
+       return (struct evic_chip_data *)data->domain->host_data;
+}
+
+static int pic32_set_ext_polarity(int bit, u32 type)
+{
+       /*
+        * External interrupts can be either edge rising or edge falling,
+        * but not both.
+        */
+       switch (type) {
+       case IRQ_TYPE_EDGE_RISING:
+               writel(BIT(bit), evic_base + PIC32_SET(REG_INTCON));
+               break;
+       case IRQ_TYPE_EDGE_FALLING:
+               writel(BIT(bit), evic_base + PIC32_CLR(REG_INTCON));
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int pic32_set_type_edge(struct irq_data *data,
+                              unsigned int flow_type)
+{
+       struct evic_chip_data *priv = irqd_to_priv(data);
+       int ret;
+       int i;
+
+       if (!(flow_type & IRQ_TYPE_EDGE_BOTH))
+               return -EBADR;
+
+       /* set polarity for external interrupts only */
+       for (i = 0; i < ARRAY_SIZE(priv->ext_irqs); i++) {
+               if (priv->ext_irqs[i] == data->hwirq) {
+                       ret = pic32_set_ext_polarity(i + 1, flow_type);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       irqd_set_trigger_type(data, flow_type);
+
+       return IRQ_SET_MASK_OK;
+}
+
+static void pic32_bind_evic_interrupt(int irq, int set)
+{
+       writel(set, evic_base + REG_OFF_OFFSET + irq * 4);
+}
+
+static void pic32_set_irq_priority(int irq, int priority)
+{
+       u32 reg, shift;
+
+       reg = irq / 4;
+       shift = (irq % 4) * 8;
+
+       writel(PRIORITY_MASK << shift,
+               evic_base + PIC32_CLR(REG_IPC_OFFSET + reg * 0x10));
+       writel(priority << shift,
+               evic_base + PIC32_SET(REG_IPC_OFFSET + reg * 0x10));
+}
+
+#define IRQ_REG_MASK(_hwirq, _reg, _mask)                     \
+       do {                                                   \
+               _reg = _hwirq / 32;                            \
+               _mask = 1 << (_hwirq % 32);                    \
+       } while (0)
+
+static int pic32_irq_domain_map(struct irq_domain *d, unsigned int virq,
+                               irq_hw_number_t hw)
+{
+       struct evic_chip_data *priv = d->host_data;
+       struct irq_data *data;
+       int ret;
+       u32 iecclr, ifsclr;
+       u32 reg, mask;
+
+       ret = irq_map_generic_chip(d, virq, hw);
+       if (ret)
+               return ret;
+
+       /*
+        * Piggyback on xlate function to move to an alternate chip as necessary
+        * at time of mapping instead of allowing the flow handler/chip to be
+        * changed later. This requires all interrupts to be configured through
+        * DT.
+        */
+       if (priv->irq_types[hw] & IRQ_TYPE_SENSE_MASK) {
+               data = irq_domain_get_irq_data(d, virq);
+               irqd_set_trigger_type(data, priv->irq_types[hw]);
+               irq_setup_alt_chip(data, priv->irq_types[hw]);
+       }
+
+       IRQ_REG_MASK(hw, reg, mask);
+
+       iecclr = PIC32_CLR(REG_IEC_OFFSET + reg * 0x10);
+       ifsclr = PIC32_CLR(REG_IFS_OFFSET + reg * 0x10);
+
+       /* mask and clear flag */
+       writel(mask, evic_base + iecclr);
+       writel(mask, evic_base + ifsclr);
+
+       /* default priority is required */
+       pic32_set_irq_priority(hw, PIC32_INT_PRI(2, 0));
+
+       return ret;
+}
+
+int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+                          const u32 *intspec, unsigned int intsize,
+                          irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+       struct evic_chip_data *priv = d->host_data;
+
+       if (WARN_ON(intsize < 2))
+               return -EINVAL;
+
+       if (WARN_ON(intspec[0] >= NR_IRQS))
+               return -EINVAL;
+
+       *out_hwirq = intspec[0];
+       *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+
+       priv->irq_types[intspec[0]] = intspec[1] & IRQ_TYPE_SENSE_MASK;
+
+       return 0;
+}
+
+static const struct irq_domain_ops pic32_irq_domain_ops = {
+       .map    = pic32_irq_domain_map,
+       .xlate  = pic32_irq_domain_xlate,
+};
+
+static void __init pic32_ext_irq_of_init(struct irq_domain *domain)
+{
+       struct device_node *node = irq_domain_get_of_node(domain);
+       struct evic_chip_data *priv = domain->host_data;
+       struct property *prop;
+       const __le32 *p;
+       u32 hwirq;
+       int i = 0;
+       const char *pname = "microchip,external-irqs";
+
+       of_property_for_each_u32(node, pname, prop, p, hwirq) {
+               if (i >= ARRAY_SIZE(priv->ext_irqs)) {
+                       pr_warn("More than %d external irq, skip rest\n",
+                               ARRAY_SIZE(priv->ext_irqs));
+                       break;
+               }
+
+               priv->ext_irqs[i] = hwirq;
+               i++;
+       }
+}
+
+static int __init pic32_of_init(struct device_node *node,
+                               struct device_node *parent)
+{
+       struct irq_chip_generic *gc;
+       struct evic_chip_data *priv;
+       unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+       int nchips, ret;
+       int i;
+
+       nchips = DIV_ROUND_UP(NR_IRQS, 32);
+
+       evic_base = of_iomap(node, 0);
+       if (!evic_base)
+               return -ENOMEM;
+
+       priv = kcalloc(nchips, sizeof(*priv), GFP_KERNEL);
+       if (!priv) {
+               ret = -ENOMEM;
+               goto err_iounmap;
+       }
+
+       evic_irq_domain = irq_domain_add_linear(node, nchips * 32,
+                                               &pic32_irq_domain_ops,
+                                               priv);
+       if (!evic_irq_domain) {
+               ret = -ENOMEM;
+               goto err_free_priv;
+       }
+
+       /*
+        * The PIC32 EVIC has a linear list of irqs and the type of each
+        * irq is determined by the hardware peripheral the EVIC is arbitrating.
+        * These irq types are defined in the datasheet as "persistent" and
+        * "non-persistent" which are mapped here to level and edge
+        * respectively. To manage the different flow handler requirements of
+        * each irq type, different chip_types are used.
+        */
+       ret = irq_alloc_domain_generic_chips(evic_irq_domain, 32, 2,
+                                            "evic-level", handle_level_irq,
+                                            clr, 0, 0);
+       if (ret)
+               goto err_domain_remove;
+
+       board_bind_eic_interrupt = &pic32_bind_evic_interrupt;
+
+       for (i = 0; i < nchips; i++) {
+               u32 ifsclr = PIC32_CLR(REG_IFS_OFFSET + (i * 0x10));
+               u32 iec = REG_IEC_OFFSET + (i * 0x10);
+
+               gc = irq_get_domain_generic_chip(evic_irq_domain, i * 32);
+
+               gc->reg_base = evic_base;
+               gc->unused = 0;
+
+               /*
+                * Level/persistent interrupts have a special requirement that
+                * the condition generating the interrupt be cleared before the
+                * interrupt flag (ifs) can be cleared. chip.irq_eoi is used to
+                * complete the interrupt with an ack.
+                */
+               gc->chip_types[0].type                  = IRQ_TYPE_LEVEL_MASK;
+               gc->chip_types[0].handler               = handle_fasteoi_irq;
+               gc->chip_types[0].regs.ack              = ifsclr;
+               gc->chip_types[0].regs.mask             = iec;
+               gc->chip_types[0].chip.name             = "evic-level";
+               gc->chip_types[0].chip.irq_eoi          = irq_gc_ack_set_bit;
+               gc->chip_types[0].chip.irq_mask         = irq_gc_mask_clr_bit;
+               gc->chip_types[0].chip.irq_unmask       = irq_gc_mask_set_bit;
+               gc->chip_types[0].chip.flags            = IRQCHIP_SKIP_SET_WAKE;
+
+               /* Edge interrupts */
+               gc->chip_types[1].type                  = IRQ_TYPE_EDGE_BOTH;
+               gc->chip_types[1].handler               = handle_edge_irq;
+               gc->chip_types[1].regs.ack              = ifsclr;
+               gc->chip_types[1].regs.mask             = iec;
+               gc->chip_types[1].chip.name             = "evic-edge";
+               gc->chip_types[1].chip.irq_ack          = irq_gc_ack_set_bit;
+               gc->chip_types[1].chip.irq_mask         = irq_gc_mask_clr_bit;
+               gc->chip_types[1].chip.irq_unmask       = irq_gc_mask_set_bit;
+               gc->chip_types[1].chip.irq_set_type     = pic32_set_type_edge;
+               gc->chip_types[1].chip.flags            = IRQCHIP_SKIP_SET_WAKE;
+
+               gc->private = &priv[i];
+       }
+
+       irq_set_default_host(evic_irq_domain);
+
+       /*
+        * External interrupts have software-configurable edge polarity. They
+        * are listed in the DT so that, when requested, polarity can be
+        * configured for these interrupts and no others.
+        */
+       pic32_ext_irq_of_init(evic_irq_domain);
+
+       return 0;
+
+err_domain_remove:
+       irq_domain_remove(evic_irq_domain);
+
+err_free_priv:
+       kfree(priv);
+
+err_iounmap:
+       iounmap(evic_base);
+
+       return ret;
+}
+
+IRQCHIP_DECLARE(pic32_evic, "microchip,pic32mzda-evic", pic32_of_init);
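For reference, the register math this driver encodes in IRQ_REG_MASK() and PIC32_INT_PRI(): each bank of 32 hwirqs maps onto one IFS/IEC register (index hwirq / 32, bit hwirq % 32), and the IPC registers pack four interrupt priorities 8 bits apart. A standalone restatement with made-up function names:

/* Locate the per-bank register index and bit for a hardware irq number. */
static void evic_locate(unsigned int hwirq, u32 *reg, u32 *mask)
{
	*reg  = hwirq / 32;
	*mask = 1U << (hwirq % 32);
}

/* Pack a (priority, subpriority) pair into its slot of an IPC register. */
static u32 evic_priority_bits(unsigned int hwirq, u32 pri, u32 subpri)
{
	u32 shift = (hwirq % 4) * 8;	/* four irqs per IPC register */

	return (((pri & 0x07) << 2) | (subpri & 0x03)) << shift;
}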
index c71914e8f596c3700d8c5a4da2448b32acf8c41f..5dc5a760c7236971a81dfcf70cd78542abadf8f4 100644 (file)
@@ -605,7 +605,7 @@ err:
        return ERR_PTR(ret);
 }
 
-static struct s3c_irq_data init_eint[32] = {
+static struct s3c_irq_data __maybe_unused init_eint[32] = {
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
index 0704362f4c824c6ba2b87e42353f0481bcff36e5..376b28074e0d8937c43036b647de218ef45c14b7 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/of_irq.h>
 
 #include <asm/exception.h>
-#include <asm/mach/irq.h>
 
 #define SUN4I_IRQ_VECTOR_REG           0x00
 #define SUN4I_IRQ_PROTECTION_REG       0x08
index 2a506fe0c8a4500a16e83b1aba70a6ed2d192185..d1f8ab915b15cc69b5fc9564777d413dad0f8318 100644 (file)
@@ -373,13 +373,7 @@ static void gigaset_freecshw(struct cardstate *cs)
 
 static void gigaset_device_release(struct device *dev)
 {
-       struct cardstate *cs = dev_get_drvdata(dev);
-
-       if (!cs)
-               return;
-       dev_set_drvdata(dev, NULL);
-       kfree(cs->hw.ser);
-       cs->hw.ser = NULL;
+       kfree(container_of(dev, struct ser_cardstate, dev.dev));
 }
 
 /*
@@ -408,7 +402,6 @@ static int gigaset_initcshw(struct cardstate *cs)
                cs->hw.ser = NULL;
                return rc;
        }
-       dev_set_drvdata(&cs->hw.ser->dev.dev, cs);
 
        tasklet_init(&cs->write_tasklet,
                     gigaset_modem_fill, (unsigned long) cs);
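The gigaset fix adopts the standard release idiom: recover the enclosing refcounted object from the struct device with container_of() and free it there, instead of round-tripping through drvdata (which the old code also had to NULL out defensively). The idiom in isolation, reusing the field layout from the hunk:

/* The struct device is embedded in ser_cardstate at dev.dev; when its
 * last reference drops, free the whole containing object. */
static void example_device_release(struct device *dev)
{
	struct ser_cardstate *scs =
		container_of(dev, struct ser_cardstate, dev.dev);

	kfree(scs);
}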
index 8e2944784e0004e4fc933d691607c4e822c3bf66..afde4edef9ae895fa009fb5ad7305cc13af2c22e 100644 (file)
@@ -392,7 +392,7 @@ read_dma(struct tiger_ch *bc, u32 idx, int cnt)
        }
        stat = bchannel_get_rxbuf(&bc->bch, cnt);
        /* only transparent mode uses the count here; HDLC overrun is detected later */
-       if (stat == ENOMEM) {
+       if (stat == -ENOMEM) {
                pr_warning("%s.B%d: No memory for %d bytes\n",
                           card->name, bc->bch.nr, cnt);
                return;
index 7e0f42acb7376cda1378c22f9712d7f006c39af2..a7a0a22cf1a59661b22f9997589eab9e73a13ec7 100644 (file)
@@ -2,6 +2,6 @@
 # Makefile for Open-Channel SSDs.
 #
 
-obj-$(CONFIG_NVM)              := core.o
+obj-$(CONFIG_NVM)              := core.o sysblk.o
 obj-$(CONFIG_NVM_GENNVM)       += gennvm.o
 obj-$(CONFIG_NVM_RRPC)         += rrpc.o
index 8f41b245cd55b55862911baaf2220d885fc30142..9f6acd5d1d2e9359730ad595f92d3e902bc40d17 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/miscdevice.h>
 #include <linux/lightnvm.h>
+#include <linux/sched/sysctl.h>
 #include <uapi/linux/lightnvm.h>
 
 static LIST_HEAD(nvm_targets);
@@ -105,6 +106,9 @@ struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev)
        lockdep_assert_held(&nvm_lock);
 
        list_for_each_entry(mt, &nvm_mgrs, list) {
+               if (strncmp(dev->sb.mmtype, mt->name, NVM_MMTYPE_LEN))
+                       continue;
+
                ret = mt->register_mgr(dev);
                if (ret < 0) {
                        pr_err("nvm: media mgr failed to init (%d) on dev %s\n",
@@ -166,6 +170,20 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name)
        return NULL;
 }
 
+struct nvm_block *nvm_get_blk_unlocked(struct nvm_dev *dev, struct nvm_lun *lun,
+                                                       unsigned long flags)
+{
+       return dev->mt->get_blk_unlocked(dev, lun, flags);
+}
+EXPORT_SYMBOL(nvm_get_blk_unlocked);
+
+/* Assumes that all valid pages have already been moved on release to bm */
+void nvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk)
+{
+       return dev->mt->put_blk_unlocked(dev, blk);
+}
+EXPORT_SYMBOL(nvm_put_blk_unlocked);
+
 struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun,
                                                        unsigned long flags)
 {
@@ -192,6 +210,206 @@ int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk)
 }
 EXPORT_SYMBOL(nvm_erase_blk);
 
+void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+       int i;
+
+       if (rqd->nr_pages > 1) {
+               for (i = 0; i < rqd->nr_pages; i++)
+                       rqd->ppa_list[i] = dev_to_generic_addr(dev,
+                                                       rqd->ppa_list[i]);
+       } else {
+               rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
+       }
+}
+EXPORT_SYMBOL(nvm_addr_to_generic_mode);
+
+void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+       int i;
+
+       if (rqd->nr_pages > 1) {
+               for (i = 0; i < rqd->nr_pages; i++)
+                       rqd->ppa_list[i] = generic_to_dev_addr(dev,
+                                                       rqd->ppa_list[i]);
+       } else {
+               rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
+       }
+}
+EXPORT_SYMBOL(nvm_generic_to_addr_mode);
+
+int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
+                                       struct ppa_addr *ppas, int nr_ppas)
+{
+       int i, plane_cnt, pl_idx;
+
+       if (dev->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
+               rqd->nr_pages = 1;
+               rqd->ppa_addr = ppas[0];
+
+               return 0;
+       }
+
+       plane_cnt = (1 << dev->plane_mode);
+       rqd->nr_pages = plane_cnt * nr_ppas;
+
+       if (dev->ops->max_phys_sect < rqd->nr_pages)
+               return -EINVAL;
+
+       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
+       if (!rqd->ppa_list) {
+               pr_err("nvm: failed to allocate dma memory\n");
+               return -ENOMEM;
+       }
+
+       for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
+               for (i = 0; i < nr_ppas; i++) {
+                       ppas[i].g.pl = pl_idx;
+                       rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppas[i];
+               }
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(nvm_set_rqd_ppalist);
+
+void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+       if (!rqd->ppa_list)
+               return;
+
+       nvm_dev_dma_free(dev, rqd->ppa_list, rqd->dma_ppa_list);
+}
+EXPORT_SYMBOL(nvm_free_rqd_ppalist);
+
+int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas)
+{
+       struct nvm_rq rqd;
+       int ret;
+
+       if (!dev->ops->erase_block)
+               return 0;
+
+       memset(&rqd, 0, sizeof(struct nvm_rq));
+
+       ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas);
+       if (ret)
+               return ret;
+
+       nvm_generic_to_addr_mode(dev, &rqd);
+
+       ret = dev->ops->erase_block(dev, &rqd);
+
+       nvm_free_rqd_ppalist(dev, &rqd);
+
+       return ret;
+}
+EXPORT_SYMBOL(nvm_erase_ppa);
+
+void nvm_end_io(struct nvm_rq *rqd, int error)
+{
+       rqd->error = error;
+       rqd->end_io(rqd);
+}
+EXPORT_SYMBOL(nvm_end_io);
+
+static void nvm_end_io_sync(struct nvm_rq *rqd)
+{
+       struct completion *waiting = rqd->wait;
+
+       rqd->wait = NULL;
+
+       complete(waiting);
+}
+
+int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas,
+                               int opcode, int flags, void *buf, int len)
+{
+       DECLARE_COMPLETION_ONSTACK(wait);
+       struct nvm_rq rqd;
+       struct bio *bio;
+       int ret;
+       unsigned long hang_check;
+
+       bio = bio_map_kern(dev->q, buf, len, GFP_KERNEL);
+       if (IS_ERR_OR_NULL(bio))
+               return -ENOMEM;
+
+       memset(&rqd, 0, sizeof(struct nvm_rq));
+       ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas);
+       if (ret) {
+               bio_put(bio);
+               return ret;
+       }
+
+       rqd.opcode = opcode;
+       rqd.bio = bio;
+       rqd.wait = &wait;
+       rqd.dev = dev;
+       rqd.end_io = nvm_end_io_sync;
+       rqd.flags = flags;
+       nvm_generic_to_addr_mode(dev, &rqd);
+
+       ret = dev->ops->submit_io(dev, &rqd);
+
+       /* Prevent hang_check timer from firing at us during very long I/O */
+       hang_check = sysctl_hung_task_timeout_secs;
+       if (hang_check)
+               while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
+       else
+               wait_for_completion_io(&wait);
+
+       nvm_free_rqd_ppalist(dev, &rqd);
+
+       return rqd.error;
+}
+EXPORT_SYMBOL(nvm_submit_ppa);
+
+static int nvm_init_slc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
+{
+       int i;
+
+       dev->lps_per_blk = dev->pgs_per_blk;
+       dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL);
+       if (!dev->lptbl)
+               return -ENOMEM;
+
+       /* Just a linear array */
+       for (i = 0; i < dev->lps_per_blk; i++)
+               dev->lptbl[i] = i;
+
+       return 0;
+}
+
+static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
+{
+       int i, p;
+       struct nvm_id_lp_mlc *mlc = &grp->lptbl.mlc;
+
+       if (!mlc->num_pairs)
+               return 0;
+
+       dev->lps_per_blk = mlc->num_pairs;
+       dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL);
+       if (!dev->lptbl)
+               return -ENOMEM;
+
+       /* The lower page table encoding consists of a list of bytes, where
+        * each byte has a lower and an upper half. The first half-byte holds
+        * the initial increment value, and every half-byte after it is an
+        * offset added to the previous increment value */
+       dev->lptbl[0] = mlc->pairs[0] & 0xF;
+       for (i = 1; i < dev->lps_per_blk; i++) {
+               p = mlc->pairs[i >> 1];
+               if (i & 0x1) /* upper */
+                       dev->lptbl[i] = dev->lptbl[i - 1] + ((p & 0xF0) >> 4);
+               else /* lower */
+                       dev->lptbl[i] = dev->lptbl[i - 1] + (p & 0xF);
+       }
+
+       return 0;
+}
+
 static int nvm_core_init(struct nvm_dev *dev)
 {
        struct nvm_id *id = &dev->identity;
@@ -206,6 +424,7 @@ static int nvm_core_init(struct nvm_dev *dev)
        dev->sec_size = grp->csecs;
        dev->oob_size = grp->sos;
        dev->sec_per_pg = grp->fpg_sz / grp->csecs;
+       dev->mccap = grp->mccap;
        memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
 
        dev->plane_mode = NVM_PLANE_SINGLE;
@@ -216,11 +435,23 @@ static int nvm_core_init(struct nvm_dev *dev)
                return -EINVAL;
        }
 
-       if (grp->fmtype != 0 && grp->fmtype != 1) {
+       switch (grp->fmtype) {
+       case NVM_ID_FMTYPE_SLC:
+               if (nvm_init_slc_tbl(dev, grp))
+                       return -ENOMEM;
+               break;
+       case NVM_ID_FMTYPE_MLC:
+               if (nvm_init_mlc_tbl(dev, grp))
+                       return -ENOMEM;
+               break;
+       default:
                pr_err("nvm: flash type not supported\n");
                return -EINVAL;
        }
 
+       if (!dev->lps_per_blk)
+               pr_info("nvm: lower page programming table missing\n");
+
        if (grp->mpos & 0x020202)
                dev->plane_mode = NVM_PLANE_DOUBLE;
        if (grp->mpos & 0x040404)
@@ -238,6 +469,7 @@ static int nvm_core_init(struct nvm_dev *dev)
                                dev->nr_chnls;
        dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
        INIT_LIST_HEAD(&dev->online_targets);
+       mutex_init(&dev->mlock);
 
        return 0;
 }
@@ -249,6 +481,8 @@ static void nvm_free(struct nvm_dev *dev)
 
        if (dev->mt)
                dev->mt->unregister_mgr(dev);
+
+       kfree(dev->lptbl);
 }
 
 static int nvm_init(struct nvm_dev *dev)
@@ -338,9 +572,18 @@ int nvm_register(struct request_queue *q, char *disk_name,
                }
        }
 
+       if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
+               ret = nvm_get_sysblock(dev, &dev->sb);
+               if (!ret)
+                       pr_err("nvm: device not initialized.\n");
+               else if (ret < 0)
+                       pr_err("nvm: err (%d) on device initialization\n", ret);
+       }
+
        /* register device with a supported media manager */
        down_write(&nvm_lock);
-       dev->mt = nvm_init_mgr(dev);
+       if (ret > 0)
+               dev->mt = nvm_init_mgr(dev);
        list_add(&dev->devices, &nvm_devices);
        up_write(&nvm_lock);
 
@@ -788,6 +1031,102 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
        return __nvm_configure_remove(&remove);
 }
 
+static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info)
+{
+       info->seqnr = 1;
+       info->erase_cnt = 0;
+       info->version = 1;
+}
+
+static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init)
+{
+       struct nvm_dev *dev;
+       struct nvm_sb_info info;
+       int ret;
+
+       down_write(&nvm_lock);
+       dev = nvm_find_nvm_dev(init->dev);
+       up_write(&nvm_lock);
+       if (!dev) {
+               pr_err("nvm: device not found\n");
+               return -EINVAL;
+       }
+
+       nvm_setup_nvm_sb_info(&info);
+
+       strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN);
+       info.fs_ppa.ppa = -1;
+
+       if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
+               ret = nvm_init_sysblock(dev, &info);
+               if (ret)
+                       return ret;
+       }
+
+       memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info));
+
+       down_write(&nvm_lock);
+       dev->mt = nvm_init_mgr(dev);
+       up_write(&nvm_lock);
+
+       return 0;
+}
+
+static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
+{
+       struct nvm_ioctl_dev_init init;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init)))
+               return -EFAULT;
+
+       if (init.flags != 0) {
+               pr_err("nvm: no flags supported\n");
+               return -EINVAL;
+       }
+
+       init.dev[DISK_NAME_LEN - 1] = '\0';
+
+       return __nvm_ioctl_dev_init(&init);
+}
+
+static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
+{
+       struct nvm_ioctl_dev_factory fact;
+       struct nvm_dev *dev;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory)))
+               return -EFAULT;
+
+       fact.dev[DISK_NAME_LEN - 1] = '\0';
+
+       if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1))
+               return -EINVAL;
+
+       down_write(&nvm_lock);
+       dev = nvm_find_nvm_dev(fact.dev);
+       up_write(&nvm_lock);
+       if (!dev) {
+               pr_err("nvm: device not found\n");
+               return -EINVAL;
+       }
+
+       if (dev->mt) {
+               dev->mt->unregister_mgr(dev);
+               dev->mt = NULL;
+       }
+
+       if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT)
+               return nvm_dev_factory(dev, fact.flags);
+
+       return 0;
+}
+
 static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
 {
        void __user *argp = (void __user *)arg;
@@ -801,6 +1140,10 @@ static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
                return nvm_ioctl_dev_create(file, argp);
        case NVM_DEV_REMOVE:
                return nvm_ioctl_dev_remove(file, argp);
+       case NVM_DEV_INIT:
+               return nvm_ioctl_dev_init(file, argp);
+       case NVM_DEV_FACTORY:
+               return nvm_ioctl_dev_factory(file, argp);
        }
        return 0;
 }
index a54b339951a3e695bfa68e96fb6eaf2e3e451c62..7fb725b16148e4142d3767ce711b60450a3ff707 100644 (file)
@@ -60,7 +60,8 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn)
                lun->vlun.lun_id = i % dev->luns_per_chnl;
                lun->vlun.chnl_id = i / dev->luns_per_chnl;
                lun->vlun.nr_free_blocks = dev->blks_per_lun;
-               lun->vlun.nr_inuse_blocks = 0;
+               lun->vlun.nr_open_blocks = 0;
+               lun->vlun.nr_closed_blocks = 0;
                lun->vlun.nr_bad_blocks = 0;
        }
        return 0;
@@ -89,6 +90,7 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
 
                list_move_tail(&blk->list, &lun->bb_list);
                lun->vlun.nr_bad_blocks++;
+               lun->vlun.nr_free_blocks--;
        }
 
        return 0;
@@ -133,15 +135,15 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
                pba = pba - (dev->sec_per_lun * lun_id);
                blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
 
-               if (!blk->type) {
+               if (!blk->state) {
                        /* at this point, we don't know anything about the
                         * block. It's up to the FTL on top to re-establish the
-                        * block state
+                        * block state. The block is assumed to be open.
                         */
                        list_move_tail(&blk->list, &lun->used_list);
-                       blk->type = 1;
+                       blk->state = NVM_BLK_ST_OPEN;
                        lun->vlun.nr_free_blocks--;
-                       lun->vlun.nr_inuse_blocks++;
+                       lun->vlun.nr_open_blocks++;
                }
        }
 
@@ -255,14 +257,14 @@ static void gennvm_unregister(struct nvm_dev *dev)
        module_put(THIS_MODULE);
 }
 
-static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
+static struct nvm_block *gennvm_get_blk_unlocked(struct nvm_dev *dev,
                                struct nvm_lun *vlun, unsigned long flags)
 {
        struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
        struct nvm_block *blk = NULL;
        int is_gc = flags & NVM_IOTYPE_GC;
 
-       spin_lock(&vlun->lock);
+       assert_spin_locked(&vlun->lock);
 
        if (list_empty(&lun->free_list)) {
                pr_err_ratelimited("gennvm: lun %u have no free pages available",
@@ -275,83 +277,64 @@ static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
 
        blk = list_first_entry(&lun->free_list, struct nvm_block, list);
        list_move_tail(&blk->list, &lun->used_list);
-       blk->type = 1;
+       blk->state = NVM_BLK_ST_OPEN;
 
        lun->vlun.nr_free_blocks--;
-       lun->vlun.nr_inuse_blocks++;
+       lun->vlun.nr_open_blocks++;
 
 out:
+       return blk;
+}
+
+static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
+                               struct nvm_lun *vlun, unsigned long flags)
+{
+       struct nvm_block *blk;
+
+       spin_lock(&vlun->lock);
+       blk = gennvm_get_blk_unlocked(dev, vlun, flags);
        spin_unlock(&vlun->lock);
        return blk;
 }
 
-static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
+static void gennvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk)
 {
        struct nvm_lun *vlun = blk->lun;
        struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
 
-       spin_lock(&vlun->lock);
+       assert_spin_locked(&vlun->lock);
 
-       switch (blk->type) {
-       case 1:
+       if (blk->state & NVM_BLK_ST_OPEN) {
                list_move_tail(&blk->list, &lun->free_list);
+               lun->vlun.nr_open_blocks--;
                lun->vlun.nr_free_blocks++;
-               lun->vlun.nr_inuse_blocks--;
-               blk->type = 0;
-               break;
-       case 2:
+               blk->state = NVM_BLK_ST_FREE;
+       } else if (blk->state & NVM_BLK_ST_CLOSED) {
+               list_move_tail(&blk->list, &lun->free_list);
+               lun->vlun.nr_closed_blocks--;
+               lun->vlun.nr_free_blocks++;
+               blk->state = NVM_BLK_ST_FREE;
+       } else if (blk->state & NVM_BLK_ST_BAD) {
                list_move_tail(&blk->list, &lun->bb_list);
                lun->vlun.nr_bad_blocks++;
-               lun->vlun.nr_inuse_blocks--;
-               break;
-       default:
+               blk->state = NVM_BLK_ST_BAD;
+       } else {
                WARN_ON_ONCE(1);
                pr_err("gennvm: erroneous block type (%lu -> %u)\n",
-                                                       blk->id, blk->type);
+                                                       blk->id, blk->state);
                list_move_tail(&blk->list, &lun->bb_list);
                lun->vlun.nr_bad_blocks++;
-               lun->vlun.nr_inuse_blocks--;
-       }
-
-       spin_unlock(&vlun->lock);
-}
-
-static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
-       int i;
-
-       if (rqd->nr_pages > 1) {
-               for (i = 0; i < rqd->nr_pages; i++)
-                       rqd->ppa_list[i] = dev_to_generic_addr(dev,
-                                                       rqd->ppa_list[i]);
-       } else {
-               rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
+               blk->state = NVM_BLK_ST_BAD;
        }
 }
 
-static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
-       int i;
-
-       if (rqd->nr_pages > 1) {
-               for (i = 0; i < rqd->nr_pages; i++)
-                       rqd->ppa_list[i] = generic_to_dev_addr(dev,
-                                                       rqd->ppa_list[i]);
-       } else {
-               rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
-       }
-}
-
-static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
+static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
 {
-       if (!dev->ops->submit_io)
-               return 0;
-
-       /* Convert address space */
-       gennvm_generic_to_addr_mode(dev, rqd);
+       struct nvm_lun *vlun = blk->lun;
 
-       rqd->dev = dev;
-       return dev->ops->submit_io(dev, rqd);
+       spin_lock(&vlun->lock);
+       gennvm_put_blk_unlocked(dev, blk);
+       spin_unlock(&vlun->lock);
 }
 
 static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
@@ -376,7 +359,7 @@ static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
        blk = &lun->vlun.blocks[ppa->g.blk];
 
        /* will be moved to bb list on put_blk from target */
-       blk->type = type;
+       blk->state = type;
 }
 
 /* mark block bad. It is expected the target recover from the error. */
@@ -390,77 +373,51 @@ static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd)
        if (dev->ops->set_bb_tbl(dev, rqd, 1))
                return;
 
-       gennvm_addr_to_generic_mode(dev, rqd);
+       nvm_addr_to_generic_mode(dev, rqd);
 
        /* look up blocks and mark them as bad */
        if (rqd->nr_pages > 1)
                for (i = 0; i < rqd->nr_pages; i++)
-                       gennvm_blk_set_type(dev, &rqd->ppa_list[i], 2);
+                       gennvm_blk_set_type(dev, &rqd->ppa_list[i],
+                                               NVM_BLK_ST_BAD);
        else
-               gennvm_blk_set_type(dev, &rqd->ppa_addr, 2);
+               gennvm_blk_set_type(dev, &rqd->ppa_addr, NVM_BLK_ST_BAD);
 }
 
-static int gennvm_end_io(struct nvm_rq *rqd, int error)
+static void gennvm_end_io(struct nvm_rq *rqd)
 {
        struct nvm_tgt_instance *ins = rqd->ins;
-       int ret = 0;
 
-       switch (error) {
+       switch (rqd->error) {
        case NVM_RSP_SUCCESS:
-               break;
        case NVM_RSP_ERR_EMPTYPAGE:
                break;
        case NVM_RSP_ERR_FAILWRITE:
                gennvm_mark_blk_bad(rqd->dev, rqd);
-       default:
-               ret++;
        }
 
-       ret += ins->tt->end_io(rqd, error);
-
-       return ret;
+       ins->tt->end_io(rqd);
 }
 
-static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
-                                                       unsigned long flags)
+static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 {
-       int plane_cnt = 0, pl_idx, ret;
-       struct ppa_addr addr;
-       struct nvm_rq rqd;
-
-       if (!dev->ops->erase_block)
-               return 0;
-
-       addr = block_to_ppa(dev, blk);
-
-       if (dev->plane_mode == NVM_PLANE_SINGLE) {
-               rqd.nr_pages = 1;
-               rqd.ppa_addr = addr;
-       } else {
-               plane_cnt = (1 << dev->plane_mode);
-               rqd.nr_pages = plane_cnt;
-
-               rqd.ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL,
-                                                       &rqd.dma_ppa_list);
-               if (!rqd.ppa_list) {
-                       pr_err("gennvm: failed to allocate dma memory\n");
-                       return -ENOMEM;
-               }
-
-               for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
-                       addr.g.pl = pl_idx;
-                       rqd.ppa_list[pl_idx] = addr;
-               }
-       }
+       if (!dev->ops->submit_io)
+               return -ENODEV;
 
-       gennvm_generic_to_addr_mode(dev, &rqd);
+       /* Convert address space */
+       nvm_generic_to_addr_mode(dev, rqd);
 
-       ret = dev->ops->erase_block(dev, &rqd);
+       rqd->dev = dev;
+       rqd->end_io = gennvm_end_io;
+       return dev->ops->submit_io(dev, rqd);
+}
 
-       if (plane_cnt)
-               nvm_dev_dma_free(dev, rqd.ppa_list, rqd.dma_ppa_list);
+static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
+                                                       unsigned long flags)
+{
+       struct ppa_addr addr = block_to_ppa(dev, blk);
 
-       return ret;
+       return nvm_erase_ppa(dev, &addr, 1);
 }
 
 static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
@@ -480,10 +437,11 @@ static void gennvm_lun_info_print(struct nvm_dev *dev)
        gennvm_for_each_lun(gn, lun, i) {
                spin_lock(&lun->vlun.lock);
 
-               pr_info("%s: lun%8u\t%u\t%u\t%u\n",
+               pr_info("%s: lun%8u\t%u\t%u\t%u\t%u\n",
                                dev->name, i,
                                lun->vlun.nr_free_blocks,
-                               lun->vlun.nr_inuse_blocks,
+                               lun->vlun.nr_open_blocks,
+                               lun->vlun.nr_closed_blocks,
                                lun->vlun.nr_bad_blocks);
 
                spin_unlock(&lun->vlun.lock);
@@ -491,21 +449,23 @@ static void gennvm_lun_info_print(struct nvm_dev *dev)
 }
 
 static struct nvmm_type gennvm = {
-       .name           = "gennvm",
-       .version        = {0, 1, 0},
+       .name                   = "gennvm",
+       .version                = {0, 1, 0},
+
+       .register_mgr           = gennvm_register,
+       .unregister_mgr         = gennvm_unregister,
 
-       .register_mgr   = gennvm_register,
-       .unregister_mgr = gennvm_unregister,
+       .get_blk_unlocked       = gennvm_get_blk_unlocked,
+       .put_blk_unlocked       = gennvm_put_blk_unlocked,
 
-       .get_blk        = gennvm_get_blk,
-       .put_blk        = gennvm_put_blk,
+       .get_blk                = gennvm_get_blk,
+       .put_blk                = gennvm_put_blk,
 
-       .submit_io      = gennvm_submit_io,
-       .end_io         = gennvm_end_io,
-       .erase_blk      = gennvm_erase_blk,
+       .submit_io              = gennvm_submit_io,
+       .erase_blk              = gennvm_erase_blk,
 
-       .get_lun        = gennvm_get_lun,
-       .lun_info_print = gennvm_lun_info_print,
+       .get_lun                = gennvm_get_lun,
+       .lun_info_print         = gennvm_lun_info_print,
 };
 
 static int __init gennvm_module_init(void)
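The gennvm changes replace the old numeric blk->type (1 = in use, 2 = bad) with single-bit states, so the put path tests membership with mask checks instead of a switch, and free/open/closed/bad counters can be kept separately. A sketch of that state model, with illustrative values:

/* One bit per state: membership becomes a mask test. */
enum blk_state {
	BLK_ST_FREE   = 1 << 0,
	BLK_ST_OPEN   = 1 << 1,
	BLK_ST_CLOSED = 1 << 2,
	BLK_ST_BAD    = 1 << 3,
};

static int blk_is_reusable(unsigned int state)
{
	/* open and closed blocks both return to the free list on put */
	return state & (BLK_ST_OPEN | BLK_ST_CLOSED);
}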
index 134e4faba48259434db7d55d446f40f3deb206b9..307db1ea22defbcbddcfdc3d63cfe86bda43fa0c 100644 (file)
@@ -179,16 +179,23 @@ static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *rblk)
 static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
                                                        unsigned long flags)
 {
+       struct nvm_lun *lun = rlun->parent;
        struct nvm_block *blk;
        struct rrpc_block *rblk;
 
-       blk = nvm_get_blk(rrpc->dev, rlun->parent, flags);
-       if (!blk)
+       spin_lock(&lun->lock);
+       blk = nvm_get_blk_unlocked(rrpc->dev, rlun->parent, flags);
+       if (!blk) {
+               pr_err("nvm: rrpc: cannot get new block from media manager\n");
+               spin_unlock(&lun->lock);
                return NULL;
+       }
 
        rblk = &rlun->blocks[blk->id];
-       blk->priv = rblk;
+       list_add_tail(&rblk->list, &rlun->open_list);
+       spin_unlock(&lun->lock);
 
+       blk->priv = rblk;
        bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk);
        rblk->next_page = 0;
        rblk->nr_invalid_pages = 0;
@@ -199,7 +206,13 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
 
 static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
-       nvm_put_blk(rrpc->dev, rblk->parent);
+       struct rrpc_lun *rlun = rblk->rlun;
+       struct nvm_lun *lun = rlun->parent;
+
+       spin_lock(&lun->lock);
+       nvm_put_blk_unlocked(rrpc->dev, rblk->parent);
+       list_del(&rblk->list);
+       spin_unlock(&lun->lock);
 }
 
 static void rrpc_put_blks(struct rrpc *rrpc)
@@ -287,6 +300,10 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
        }
 
        page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
+       if (!page) {
+               bio_put(bio);
+               return -ENOMEM;
+       }
 
        while ((slot = find_first_zero_bit(rblk->invalid_pages,
                                            nr_pgs_per_blk)) < nr_pgs_per_blk) {
@@ -328,6 +345,10 @@ try:
                        goto finished;
                }
                wait_for_completion_io(&wait);
+               if (bio->bi_error) {
+                       rrpc_inflight_laddr_release(rrpc, rqd);
+                       goto finished;
+               }
 
                bio_reset(bio);
                reinit_completion(&wait);
@@ -350,6 +371,8 @@ try:
                wait_for_completion_io(&wait);
 
                rrpc_inflight_laddr_release(rrpc, rqd);
+               if (bio->bi_error)
+                       goto finished;
 
                bio_reset(bio);
        }
@@ -373,16 +396,26 @@ static void rrpc_block_gc(struct work_struct *work)
        struct rrpc *rrpc = gcb->rrpc;
        struct rrpc_block *rblk = gcb->rblk;
        struct nvm_dev *dev = rrpc->dev;
+       struct nvm_lun *lun = rblk->parent->lun;
+       struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
 
+       mempool_free(gcb, rrpc->gcb_pool);
        pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
 
        if (rrpc_move_valid_pages(rrpc, rblk))
-               goto done;
+               goto put_back;
+
+       if (nvm_erase_blk(dev, rblk->parent))
+               goto put_back;
 
-       nvm_erase_blk(dev, rblk->parent);
        rrpc_put_blk(rrpc, rblk);
-done:
-       mempool_free(gcb, rrpc->gcb_pool);
+
+       return;
+
+put_back:
+       spin_lock(&rlun->lock);
+       list_add_tail(&rblk->prio, &rlun->prio_list);
+       spin_unlock(&rlun->lock);
 }
 
 /* the block with the highest number of invalid pages will be at the beginning
@@ -427,7 +460,7 @@ static void rrpc_lun_gc(struct work_struct *work)
        if (nr_blocks_need < rrpc->nr_luns)
                nr_blocks_need = rrpc->nr_luns;
 
-       spin_lock(&lun->lock);
+       spin_lock(&rlun->lock);
        while (nr_blocks_need > lun->nr_free_blocks &&
                                        !list_empty(&rlun->prio_list)) {
                struct rrpc_block *rblock = block_prio_find_max(rlun);
@@ -436,16 +469,16 @@ static void rrpc_lun_gc(struct work_struct *work)
                if (!rblock->nr_invalid_pages)
                        break;
 
+               gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
+               if (!gcb)
+                       break;
+
                list_del_init(&rblock->prio);
 
                BUG_ON(!block_is_full(rrpc, rblock));
 
                pr_debug("rrpc: selected block '%lu' for GC\n", block->id);
 
-               gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
-               if (!gcb)
-                       break;
-
                gcb->rrpc = rrpc;
                gcb->rblk = rblock;
                INIT_WORK(&gcb->ws_gc, rrpc_block_gc);
@@ -454,7 +487,7 @@ static void rrpc_lun_gc(struct work_struct *work)
 
                nr_blocks_need--;
        }
-       spin_unlock(&lun->lock);
+       spin_unlock(&rlun->lock);
 
        /* TODO: Hint that request queue can be started again */
 }
@@ -635,12 +668,24 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
                lun = rblk->parent->lun;
 
                cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
-               if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk))
+               if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) {
+                       struct nvm_block *blk = rblk->parent;
+                       struct rrpc_lun *rlun = rblk->rlun;
+
+                       spin_lock(&lun->lock);
+                       lun->nr_open_blocks--;
+                       lun->nr_closed_blocks++;
+                       blk->state &= ~NVM_BLK_ST_OPEN;
+                       blk->state |= NVM_BLK_ST_CLOSED;
+                       list_move_tail(&rblk->list, &rlun->closed_list);
+                       spin_unlock(&lun->lock);
+
                        rrpc_run_gc(rrpc, rblk);
+               }
        }
 }
 
-static int rrpc_end_io(struct nvm_rq *rqd, int error)
+static void rrpc_end_io(struct nvm_rq *rqd)
 {
        struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
        struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
@@ -650,11 +695,12 @@ static int rrpc_end_io(struct nvm_rq *rqd, int error)
        if (bio_data_dir(rqd->bio) == WRITE)
                rrpc_end_io_write(rrpc, rrqd, laddr, npages);
 
+       bio_put(rqd->bio);
+
        if (rrqd->flags & NVM_IOTYPE_GC)
-               return 0;
+               return;
 
        rrpc_unlock_rq(rrpc, rqd);
-       bio_put(rqd->bio);
 
        if (npages > 1)
                nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
@@ -662,8 +708,6 @@ static int rrpc_end_io(struct nvm_rq *rqd, int error)
                nvm_dev_dma_free(rrpc->dev, rqd->metadata, rqd->dma_metadata);
 
        mempool_free(rqd, rrpc->rq_pool);
-
-       return 0;
 }
 
 static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
@@ -841,6 +885,13 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
        err = nvm_submit_io(rrpc->dev, rqd);
        if (err) {
                pr_err("rrpc: I/O submission failed: %d\n", err);
+               bio_put(bio);
+               if (!(flags & NVM_IOTYPE_GC)) {
+                       rrpc_unlock_rq(rrpc, rqd);
+                       if (rqd->nr_pages > 1)
+                               nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
+                                                rqd->dma_ppa_list);
+               }
                return NVM_IO_ERR;
        }
 
@@ -1090,6 +1141,11 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
        struct rrpc_lun *rlun;
        int i, j;
 
+       if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
+               pr_err("rrpc: number of pages per block too high.");
+               return -EINVAL;
+       }
+
        spin_lock_init(&rrpc->rev_lock);
 
        rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
@@ -1101,16 +1157,13 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
        for (i = 0; i < rrpc->nr_luns; i++) {
                struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
 
-               if (dev->pgs_per_blk >
-                               MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
-                       pr_err("rrpc: number of pages per block too high.");
-                       goto err;
-               }
-
                rlun = &rrpc->luns[i];
                rlun->rrpc = rrpc;
                rlun->parent = lun;
                INIT_LIST_HEAD(&rlun->prio_list);
+               INIT_LIST_HEAD(&rlun->open_list);
+               INIT_LIST_HEAD(&rlun->closed_list);
+
                INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
                spin_lock_init(&rlun->lock);
 
@@ -1127,6 +1180,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
                        struct nvm_block *blk = &lun->blocks[j];
 
                        rblk->parent = blk;
+                       rblk->rlun = rlun;
                        INIT_LIST_HEAD(&rblk->prio);
                        spin_lock_init(&rblk->lock);
                }
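
The rrpc changes above split the locking in two: lun->lock (owned by the media manager) guards the free-block counters plus the new open_list/closed_list and block state bits, while rlun->lock guards rrpc's private GC prio_list — hence the spin_lock(&lun->lock) to spin_lock(&rlun->lock) swap inside rrpc_lun_gc(). A sketch of the requeue path behind the new put_back label, written as a hypothetical helper:

        static void rrpc_requeue_for_gc(struct rrpc_lun *rlun,
                                        struct rrpc_block *rblk)
        {
                /* page move or erase failed: hand the block back to the
                 * GC candidate list instead of leaking it */
                spin_lock(&rlun->lock);
                list_add_tail(&rblk->prio, &rlun->prio_list);
                spin_unlock(&rlun->lock);
        }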
index a9696a06c38c79c91821aa9f0104dd41ce3dbf00..f7b37336353fd56771f776ed420ff07521f2b6d5 100644 (file)
@@ -54,7 +54,9 @@ struct rrpc_rq {
 
 struct rrpc_block {
        struct nvm_block *parent;
+       struct rrpc_lun *rlun;
        struct list_head prio;
+       struct list_head list;
 
 #define MAX_INVALID_PAGES_STORAGE 8
        /* Bitmap for invalid page entries */
@@ -73,7 +75,16 @@ struct rrpc_lun {
        struct nvm_lun *parent;
        struct rrpc_block *cur, *gc_cur;
        struct rrpc_block *blocks;      /* Reference to block allocation */
-       struct list_head prio_list;             /* Blocks that may be GC'ed */
+
+       struct list_head prio_list;     /* Blocks that may be GC'ed */
+       struct list_head open_list;     /* In-use open blocks. These are blocks
+                                        * that can be both written to and read
+                                        * from
+                                        */
+       struct list_head closed_list;   /* In-use closed blocks. These are
+                                        * blocks that can _only_ be read from
+                                        */
+
        struct work_struct ws_gc;
 
        spinlock_t lock;
@@ -163,8 +174,7 @@ static inline sector_t rrpc_get_sector(sector_t laddr)
 static inline int request_intersects(struct rrpc_inflight_rq *r,
                                sector_t laddr_start, sector_t laddr_end)
 {
-       return (laddr_end >= r->l_start && laddr_end <= r->l_end) &&
-               (laddr_start >= r->l_start && laddr_start <= r->l_end);
+       return (laddr_end >= r->l_start) && (laddr_start <= r->l_end);
 }
 
 static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
@@ -173,6 +183,8 @@ static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
        sector_t laddr_end = laddr + pages - 1;
        struct rrpc_inflight_rq *rtmp;
 
+       WARN_ON(irqs_disabled());
+
        spin_lock_irq(&rrpc->inflights.lock);
        list_for_each_entry(rtmp, &rrpc->inflights.reqs, list) {
                if (unlikely(request_intersects(rtmp, laddr, laddr_end))) {
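
The request_intersects() rewrite above swaps a containment test for the standard interval-overlap test: two closed ranges [a0, a1] and [b0, b1] overlap iff a1 >= b0 and a0 <= b1. The old form demanded that both endpoints of the incoming request fall inside the inflight range, so a request that merely straddled one end — inflight [5, 10] versus request [3, 7], say — was wrongly reported as non-intersecting and escaped the lock. A self-contained restatement:

        /* inflight [5,10] vs request [3,7]: the old test fails its
         * laddr_start >= l_start clause and misses the collision;
         * this overlap test returns true. */
        static int intervals_overlap(sector_t a0, sector_t a1,
                                     sector_t b0, sector_t b1)
        {
                return (a1 >= b0) && (a0 <= b1);
        }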
diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c
new file mode 100644 (file)
index 0000000..321de1f
--- /dev/null
@@ -0,0 +1,741 @@
+/*
+ * Copyright (C) 2015 Matias Bjorling. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/lightnvm.h>
+
+#define MAX_SYSBLKS 3  /* remember to update mapping scheme on change */
+#define MAX_BLKS_PR_SYSBLK 2 /* 2 blks with 256 pages and 3000 erases
+                             * enables ~1.5M updates per sysblk unit
+                             */
+
+struct sysblk_scan {
+       /* A row is a collection of flash blocks for a system block. */
+       int nr_rows;
+       int row;
+       int act_blk[MAX_SYSBLKS];
+
+       int nr_ppas;
+       struct ppa_addr ppas[MAX_SYSBLKS * MAX_BLKS_PR_SYSBLK];/* all sysblks */
+};
+
+static inline int scan_ppa_idx(int row, int blkid)
+{
+       return (row * MAX_BLKS_PR_SYSBLK) + blkid;
+}
+
+void nvm_sysblk_to_cpu(struct nvm_sb_info *info, struct nvm_system_block *sb)
+{
+       info->seqnr = be32_to_cpu(sb->seqnr);
+       info->erase_cnt = be32_to_cpu(sb->erase_cnt);
+       info->version = be16_to_cpu(sb->version);
+       strncpy(info->mmtype, sb->mmtype, NVM_MMTYPE_LEN);
+       info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa);
+}
+
+void nvm_cpu_to_sysblk(struct nvm_system_block *sb, struct nvm_sb_info *info)
+{
+       sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC);
+       sb->seqnr = cpu_to_be32(info->seqnr);
+       sb->erase_cnt = cpu_to_be32(info->erase_cnt);
+       sb->version = cpu_to_be16(info->version);
+       strncpy(sb->mmtype, info->mmtype, NVM_MMTYPE_LEN);
+       sb->fs_ppa = cpu_to_be64(info->fs_ppa.ppa);
+}
+
+static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
+{
+       int nr_rows = min_t(int, MAX_SYSBLKS, dev->nr_chnls);
+       int i;
+
+       for (i = 0; i < nr_rows; i++)
+               sysblk_ppas[i].ppa = 0;
+
+       /* if possible, place sysblk at first channel, middle channel and last
+        * channel of the device. If not, create only one or two sys blocks
+        */
+       switch (dev->nr_chnls) {
+       case 2:
+               sysblk_ppas[1].g.ch = 1;
+               /* fall-through */
+       case 1:
+               sysblk_ppas[0].g.ch = 0;
+               break;
+       default:
+               sysblk_ppas[0].g.ch = 0;
+               sysblk_ppas[1].g.ch = dev->nr_chnls / 2;
+               sysblk_ppas[2].g.ch = dev->nr_chnls - 1;
+               break;
+       }
+
+       return nr_rows;
+}
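+
+/* Worked example of the placement above (restating the switch, not new
+ * behaviour): an 8-channel device puts its three rows on channels 0,
+ * 8 / 2 = 4 and 8 - 1 = 7; a 2-channel device uses channels 0 and 1; a
+ * single-channel device uses only channel 0, nr_rows having been capped
+ * by the min_t() at the top of this function.
+ */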
+
+void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s,
+                                               struct ppa_addr *sysblk_ppas)
+{
+       memset(s, 0, sizeof(struct sysblk_scan));
+       s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas);
+}
+
+static int sysblk_get_host_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+                                                               void *private)
+{
+       struct sysblk_scan *s = private;
+       int i, nr_sysblk = 0;
+
+       for (i = 0; i < nr_blks; i++) {
+               if (blks[i] != NVM_BLK_T_HOST)
+                       continue;
+
+               if (s->nr_ppas == MAX_BLKS_PR_SYSBLK * MAX_SYSBLKS) {
+                       pr_err("nvm: too many host blks\n");
+                       return -EINVAL;
+               }
+
+               ppa.g.blk = i;
+
+               s->ppas[scan_ppa_idx(s->row, nr_sysblk)] = ppa;
+               s->nr_ppas++;
+               nr_sysblk++;
+       }
+
+       return 0;
+}
+
+static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s,
+                               struct ppa_addr *ppas, nvm_bb_update_fn *fn)
+{
+       struct ppa_addr dppa;
+       int i, ret;
+
+       s->nr_ppas = 0;
+
+       for (i = 0; i < s->nr_rows; i++) {
+               dppa = generic_to_dev_addr(dev, ppas[i]);
+               s->row = i;
+
+               ret = dev->ops->get_bb_tbl(dev, dppa, dev->blks_per_lun, fn, s);
+               if (ret) {
+                       pr_err("nvm: failed bb tbl for ppa (%u %u)\n",
+                                                       ppas[i].g.ch,
+                                                       ppas[i].g.blk);
+                       return ret;
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * scans a block for latest sysblk.
+ * Returns:
+ *     0 - newer sysblk not found. PPA is updated to latest page.
+ *     1 - newer sysblk found and stored in *sblk. PPA is updated to
+ *         next valid page.
+ *     <0- error.
+ */
+static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
+                                               struct nvm_system_block *sblk)
+{
+       struct nvm_system_block *cur;
+       int pg, cursz, ret, found = 0;
+
+       /* the full buffer for a flash page is allocated. Only the first part
+        * of it contains the system block information
+        */
+       cursz = dev->sec_size * dev->sec_per_pg * dev->nr_planes;
+       cur = kmalloc(cursz, GFP_KERNEL);
+       if (!cur)
+               return -ENOMEM;
+
+       /* perform linear scan through the block */
+       for (pg = 0; pg < dev->lps_per_blk; pg++) {
+               ppa->g.pg = ppa_to_slc(dev, pg);
+
+               ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE,
+                                                               cur, cursz);
+               if (ret) {
+                       if (ret == NVM_RSP_ERR_EMPTYPAGE) {
+                               pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n",
+                                                       ppa->g.ch,
+                                                       ppa->g.lun,
+                                                       ppa->g.blk,
+                                                       ppa->g.pg);
+                               break;
+                       }
+                       pr_err("nvm: read failed (%x) for ppa (%u %u %u %u)",
+                                                       ret,
+                                                       ppa->g.ch,
+                                                       ppa->g.lun,
+                                                       ppa->g.blk,
+                                                       ppa->g.pg);
+                       break; /* if we can't read a page, continue to the
+                               * next blk
+                               */
+               }
+
+               if (be32_to_cpu(cur->magic) != NVM_SYSBLK_MAGIC) {
+                       pr_debug("nvm: scan break for ppa (%u %u %u %u)\n",
+                                                       ppa->g.ch,
+                                                       ppa->g.lun,
+                                                       ppa->g.blk,
+                                                       ppa->g.pg);
+                       break; /* last valid page already found */
+               }
+
+               if (be32_to_cpu(cur->seqnr) < be32_to_cpu(sblk->seqnr))
+                       continue;
+
+               memcpy(sblk, cur, sizeof(struct nvm_system_block));
+               found = 1;
+       }
+
+       kfree(cur);
+
+       return found;
+}
+
+static int nvm_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, int type)
+{
+       struct nvm_rq rqd;
+       int ret;
+
+       if (s->nr_ppas > dev->ops->max_phys_sect) {
+               pr_err("nvm: unable to update all sysblocks atomically\n");
+               return -EINVAL;
+       }
+
+       memset(&rqd, 0, sizeof(struct nvm_rq));
+
+       nvm_set_rqd_ppalist(dev, &rqd, s->ppas, s->nr_ppas);
+       nvm_generic_to_addr_mode(dev, &rqd);
+
+       ret = dev->ops->set_bb_tbl(dev, &rqd, type);
+       nvm_free_rqd_ppalist(dev, &rqd);
+       if (ret) {
+               pr_err("nvm: sysblk failed bb mark\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int sysblk_get_free_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+                                                               void *private)
+{
+       struct sysblk_scan *s = private;
+       struct ppa_addr *sppa;
+       int i, blkid = 0;
+
+       for (i = 0; i < nr_blks; i++) {
+               if (blks[i] == NVM_BLK_T_HOST)
+                       return -EEXIST;
+
+               if (blks[i] != NVM_BLK_T_FREE)
+                       continue;
+
+               sppa = &s->ppas[scan_ppa_idx(s->row, blkid)];
+               sppa->g.ch = ppa.g.ch;
+               sppa->g.lun = ppa.g.lun;
+               sppa->g.blk = i;
+               s->nr_ppas++;
+               blkid++;
+
+               pr_debug("nvm: use (%u %u %u) as sysblk\n",
+                                       sppa->g.ch, sppa->g.lun, sppa->g.blk);
+               if (blkid > MAX_BLKS_PR_SYSBLK - 1)
+                       return 0;
+       }
+
+       pr_err("nvm: sysblk failed get sysblk\n");
+       return -EINVAL;
+}
+
+static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
+                                                       struct sysblk_scan *s)
+{
+       struct nvm_system_block nvmsb;
+       void *buf;
+       int i, sect, ret, bufsz;
+       struct ppa_addr *ppas;
+
+       nvm_cpu_to_sysblk(&nvmsb, info);
+
+       /* buffer for flash page */
+       bufsz = dev->sec_size * dev->sec_per_pg * dev->nr_planes;
+       buf = kzalloc(bufsz, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+       memcpy(buf, &nvmsb, sizeof(struct nvm_system_block));
+
+       ppas = kcalloc(dev->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL);
+       if (!ppas) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       /* Write and verify */
+       for (i = 0; i < s->nr_rows; i++) {
+               ppas[0] = s->ppas[scan_ppa_idx(i, s->act_blk[i])];
+
+               pr_debug("nvm: writing sysblk to ppa (%u %u %u %u)\n",
+                                                       ppas[0].g.ch,
+                                                       ppas[0].g.lun,
+                                                       ppas[0].g.blk,
+                                                       ppas[0].g.pg);
+
+               /* Expand to all sectors within a flash page */
+               if (dev->sec_per_pg > 1) {
+                       for (sect = 1; sect < dev->sec_per_pg; sect++) {
+                               ppas[sect].ppa = ppas[0].ppa;
+                               ppas[sect].g.sec = sect;
+                       }
+               }
+
+               ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PWRITE,
+                                               NVM_IO_SLC_MODE, buf, bufsz);
+               if (ret) {
+                       pr_err("nvm: sysblk failed program (%u %u %u)\n",
+                                                       ppas[0].g.ch,
+                                                       ppas[0].g.lun,
+                                                       ppas[0].g.blk);
+                       break;
+               }
+
+               ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PREAD,
+                                               NVM_IO_SLC_MODE, buf, bufsz);
+               if (ret) {
+                       pr_err("nvm: sysblk failed read (%u %u %u)\n",
+                                                       ppas[0].g.ch,
+                                                       ppas[0].g.lun,
+                                                       ppas[0].g.blk);
+                       break;
+               }
+
+               if (memcmp(buf, &nvmsb, sizeof(struct nvm_system_block))) {
+                       pr_err("nvm: sysblk failed verify (%u %u %u)\n",
+                                                       ppas[0].g.ch,
+                                                       ppas[0].g.lun,
+                                                       ppas[0].g.blk);
+                       ret = -EINVAL;
+                       break;
+               }
+       }
+
+       kfree(ppas);
+err:
+       kfree(buf);
+
+       return ret;
+}
+
+static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s)
+{
+       int i, ret;
+       unsigned long nxt_blk;
+       struct ppa_addr *ppa;
+
+       for (i = 0; i < s->nr_rows; i++) {
+               nxt_blk = (s->act_blk[i] + 1) % MAX_BLKS_PR_SYSBLK;
+               ppa = &s->ppas[scan_ppa_idx(i, nxt_blk)];
+               ppa->g.pg = ppa_to_slc(dev, 0);
+
+               ret = nvm_erase_ppa(dev, ppa, 1);
+               if (ret)
+                       return ret;
+
+               s->act_blk[i] = nxt_blk;
+       }
+
+       return 0;
+}
+
+int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
+{
+       struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+       struct sysblk_scan s;
+       struct nvm_system_block *cur;
+       int i, j, found = 0;
+       int ret = -ENOMEM;
+
+       /*
+        * 1. setup sysblk locations
+        * 2. get bad block list
+        * 3. filter on host-specific (type 3)
+        * 4. iterate through all and find the highest seq nr.
+        * 5. return superblock information
+        */
+
+       if (!dev->ops->get_bb_tbl)
+               return -EINVAL;
+
+       nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+       mutex_lock(&dev->mlock);
+       ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks);
+       if (ret)
+               goto err_sysblk;
+
+       /* no sysblocks initialized */
+       if (!s.nr_ppas)
+               goto err_sysblk;
+
+       cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
+       if (!cur)
+               goto err_sysblk;
+
+       /* find the latest block across all sysblocks */
+       for (i = 0; i < s.nr_rows; i++) {
+               for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
+                       struct ppa_addr ppa = s.ppas[scan_ppa_idx(i, j)];
+
+                       ret = nvm_scan_block(dev, &ppa, cur);
+                       if (ret > 0)
+                               found = 1;
+                       else if (ret < 0)
+                               break;
+               }
+       }
+
+       nvm_sysblk_to_cpu(info, cur);
+
+       kfree(cur);
+err_sysblk:
+       mutex_unlock(&dev->mlock);
+
+       if (found)
+               return 1;
+       return ret;
+}
+
+int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new)
+{
+       /* 1. for each latest superblock
+        * 2. if room
+        *    a. write new flash page entry with the updated information
+        * 3. if no room
+        *    a. find next available block on lun (linear search)
+        *       if none, continue to next lun
+        *       if none at all, report error. also report that it wasn't
+        *       possible to write to all superblocks.
+        *    b. write data to block.
+        */
+       struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+       struct sysblk_scan s;
+       struct nvm_system_block *cur;
+       int i, j, ppaidx, found = 0;
+       int ret = -ENOMEM;
+
+       if (!dev->ops->get_bb_tbl)
+               return -EINVAL;
+
+       nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+       mutex_lock(&dev->mlock);
+       ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks);
+       if (ret)
+               goto err_sysblk;
+
+       cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
+       if (!cur)
+               goto err_sysblk;
+
+       /* Get the latest sysblk for each sysblk row */
+       for (i = 0; i < s.nr_rows; i++) {
+               found = 0;
+               for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
+                       ppaidx = scan_ppa_idx(i, j);
+                       ret = nvm_scan_block(dev, &s.ppas[ppaidx], cur);
+                       if (ret > 0) {
+                               s.act_blk[i] = j;
+                               found = 1;
+                       } else if (ret < 0)
+                               break;
+               }
+       }
+
+       if (!found) {
+               pr_err("nvm: no valid sysblks found to update\n");
+               ret = -EINVAL;
+               goto err_cur;
+       }
+
+       /*
+        * All sysblocks found. Check that they have the same page id in their
+        * flash blocks
+        */
+       for (i = 1; i < s.nr_rows; i++) {
+               struct ppa_addr l = s.ppas[scan_ppa_idx(0, s.act_blk[0])];
+               struct ppa_addr r = s.ppas[scan_ppa_idx(i, s.act_blk[i])];
+
+               if (l.g.pg != r.g.pg) {
+                       pr_err("nvm: sysblks not on same page. Previous update failed.\n");
+                       ret = -EINVAL;
+                       goto err_cur;
+               }
+       }
+
+       /*
+        * Check that there hasn't been another update to the seqnr since we
+        * began
+        */
+       if ((new->seqnr - 1) != be32_to_cpu(cur->seqnr)) {
+               pr_err("nvm: seq is not sequential\n");
+               ret = -EINVAL;
+               goto err_cur;
+       }
+
+       /*
+        * When all pages in a block have been written, a new block is selected
+        * and writing is performed on the new block.
+        */
+       if (s.ppas[scan_ppa_idx(0, s.act_blk[0])].g.pg ==
+                                               dev->lps_per_blk - 1) {
+               ret = nvm_prepare_new_sysblks(dev, &s);
+               if (ret)
+                       goto err_cur;
+       }
+
+       ret = nvm_write_and_verify(dev, new, &s);
+err_cur:
+       kfree(cur);
+err_sysblk:
+       mutex_unlock(&dev->mlock);
+
+       return ret;
+}
+
+int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
+{
+       struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+       struct sysblk_scan s;
+       int ret;
+
+       /*
+        * 1. select master blocks and pick the first available blks
+        * 2. get bad block list
+        * 3. mark MAX_SYSBLKS blocks as host-based device allocated.
+        * 4. write and verify data to block
+        */
+
+       if (!dev->ops->get_bb_tbl || !dev->ops->set_bb_tbl)
+               return -EINVAL;
+
+       if (!(dev->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) {
+               pr_err("nvm: memory does not support SLC access\n");
+               return -EINVAL;
+       }
+
+       /* Index all sysblocks and mark them as host-driven */
+       nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+       mutex_lock(&dev->mlock);
+       ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_free_blks);
+       if (ret)
+               goto err_mark;
+
+       ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_HOST);
+       if (ret)
+               goto err_mark;
+
+       /* Write to the first block of each row */
+       ret = nvm_write_and_verify(dev, info, &s);
+err_mark:
+       mutex_unlock(&dev->mlock);
+       return ret;
+}
+
+struct factory_blks {
+       struct nvm_dev *dev;
+       int flags;
+       unsigned long *blks;
+};
+
+static int factory_nblks(int nblks)
+{
+       /* Round up to nearest BITS_PER_LONG */
+       return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
+}
+
+static unsigned int factory_blk_offset(struct nvm_dev *dev, int ch, int lun)
+{
+       int nblks = factory_nblks(dev->blks_per_lun);
+
+       return ((ch * dev->luns_per_chnl * nblks) + (lun * nblks)) /
+                                                               BITS_PER_LONG;
+}
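+
+/* Illustrative numbers (assumed, not from the diff): with blks_per_lun =
+ * 1020 and BITS_PER_LONG = 64, factory_nblks() rounds up to 1024 bits,
+ * i.e. 16 longs per per-LUN bitmap; with luns_per_chnl = 4, the bitmap
+ * for (ch = 1, lun = 2) then starts at long index
+ * ((1 * 4 * 1024) + (2 * 1024)) / 64 = 96.
+ */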
+
+static int nvm_factory_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+                                                               void *private)
+{
+       struct factory_blks *f = private;
+       struct nvm_dev *dev = f->dev;
+       int i, lunoff;
+
+       lunoff = factory_blk_offset(dev, ppa.g.ch, ppa.g.lun);
+
+       /* bits left unset correspond to blocks that must be erased */
+       for (i = 0; i < nr_blks; i++) {
+               switch (blks[i]) {
+               case NVM_BLK_T_FREE:
+                       if (f->flags & NVM_FACTORY_ERASE_ONLY_USER)
+                               set_bit(i, &f->blks[lunoff]);
+                       break;
+               case NVM_BLK_T_HOST:
+                       if (!(f->flags & NVM_FACTORY_RESET_HOST_BLKS))
+                               set_bit(i, &f->blks[lunoff]);
+                       break;
+               case NVM_BLK_T_GRWN_BAD:
+                       if (!(f->flags & NVM_FACTORY_RESET_GRWN_BBLKS))
+                               set_bit(i, &f->blks[lunoff]);
+                       break;
+               default:
+                       set_bit(i, &f->blks[lunoff]);
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list,
+                                       int max_ppas, struct factory_blks *f)
+{
+       struct ppa_addr ppa;
+       int ch, lun, blkid, idx, done = 0, ppa_cnt = 0;
+       unsigned long *offset;
+
+       while (!done) {
+               done = 1;
+               for (ch = 0; ch < dev->nr_chnls; ch++) {
+                       for (lun = 0; lun < dev->luns_per_chnl; lun++) {
+                               idx = factory_blk_offset(dev, ch, lun);
+                               offset = &f->blks[idx];
+
+                               blkid = find_first_zero_bit(offset,
+                                                       dev->blks_per_lun);
+                               if (blkid >= dev->blks_per_lun)
+                                       continue;
+                               set_bit(blkid, offset);
+
+                               ppa.ppa = 0;
+                               ppa.g.ch = ch;
+                               ppa.g.lun = lun;
+                               ppa.g.blk = blkid;
+                               pr_debug("nvm: erase ppa (%u %u %u)\n",
+                                                               ppa.g.ch,
+                                                               ppa.g.lun,
+                                                               ppa.g.blk);
+
+                               erase_list[ppa_cnt] = ppa;
+                               ppa_cnt++;
+                               done = 0;
+
+                               if (ppa_cnt == max_ppas)
+                                       return ppa_cnt;
+                       }
+               }
+       }
+
+       return ppa_cnt;
+}
+
+static int nvm_fact_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa,
+                                       nvm_bb_update_fn *fn, void *priv)
+{
+       struct ppa_addr dev_ppa;
+       int ret;
+
+       dev_ppa = generic_to_dev_addr(dev, ppa);
+
+       ret = dev->ops->get_bb_tbl(dev, dev_ppa, dev->blks_per_lun, fn, priv);
+       if (ret)
+               pr_err("nvm: failed bb tbl for ch%u lun%u\n",
                                                        ppa.g.ch, ppa.g.lun);
+       return ret;
+}
+
+static int nvm_fact_select_blks(struct nvm_dev *dev, struct factory_blks *f)
+{
+       int ch, lun, ret;
+       struct ppa_addr ppa;
+
+       ppa.ppa = 0;
+       for (ch = 0; ch < dev->nr_chnls; ch++) {
+               for (lun = 0; lun < dev->luns_per_chnl; lun++) {
+                       ppa.g.ch = ch;
+                       ppa.g.lun = lun;
+
+                       ret = nvm_fact_get_bb_tbl(dev, ppa, nvm_factory_blks,
+                                                                       f);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+int nvm_dev_factory(struct nvm_dev *dev, int flags)
+{
+       struct factory_blks f;
+       struct ppa_addr *ppas;
+       int ppa_cnt, ret = -ENOMEM;
+       int max_ppas = dev->ops->max_phys_sect / dev->nr_planes;
+       struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+       struct sysblk_scan s;
+
+       f.blks = kzalloc(factory_nblks(dev->blks_per_lun) * dev->nr_luns,
+                                                               GFP_KERNEL);
+       if (!f.blks)
+               return ret;
+
+       ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL);
+       if (!ppas)
+               goto err_blks;
+
+       f.dev = dev;
+       f.flags = flags;
+
+       /* create list of blks to be erased */
+       ret = nvm_fact_select_blks(dev, &f);
+       if (ret)
+               goto err_ppas;
+
+       /* continue to erase blocks until the list is empty */
+       while ((ppa_cnt = nvm_fact_get_blks(dev, ppas, max_ppas, &f)) > 0)
+               nvm_erase_ppa(dev, ppas, ppa_cnt);
+
+       /* mark host reserved blocks free */
+       if (flags & NVM_FACTORY_RESET_HOST_BLKS) {
+               nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+               mutex_lock(&dev->mlock);
+               ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas,
+                                                       sysblk_get_host_blks);
+               if (!ret)
+                       ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_FREE);
+               mutex_unlock(&dev->mlock);
+       }
+err_ppas:
+       kfree(ppas);
+err_blks:
+       kfree(f.blks);
+       return ret;
+}
+EXPORT_SYMBOL(nvm_dev_factory);
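
A hedged usage sketch of the new export — the flag is one of the three tested in nvm_factory_blks() above; the calling context is hypothetical:

        /* erase free and host-reserved blocks, skip grown-bad ones, then
         * return the old system blocks to the free pool */
        int ret = nvm_dev_factory(dev, NVM_FACTORY_RESET_HOST_BLKS);
        if (ret)
                pr_err("nvm: factory reset failed: %d\n", ret);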
index 546d05f4358a7c452ee80366b3ae782e685fd7ef..b2bbe8659beda5ad8182b5f4ff7bbefeda970459 100644 (file)
@@ -81,6 +81,7 @@ config STI_MBOX
 config MAILBOX_TEST
        tristate "Mailbox Test Client"
        depends on OF
+       depends on HAS_IOMEM
        help
          Test client to help with testing new Controller driver
          implementations.
index 45d85aea9955404d0c4567f33863e0e3f500674f..8f779a1ec99c4b248c9721fceef19b579432539b 100644 (file)
@@ -81,16 +81,10 @@ static struct mbox_controller pcc_mbox_ctrl = {};
  */
 static struct mbox_chan *get_pcc_channel(int id)
 {
-       struct mbox_chan *pcc_chan;
-
        if (id < 0 || id > pcc_mbox_ctrl.num_chans)
                return ERR_PTR(-ENOENT);
 
-       pcc_chan = (struct mbox_chan *)
-               (unsigned long) pcc_mbox_channels +
-               (id * sizeof(*pcc_chan));
-
-       return pcc_chan;
+       return &pcc_mbox_channels[id];
 }
 
 /**
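
The expression removed from get_pcc_channel() cast the base pointer and then added id * sizeof(*pcc_chan) to it; because arithmetic on a struct mbox_chan * is already scaled by the element size, that advanced id * sizeof(struct mbox_chan) elements rather than id elements. A minimal illustration (hypothetical helper, not the driver's code):

        static struct mbox_chan *chan_at(struct mbox_chan *chans, int id)
        {
                /* chans + id and &chans[id] both step in whole elements;
                 * chans + id * sizeof(*chans) overshoots by a factor of
                 * sizeof(struct mbox_chan) */
                return &chans[id];
        }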
index 83392f856dfd5d86e67cc636ab61e0fb0f16750d..22b9e34ceb75c5318a25870bf346a27ba2e64052 100644 (file)
@@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c)
        do {
                ret = btree_root(gc_root, c, &op, &writes, &stats);
                closure_sync(&writes);
+               cond_resched();
 
                if (ret && ret != -EAGAIN)
                        pr_warn("gc failed!");
@@ -2162,8 +2163,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
                rw_lock(true, b, b->level);
 
                if (b->key.ptr[0] != btree_ptr ||
-                   b->seq != seq + 1)
+                   b->seq != seq + 1) {
+                       op->lock = b->level;
                        goto out;
+               }
        }
 
        SET_KEY_PTRS(check_key, 1);
index 679a093a3bf6382a3edc89dc5be96e9019b938ea..8d0ead98eb6edc2c25eafad5f8b4ab0a49e91e2d 100644 (file)
@@ -685,6 +685,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
        WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
             sysfs_create_link(&c->kobj, &d->kobj, d->name),
             "Couldn't create device <-> cache set symlinks");
+
+       clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
 }
 
 static void bcache_device_detach(struct bcache_device *d)
@@ -847,8 +849,11 @@ void bch_cached_dev_run(struct cached_dev *dc)
        buf[SB_LABEL_SIZE] = '\0';
        env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
 
-       if (atomic_xchg(&dc->running, 1))
+       if (atomic_xchg(&dc->running, 1)) {
+               kfree(env[1]);
+               kfree(env[2]);
                return;
+       }
 
        if (!d->c &&
            BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
@@ -1933,6 +1938,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
                        else
                                err = "device busy";
                        mutex_unlock(&bch_register_lock);
+                       if (attr == &ksysfs_register_quiet)
+                               goto out;
                }
                goto err;
        }
@@ -1971,8 +1978,7 @@ out:
 err_close:
        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 err:
-       if (attr != &ksysfs_register_quiet)
-               pr_info("error opening %s: %s", path, err);
+       pr_info("error opening %s: %s", path, err);
        ret = -EINVAL;
        goto out;
 }
@@ -2066,8 +2072,10 @@ static int __init bcache_init(void)
        closure_debug_init();
 
        bcache_major = register_blkdev(0, "bcache");
-       if (bcache_major < 0)
+       if (bcache_major < 0) {
+               unregister_reboot_notifier(&reboot);
                return bcache_major;
+       }
 
        if (!(bcache_wq = create_workqueue("bcache")) ||
            !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
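
Both super.c fixes in this hunk set follow one rule: anything acquired before a failure point must be released on that path — the kasprintf() strings when bch_cached_dev_run() finds the device already running, and the reboot notifier when register_blkdev() fails in bcache_init(). A generic sketch of the unwind ordering (illustrative, not the driver's exact code):

        static int init_sketch(void)
        {
                int major;

                register_reboot_notifier(&reboot);

                major = register_blkdev(0, "bcache");
                if (major < 0) {
                        /* undo the earlier step before reporting failure */
                        unregister_reboot_notifier(&reboot);
                        return major;
                }
                return 0;
        }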
index b23f88d9f18cf1caa863b1b4ed56c37901296ece..b9346cd9cda192bdf9142bfa462dc0ab74383707 100644 (file)
@@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
 
 static bool dirty_pred(struct keybuf *buf, struct bkey *k)
 {
+       struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys);
+
+       BUG_ON(KEY_INODE(k) != dc->disk.id);
+
        return KEY_DIRTY(k);
 }
 
@@ -372,11 +376,24 @@ next:
        }
 }
 
+/*
+ * Returns true if we scanned the entire disk
+ */
 static bool refill_dirty(struct cached_dev *dc)
 {
        struct keybuf *buf = &dc->writeback_keys;
+       struct bkey start = KEY(dc->disk.id, 0, 0);
        struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
-       bool searched_from_start = false;
+       struct bkey start_pos;
+
+       /*
+        * make sure keybuf pos is inside the range for this disk - at bringup
+        * we might not be attached yet so this disk's inode nr isn't
+        * initialized then
+        */
+       if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
+           bkey_cmp(&buf->last_scanned, &end) > 0)
+               buf->last_scanned = start;
 
        if (dc->partial_stripes_expensive) {
                refill_full_stripes(dc);
@@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc)
                        return false;
        }
 
-       if (bkey_cmp(&buf->last_scanned, &end) >= 0) {
-               buf->last_scanned = KEY(dc->disk.id, 0, 0);
-               searched_from_start = true;
-       }
-
+       start_pos = buf->last_scanned;
        bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
 
-       return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start;
+       if (bkey_cmp(&buf->last_scanned, &end) < 0)
+               return false;
+
+       /*
+        * If we get to the end start scanning again from the beginning, and
+        * only scan up to where we initially started scanning from:
+        */
+       buf->last_scanned = start;
+       bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);
+
+       return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
 }
 
 static int bch_writeback_thread(void *arg)
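
The refill_dirty() rewrite above turns the scan into a bounded ring walk: clamp last_scanned into this disk's key range, scan forward to the end, and on reaching the end wrap to the start and stop where the pass began, so each refill visits every key at most once and the return value still means "scanned the entire disk". An integer analogue under those assumptions (illustrative, not bcache's types):

        #include <stdbool.h>

        static bool scan_ring(int start, int end, int *pos, bool (*visit)(int))
        {
                int first;

                if (*pos < start || *pos > end) /* same clamp as the bkey_cmp() guard */
                        *pos = start;
                first = *pos;

                for (; *pos <= end; (*pos)++)
                        if (!visit(*pos))
                                return false;   /* buffer filled before the end */

                for (*pos = start; *pos < first; (*pos)++)
                        if (!visit(*pos))
                                return false;

                return true;                    /* whole range covered */
        }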
index 0a9dab187b79c7ef0a4429c4616a6985d320b964..073a042aed243b2660f6b70a380d0647c709aa65 100644 (file)
@@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
 
 static inline void bch_writeback_queue(struct cached_dev *dc)
 {
-       wake_up_process(dc->writeback_thread);
+       if (!IS_ERR_OR_NULL(dc->writeback_thread))
+               wake_up_process(dc->writeback_thread);
 }
 
 static inline void bch_writeback_add(struct cached_dev *dc)
index 4f22e919787aba1bcab7b8f6799a3c22a6b7ce28..d80cce499a56e595d1f2679170124ca5fc620315 100644 (file)
@@ -210,10 +210,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
        struct block_device *bdev;
        struct mddev *mddev = bitmap->mddev;
        struct bitmap_storage *store = &bitmap->storage;
-       int node_offset = 0;
-
-       if (mddev_is_clustered(bitmap->mddev))
-               node_offset = bitmap->cluster_slot * store->file_pages;
 
        while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
                int size = PAGE_SIZE;
index 5df40480228b7a26e3c73ac78e963ce47ed25448..dd834927bc66e87bf4ec433bc96889068d3bbb82 100644 (file)
@@ -1191,6 +1191,8 @@ static void dm_unprep_request(struct request *rq)
 
        if (clone)
                free_rq_clone(clone);
+       else if (!tio->md->queue->mq_ops)
+               free_rq_tio(tio);
 }
 
 /*
index 4a8e15058e8b56a8a9d89d9f0db50135df44c6d3..685aa2d77e2526935f8f2f416ad8d8681b6c7b14 100644 (file)
@@ -170,7 +170,7 @@ static void add_sector(struct faulty_conf *conf, sector_t start, int mode)
                conf->nfaults = n+1;
 }
 
-static void make_request(struct mddev *mddev, struct bio *bio)
+static void faulty_make_request(struct mddev *mddev, struct bio *bio)
 {
        struct faulty_conf *conf = mddev->private;
        int failit = 0;
@@ -226,7 +226,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
        generic_make_request(bio);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void faulty_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct faulty_conf *conf = mddev->private;
        int n;
@@ -259,7 +259,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
 }
 
 
-static int reshape(struct mddev *mddev)
+static int faulty_reshape(struct mddev *mddev)
 {
        int mode = mddev->new_layout & ModeMask;
        int count = mddev->new_layout >> ModeShift;
@@ -299,7 +299,7 @@ static sector_t faulty_size(struct mddev *mddev, sector_t sectors, int raid_disk
        return sectors;
 }
 
-static int run(struct mddev *mddev)
+static int faulty_run(struct mddev *mddev)
 {
        struct md_rdev *rdev;
        int i;
@@ -327,7 +327,7 @@ static int run(struct mddev *mddev)
        md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
        mddev->private = conf;
 
-       reshape(mddev);
+       faulty_reshape(mddev);
 
        return 0;
 }
@@ -344,11 +344,11 @@ static struct md_personality faulty_personality =
        .name           = "faulty",
        .level          = LEVEL_FAULTY,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = faulty_make_request,
+       .run            = faulty_run,
        .free           = faulty_free,
-       .status         = status,
-       .check_reshape  = reshape,
+       .status         = faulty_status,
+       .check_reshape  = faulty_reshape,
        .size           = faulty_size,
 };
 
index 0ded8e97751d270dbfdae0aac73a792043f10621..dd97d42458226b4b284b488983e218e01c814889 100644 (file)
@@ -293,6 +293,7 @@ static void recover_bitmaps(struct md_thread *thread)
 dlm_unlock:
                dlm_unlock_sync(bm_lockres);
 clear_bit:
+               lockres_free(bm_lockres);
                clear_bit(slot, &cinfo->recovery_map);
        }
 }
@@ -682,8 +683,10 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
                bm_lockres = lockres_init(mddev, str, NULL, 1);
                if (!bm_lockres)
                        return -ENOMEM;
-               if (i == (cinfo->slot_number - 1))
+               if (i == (cinfo->slot_number - 1)) {
+                       lockres_free(bm_lockres);
                        continue;
+               }
 
                bm_lockres->flags |= DLM_LKF_NOQUEUE;
                ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
@@ -858,6 +861,7 @@ static int leave(struct mddev *mddev)
        lockres_free(cinfo->token_lockres);
        lockres_free(cinfo->ack_lockres);
        lockres_free(cinfo->no_new_dev_lockres);
+       lockres_free(cinfo->resync_lockres);
        lockres_free(cinfo->bitmap_lockres);
        unlock_all_bitmaps(mddev);
        dlm_release_lockspace(cinfo->lockspace, 2);
index c4b9134092260f22939027a0a82945b146a4ebf9..4e3843f7d24592cc596df1fc6d6c4577a9a6f57f 100644 (file)
@@ -1044,7 +1044,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
        kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 {
        struct r1conf *conf = mddev->private;
        struct raid1_info *mirror;
@@ -1422,7 +1422,7 @@ read_again:
        wake_up(&conf->wait_barrier);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r1conf *conf = mddev->private;
        int i;
@@ -1439,7 +1439,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
        seq_printf(seq, "]");
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r1conf *conf = mddev->private;
@@ -2472,7 +2472,8 @@ static int init_resync(struct r1conf *conf)
  * that can be installed to exclude normal IO requests.
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
+static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
+                                  int *skipped)
 {
        struct r1conf *conf = mddev->private;
        struct r1bio *r1_bio;
@@ -2890,7 +2891,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 }
 
 static void raid1_free(struct mddev *mddev, void *priv);
-static int run(struct mddev *mddev)
+static int raid1_run(struct mddev *mddev)
 {
        struct r1conf *conf;
        int i;
@@ -3170,15 +3171,15 @@ static struct md_personality raid1_personality =
        .name           = "raid1",
        .level          = 1,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid1_make_request,
+       .run            = raid1_run,
        .free           = raid1_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid1_status,
+       .error_handler  = raid1_error,
        .hot_add_disk   = raid1_add_disk,
        .hot_remove_disk= raid1_remove_disk,
        .spare_active   = raid1_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid1_sync_request,
        .resize         = raid1_resize,
        .size           = raid1_size,
        .check_reshape  = raid1_reshape,
index ce959b4ae4dfd9a09d33828f5706d3a6aadd7f71..1c1447dd3417cd3e27f7c097a004b5e9b17a10d5 100644 (file)
@@ -1442,7 +1442,7 @@ retry_write:
        one_write_done(r10_bio);
 }
 
-static void make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
        struct r10conf *conf = mddev->private;
        sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
@@ -1484,7 +1484,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
        wake_up(&conf->wait_barrier);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid10_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r10conf *conf = mddev->private;
        int i;
@@ -1562,7 +1562,7 @@ static int enough(struct r10conf *conf, int ignore)
                _enough(conf, 1, ignore);
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r10conf *conf = mddev->private;
@@ -2802,7 +2802,7 @@ static int init_resync(struct r10conf *conf)
  *
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
+static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                             int *skipped)
 {
        struct r10conf *conf = mddev->private;
@@ -3523,7 +3523,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
        return ERR_PTR(err);
 }
 
-static int run(struct mddev *mddev)
+static int raid10_run(struct mddev *mddev)
 {
        struct r10conf *conf;
        int i, disk_idx, chunk_size;
@@ -4617,15 +4617,15 @@ static struct md_personality raid10_personality =
        .name           = "raid10",
        .level          = 10,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid10_make_request,
+       .run            = raid10_run,
        .free           = raid10_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid10_status,
+       .error_handler  = raid10_error,
        .hot_add_disk   = raid10_add_disk,
        .hot_remove_disk= raid10_remove_disk,
        .spare_active   = raid10_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid10_sync_request,
        .quiesce        = raid10_quiesce,
        .size           = raid10_size,
        .resize         = raid10_resize,
index a086014dcd49915d5dad8b2c57b9bbfa30b39e5c..b4f02c9959f23c1bb5e8feccb2b9bf6e1c59e862 100644 (file)
@@ -2496,7 +2496,7 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous)
        dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r5conf *conf = mddev->private;
@@ -2958,7 +2958,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
         * If several bio share a stripe. The bio bi_phys_segments acts as a
         * reference count to avoid race. The reference count should already be
         * increased before this function is called (for example, in
-        * make_request()), so other bio sharing this stripe will not free the
+        * raid5_make_request()), so other bio sharing this stripe will not free the
         * stripe. If a stripe is owned by one stripe, the stripe lock will
         * protect it.
         */
@@ -5135,7 +5135,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
        }
 }
 
-static void make_request(struct mddev *mddev, struct bio * bi)
+static void raid5_make_request(struct mddev *mddev, struct bio * bi)
 {
        struct r5conf *conf = mddev->private;
        int dd_idx;
@@ -5225,7 +5225,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                new_sector = raid5_compute_sector(conf, logical_sector,
                                                  previous,
                                                  &dd_idx, NULL);
-               pr_debug("raid456: make_request, sector %llu logical %llu\n",
+               pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
                        (unsigned long long)new_sector,
                        (unsigned long long)logical_sector);
 
@@ -5575,7 +5575,8 @@ ret:
        return retn;
 }
 
-static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
+static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
+                                         int *skipped)
 {
        struct r5conf *conf = mddev->private;
        struct stripe_head *sh;
@@ -6674,7 +6675,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
        return 0;
 }
 
-static int run(struct mddev *mddev)
+static int raid5_run(struct mddev *mddev)
 {
        struct r5conf *conf;
        int working_disks = 0;
@@ -7048,7 +7049,7 @@ static void raid5_free(struct mddev *mddev, void *priv)
        mddev->to_remove = &raid5_attrs_group;
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid5_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r5conf *conf = mddev->private;
        int i;
@@ -7864,15 +7865,15 @@ static struct md_personality raid6_personality =
        .name           = "raid6",
        .level          = 6,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid6_check_reshape,
@@ -7887,15 +7888,15 @@ static struct md_personality raid5_personality =
        .name           = "raid5",
        .level          = 5,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
@@ -7911,15 +7912,15 @@ static struct md_personality raid4_personality =
        .name           = "raid4",
        .level          = 4,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
index 0e209b56c76c1c487007121e336e0438944acb93..c6abeb4fba9db51827fc815fde920b1a30faa1fa 100644 (file)
@@ -903,9 +903,18 @@ static int tda1004x_get_fe(struct dvb_frontend *fe)
 {
        struct dtv_frontend_properties *fe_params = &fe->dtv_property_cache;
        struct tda1004x_state* state = fe->demodulator_priv;
+       int status;
 
        dprintk("%s\n", __func__);
 
+       status = tda1004x_read_byte(state, TDA1004X_STATUS_CD);
+       if (status == -1)
+               return -EIO;
+
+       /* Only update the properties cache if device is locked */
+       if (!(status & 8))
+               return 0;
+
        // inversion status
        fe_params->inversion = INVERSION_OFF;
        if (tda1004x_read_byte(state, TDA1004X_CONFC1) & 0x20)
index 830491960add2d239d817e46db7c4f0d2dc8d00a..bf82726fd3f44f684ac5677286764255055cc9a7 100644 (file)
@@ -478,7 +478,6 @@ static const struct i2c_device_id ir_kbd_id[] = {
        { "ir_rx_z8f0811_hdpvr", 0 },
        { }
 };
-MODULE_DEVICE_TABLE(i2c, ir_kbd_id);
 
 static struct i2c_driver ir_kbd_driver = {
        .driver = {
index b9e43ffa50859caeb4ce2b8343f660a598d249ff..cbe4711e9b31acfca212a8ef0a96867c03bb45fb 100644 (file)
@@ -144,8 +144,7 @@ static int s5k6a3_set_fmt(struct v4l2_subdev *sd,
        mf = __s5k6a3_get_format(sensor, cfg, fmt->pad, fmt->which);
        if (mf) {
                mutex_lock(&sensor->lock);
-               if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE)
-                       *mf = fmt->format;
+               *mf = fmt->format;
                mutex_unlock(&sensor->lock);
        }
        return 0;
index 1d2c310ce838e5fb415ebd4d3b533ac11b8c4b9b..94f8162444071ca3168af652b0c260acaeeab021 100644 (file)
@@ -1211,6 +1211,8 @@ static int alsa_device_init(struct saa7134_dev *dev)
 
 static int alsa_device_exit(struct saa7134_dev *dev)
 {
+       if (!snd_saa7134_cards[dev->nr])
+               return 1;
 
        snd_card_free(snd_saa7134_cards[dev->nr]);
        snd_saa7134_cards[dev->nr] = NULL;
@@ -1260,7 +1262,8 @@ static void saa7134_alsa_exit(void)
        int idx;
 
        for (idx = 0; idx < SNDRV_CARDS; idx++) {
-               snd_card_free(snd_saa7134_cards[idx]);
+               if (snd_saa7134_cards[idx])
+                       snd_card_free(snd_saa7134_cards[idx]);
        }
 
        saa7134_dmasound_init = NULL;
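
Both hunks harden the ALSA teardown against slots that were never initialized: alsa_device_exit() bails out when the per-device card pointer is NULL, and the module-wide loop frees only populated entries, since the card-free helper being called here does not tolerate a NULL card. The shape is the usual free-only-what-you-allocated sweep over a sparse table; a small sketch with stand-in types:

#include <stdio.h>
#include <stdlib.h>

#define NCARDS 8

static void *cards[NCARDS];             /* sparse table: NULL = never probed */

static void free_all(void)
{
        /* Free only populated slots; free(NULL) happens to be safe in
         * userspace, but the kernel helper wrapped above is not. */
        for (int i = 0; i < NCARDS; i++) {
                if (!cards[i])
                        continue;
                free(cards[i]);
                cards[i] = NULL;
        }
}

int main(void)
{
        cards[2] = malloc(16);          /* only one device actually probed */
        free_all();
        puts("teardown ok");
        return 0;
}
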
index 526359447ff90fda699b032e0dbed4d6a06e577e..8b89ebe16d94ace645d7401ece918b83cf92add6 100644 (file)
@@ -215,6 +215,7 @@ config VIDEO_SAMSUNG_EXYNOS_GSC
 config VIDEO_STI_BDISP
        tristate "STMicroelectronics BDISP 2D blitter driver"
        depends on VIDEO_DEV && VIDEO_V4L2
+       depends on HAS_DMA
        depends on ARCH_STI || COMPILE_TEST
        select VIDEOBUF2_DMA_CONTIG
        select V4L2_MEM2MEM_DEV
index 40423c6c5324a8e1cf2573b2acffeb566ad74655..57d42c6172c576757b7724906852c7ec9ac34911 100644 (file)
@@ -1,6 +1,6 @@
 
 config VIDEO_SAMSUNG_EXYNOS4_IS
-       bool "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
+       tristate "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
        depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
        depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
        depends on OF && COMMON_CLK
index 49658ca39e5100d70ed9011eea6adb4cd02c3c55..979c388ebf60cdd1a497aac2570aab05c5e800a6 100644 (file)
@@ -631,6 +631,12 @@ static int fimc_is_hw_open_sensor(struct fimc_is *is,
 
        fimc_is_mem_barrier();
 
+       /*
+        * Some user space use cases hang up here without this
+        * empirically chosen delay.
+        */
+       udelay(100);
+
        mcuctl_write(HIC_OPEN_SENSOR, is, MCUCTL_REG_ISSR(0));
        mcuctl_write(is->sensor_index, is, MCUCTL_REG_ISSR(1));
        mcuctl_write(sensor->drvdata->id, is, MCUCTL_REG_ISSR(2));
index bf9261eb57a15c37a8b82fb3197a94649e4cc9c6..c0816728cbfe1d2be7c6eb5c987a4ac3b8213a74 100644 (file)
@@ -218,8 +218,8 @@ static void isp_video_capture_buffer_queue(struct vb2_buffer *vb)
                                                        ivb->dma_addr[i];
 
                        isp_dbg(2, &video->ve.vdev,
-                               "dma_buf %pad (%d/%d/%d) addr: %pad\n",
-                               &buf_index, ivb->index, i, vb->index,
+                               "dma_buf %d (%d/%d/%d) addr: %pad\n",
+                               buf_index, ivb->index, i, vb->index,
                                &ivb->dma_addr[i]);
                }
 
index f3b2dd30ec7769b3199da9ed2694df5886004616..e79ddbb1e14fc0bd8c1b56504183a927edcdec83 100644 (file)
@@ -185,6 +185,37 @@ error:
        return ret;
 }
 
+/**
+ * __fimc_pipeline_enable - enable power of all pipeline subdevs
+ *                         and the sensor clock
+ * @ep: video pipeline structure
+ * @fmd: fimc media device
+ *
+ * Called with the graph mutex held.
+ */
+static int __fimc_pipeline_enable(struct exynos_media_pipeline *ep,
+                                 struct fimc_md *fmd)
+{
+       struct fimc_pipeline *p = to_fimc_pipeline(ep);
+       int ret;
+
+       /* Enable PXLASYNC clock if this pipeline includes FIMC-IS */
+       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP]) {
+               ret = clk_prepare_enable(fmd->wbclk[CLK_IDX_WB_B]);
+               if (ret < 0)
+                       return ret;
+       }
+
+       ret = fimc_pipeline_s_power(p, 1);
+       if (!ret)
+               return 0;
+
+       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP])
+               clk_disable_unprepare(fmd->wbclk[CLK_IDX_WB_B]);
+
+       return ret;
+}
+
 /**
  * __fimc_pipeline_open - update the pipeline information, enable power
  *                        of all pipeline subdevs and the sensor clock
@@ -199,7 +230,6 @@ static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
        struct fimc_md *fmd = entity_to_fimc_mdev(me);
        struct fimc_pipeline *p = to_fimc_pipeline(ep);
        struct v4l2_subdev *sd;
-       int ret;
 
        if (WARN_ON(p == NULL || me == NULL))
                return -EINVAL;
@@ -208,24 +238,16 @@ static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
                fimc_pipeline_prepare(p, me);
 
        sd = p->subdevs[IDX_SENSOR];
-       if (sd == NULL)
-               return -EINVAL;
-
-       /* Disable PXLASYNC clock if this pipeline includes FIMC-IS */
-       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP]) {
-               ret = clk_prepare_enable(fmd->wbclk[CLK_IDX_WB_B]);
-               if (ret < 0)
-                       return ret;
-       }
-
-       ret = fimc_pipeline_s_power(p, 1);
-       if (!ret)
+       if (sd == NULL) {
+               pr_warn("%s(): No sensor subdev\n", __func__);
+               /*
+                * Pipeline open must not fail here, so that user space
+                * can still configure the pipeline.
+                */
                return 0;
+       }
 
-       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP])
-               clk_disable_unprepare(fmd->wbclk[CLK_IDX_WB_B]);
-
-       return ret;
+       return __fimc_pipeline_enable(ep, fmd);
 }
 
 /**
@@ -269,10 +291,43 @@ static int __fimc_pipeline_s_stream(struct exynos_media_pipeline *ep, bool on)
                { IDX_CSIS, IDX_FLITE, IDX_FIMC, IDX_SENSOR, IDX_IS_ISP },
        };
        struct fimc_pipeline *p = to_fimc_pipeline(ep);
+       struct fimc_md *fmd = entity_to_fimc_mdev(&p->subdevs[IDX_CSIS]->entity);
+       enum fimc_subdev_index sd_id;
        int i, ret = 0;
 
-       if (p->subdevs[IDX_SENSOR] == NULL)
-               return -ENODEV;
+       if (p->subdevs[IDX_SENSOR] == NULL) {
+               if (!fmd->user_subdev_api) {
+                       /*
+                        * The sensor must already have been discovered
+                        * if we aren't in user_subdev_api mode.
+                        */
+                       return -ENODEV;
+               }
+
+               /* Get pipeline sink entity */
+               if (p->subdevs[IDX_FIMC])
+                       sd_id = IDX_FIMC;
+               else if (p->subdevs[IDX_IS_ISP])
+                       sd_id = IDX_IS_ISP;
+               else if (p->subdevs[IDX_FLITE])
+                       sd_id = IDX_FLITE;
+               else
+                       return -ENODEV;
+
+               /*
+                * Sensor could have been linked between open and STREAMON -
+                * check if this is the case.
+                */
+               fimc_pipeline_prepare(p, &p->subdevs[sd_id]->entity);
+
+               if (p->subdevs[IDX_SENSOR] == NULL)
+                       return -ENODEV;
+
+               ret = __fimc_pipeline_enable(ep, fmd);
+               if (ret < 0)
+                       return ret;
+
+       }
 
        for (i = 0; i < IDX_MAX; i++) {
                unsigned int idx = seq[on][i];
@@ -282,8 +337,10 @@ static int __fimc_pipeline_s_stream(struct exynos_media_pipeline *ep, bool on)
                if (ret < 0 && ret != -ENOIOCTLCMD && ret != -ENODEV)
                        goto error;
        }
+
        return 0;
 error:
+       fimc_pipeline_s_power(p, !on);
        for (; i >= 0; i--) {
                unsigned int idx = seq[on][i];
                v4l2_subdev_call(p->subdevs[idx], video, s_stream, !on);
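
Taken together, these hunks extract the power-up path into __fimc_pipeline_enable(), let open() succeed without a sensor so user space can still wire up the media graph, and retry sensor discovery at STREAMON time, rolling the clock back on failure. The refactoring pattern is enable-with-rollback: acquire the optional clock, attempt the main step, release the clock if it fails. A hypothetical userspace sketch:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool have_clk;   /* stands in for the optional PXLASYNC clock */

static int clk_enable(void)   { puts("clk on");  return 0; }
static void clk_disable(void) { puts("clk off"); }
static int power_on(void)     { return -EAGAIN; /* pretend it fails */ }

/* Enable-with-rollback: the single helper both callers now share. */
static int pipeline_enable(void)
{
        int ret;

        if (have_clk) {
                ret = clk_enable();
                if (ret < 0)
                        return ret;
        }

        ret = power_on();
        if (!ret)
                return 0;

        if (have_clk)
                clk_disable();          /* undo the clock on failure */
        return ret;
}

int main(void)
{
        have_clk = true;
        printf("enable -> %d\n", pipeline_enable());
        return 0;
}
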
index c398b285180cda757ec291bb6cc0c004c3c27de4..1af779ee3c747f31aff5d79d7d3db66d65a41b03 100644 (file)
@@ -795,7 +795,7 @@ static int isi_camera_get_formats(struct soc_camera_device *icd,
                        xlate->host_fmt = &isi_camera_formats[i];
                        xlate->code     = code.code;
                        dev_dbg(icd->parent, "Providing format %s using code %d\n",
-                               isi_camera_formats[0].name, code.code);
+                               xlate->host_fmt->name, xlate->code);
                }
                break;
        default:
index cc84c6d6a701ce249722010ed514c1e4a4630def..46c7186f78679a5ca09a01d8709cbb77cf1332bc 100644 (file)
@@ -1493,6 +1493,8 @@ static void soc_camera_async_unbind(struct v4l2_async_notifier *notifier,
                                        struct soc_camera_async_client, notifier);
        struct soc_camera_device *icd = platform_get_drvdata(sasc->pdev);
 
+       icd->control = NULL;
+
        if (icd->clk) {
                v4l2_clk_unregister(icd->clk);
                icd->clk = NULL;
index 42dff9d020afaf66c70e528b58d2f005a53203f3..533bc796391ed3053a5f21c92e94eba92cdc46a4 100644 (file)
@@ -256,7 +256,7 @@ static int vsp1_create_entities(struct vsp1_device *vsp1)
 
        /* Create links. */
        list_for_each_entry(entity, &vsp1->entities, list_dev) {
-               if (entity->type == VSP1_ENTITY_LIF) {
+               if (entity->type == VSP1_ENTITY_WPF) {
                        ret = vsp1_wpf_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
@@ -264,7 +264,10 @@ static int vsp1_create_entities(struct vsp1_device *vsp1)
                        ret = vsp1_rpf_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
-               } else {
+               }
+
+               if (entity->type != VSP1_ENTITY_LIF &&
+                   entity->type != VSP1_ENTITY_RPF) {
                        ret = vsp1_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
index 637d0d6f79fba5fb511b6fba37fb24875d913dbd..b4dca57d1ae3f6d78a3417251089939a7da15f77 100644 (file)
@@ -515,7 +515,7 @@ static bool vsp1_pipeline_stopped(struct vsp1_pipeline *pipe)
        bool stopped;
 
        spin_lock_irqsave(&pipe->irqlock, flags);
-       stopped = pipe->state == VSP1_PIPELINE_STOPPED,
+       stopped = pipe->state == VSP1_PIPELINE_STOPPED;
        spin_unlock_irqrestore(&pipe->irqlock, flags);
 
        return stopped;
index c5d49d7a0d76d09c1ac5598a21ce7726d03190c1..ff8953ae52d142e654f30548d5ddf7bf08c45690 100644 (file)
@@ -1063,8 +1063,11 @@ EXPORT_SYMBOL_GPL(vb2_discard_done);
  */
 static int __qbuf_mmap(struct vb2_buffer *vb, const void *pb)
 {
-       int ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
-                       vb, pb, vb->planes);
+       int ret = 0;
+
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, vb->planes);
        return ret ? ret : call_vb_qop(vb, buf_prepare, vb);
 }
 
@@ -1077,14 +1080,16 @@ static int __qbuf_userptr(struct vb2_buffer *vb, const void *pb)
        struct vb2_queue *q = vb->vb2_queue;
        void *mem_priv;
        unsigned int plane;
-       int ret;
+       int ret = 0;
        enum dma_data_direction dma_dir =
                q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
        bool reacquired = vb->planes[0].mem_priv == NULL;
 
        memset(planes, 0, sizeof(planes[0]) * vb->num_planes);
        /* Copy relevant information provided by the userspace */
-       ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes);
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, planes);
        if (ret)
                return ret;
 
@@ -1192,14 +1197,16 @@ static int __qbuf_dmabuf(struct vb2_buffer *vb, const void *pb)
        struct vb2_queue *q = vb->vb2_queue;
        void *mem_priv;
        unsigned int plane;
-       int ret;
+       int ret = 0;
        enum dma_data_direction dma_dir =
                q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
        bool reacquired = vb->planes[0].mem_priv == NULL;
 
        memset(planes, 0, sizeof(planes[0]) * vb->num_planes);
        /* Copy relevant information provided by the userspace */
-       ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes);
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, planes);
        if (ret)
                return ret;
 
@@ -1520,7 +1527,8 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb)
        q->waiting_for_buffers = false;
        vb->state = VB2_BUF_STATE_QUEUED;
 
-       call_void_bufop(q, copy_timestamp, vb, pb);
+       if (pb)
+               call_void_bufop(q, copy_timestamp, vb, pb);
 
        trace_vb2_qbuf(q, vb);
 
@@ -1532,7 +1540,8 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb)
                __enqueue_in_driver(vb);
 
        /* Fill buffer information for the userspace */
-       call_void_bufop(q, fill_user_buffer, vb, pb);
+       if (pb)
+               call_void_bufop(q, fill_user_buffer, vb, pb);
 
        /*
         * If streamon has been called, and we haven't yet called
@@ -1731,7 +1740,8 @@ static void __vb2_dqbuf(struct vb2_buffer *vb)
  * The return values from this function are intended to be directly returned
  * from vidioc_dqbuf handler in driver.
  */
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+                  bool nonblocking)
 {
        struct vb2_buffer *vb = NULL;
        int ret;
@@ -1754,8 +1764,12 @@ int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
 
        call_void_vb_qop(vb, buf_finish, vb);
 
+       if (pindex)
+               *pindex = vb->index;
+
        /* Fill buffer information for the userspace */
-       call_void_bufop(q, fill_user_buffer, vb, pb);
+       if (pb)
+               call_void_bufop(q, fill_user_buffer, vb, pb);
 
        /* Remove from videobuf queue */
        list_del(&vb->queued_entry);
@@ -1828,7 +1842,7 @@ static void __vb2_queue_cancel(struct vb2_queue *q)
         * that's done in dqbuf, but that's not going to happen when we
         * cancel the whole queue. Note: this code belongs here, not in
         * __vb2_dqbuf() since in vb2_internal_dqbuf() there is a critical
-        * call to __fill_v4l2_buffer() after buf_finish(). That order can't
+        * call to __fill_user_buffer() after buf_finish(). That order can't
         * be changed, so we can't move the buf_finish() to __vb2_dqbuf().
         */
        for (i = 0; i < q->num_buffers; ++i) {
@@ -2357,7 +2371,6 @@ struct vb2_fileio_data {
        unsigned int count;
        unsigned int type;
        unsigned int memory;
-       struct vb2_buffer *b;
        struct vb2_fileio_buf bufs[VB2_MAX_FRAME];
        unsigned int cur_index;
        unsigned int initial_index;
@@ -2410,12 +2423,6 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
        if (fileio == NULL)
                return -ENOMEM;
 
-       fileio->b = kzalloc(q->buf_struct_size, GFP_KERNEL);
-       if (fileio->b == NULL) {
-               kfree(fileio);
-               return -ENOMEM;
-       }
-
        fileio->read_once = q->fileio_read_once;
        fileio->write_immediately = q->fileio_write_immediately;
 
@@ -2460,13 +2467,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
                 * Queue all buffers.
                 */
                for (i = 0; i < q->num_buffers; i++) {
-                       struct vb2_buffer *b = fileio->b;
-
-                       memset(b, 0, q->buf_struct_size);
-                       b->type = q->type;
-                       b->memory = q->memory;
-                       b->index = i;
-                       ret = vb2_core_qbuf(q, i, b);
+                       ret = vb2_core_qbuf(q, i, NULL);
                        if (ret)
                                goto err_reqbufs;
                        fileio->bufs[i].queued = 1;
@@ -2511,7 +2512,6 @@ static int __vb2_cleanup_fileio(struct vb2_queue *q)
                q->fileio = NULL;
                fileio->count = 0;
                vb2_core_reqbufs(q, fileio->memory, &fileio->count);
-               kfree(fileio->b);
                kfree(fileio);
                dprintk(3, "file io emulator closed\n");
        }
@@ -2539,7 +2539,8 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         * else is able to provide this information with the write() operation.
         */
        bool copy_timestamp = !read && q->copy_timestamp;
-       int ret, index;
+       unsigned index;
+       int ret;
 
        dprintk(3, "mode %s, offset %ld, count %zd, %sblocking\n",
                read ? "read" : "write", (long)*ppos, count,
@@ -2564,22 +2565,20 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         */
        index = fileio->cur_index;
        if (index >= q->num_buffers) {
-               struct vb2_buffer *b = fileio->b;
+               struct vb2_buffer *b;
 
                /*
                 * Call vb2_dqbuf to get buffer back.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
-               ret = vb2_core_dqbuf(q, b, nonblock);
+               ret = vb2_core_dqbuf(q, &index, NULL, nonblock);
                dprintk(5, "vb2_dqbuf result: %d\n", ret);
                if (ret)
                        return ret;
                fileio->dq_count += 1;
 
-               fileio->cur_index = index = b->index;
+               fileio->cur_index = index;
                buf = &fileio->bufs[index];
+               b = q->bufs[index];
 
                /*
                 * Get number of bytes filled by the driver
@@ -2630,7 +2629,7 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         * Queue next buffer if required.
         */
        if (buf->pos == buf->size || (!read && fileio->write_immediately)) {
-               struct vb2_buffer *b = fileio->b;
+               struct vb2_buffer *b = q->bufs[index];
 
                /*
                 * Check if this is the last buffer to read.
@@ -2643,15 +2642,11 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
                /*
                 * Call vb2_qbuf and give buffer to the driver.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
-               b->index = index;
                b->planes[0].bytesused = buf->pos;
 
                if (copy_timestamp)
                        b->timestamp = ktime_get_ns();
-               ret = vb2_core_qbuf(q, index, b);
+               ret = vb2_core_qbuf(q, index, NULL);
                dprintk(5, "vb2_dbuf result: %d\n", ret);
                if (ret)
                        return ret;
@@ -2713,10 +2708,9 @@ static int vb2_thread(void *data)
 {
        struct vb2_queue *q = data;
        struct vb2_threadio_data *threadio = q->threadio;
-       struct vb2_fileio_data *fileio = q->fileio;
        bool copy_timestamp = false;
-       int prequeue = 0;
-       int index = 0;
+       unsigned prequeue = 0;
+       unsigned index = 0;
        int ret = 0;
 
        if (q->is_output) {
@@ -2728,37 +2722,34 @@ static int vb2_thread(void *data)
 
        for (;;) {
                struct vb2_buffer *vb;
-               struct vb2_buffer *b = fileio->b;
 
                /*
                 * Call vb2_dqbuf to get buffer back.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
                if (prequeue) {
-                       b->index = index++;
+                       vb = q->bufs[index++];
                        prequeue--;
                } else {
                        call_void_qop(q, wait_finish, q);
                        if (!threadio->stop)
-                               ret = vb2_core_dqbuf(q, b, 0);
+                               ret = vb2_core_dqbuf(q, &index, NULL, 0);
                        call_void_qop(q, wait_prepare, q);
                        dprintk(5, "file io: vb2_dqbuf result: %d\n", ret);
+                       if (!ret)
+                               vb = q->bufs[index];
                }
                if (ret || threadio->stop)
                        break;
                try_to_freeze();
 
-               vb = q->bufs[b->index];
-               if (b->state == VB2_BUF_STATE_DONE)
+               if (vb->state != VB2_BUF_STATE_ERROR)
                        if (threadio->fnc(vb, threadio->priv))
                                break;
                call_void_qop(q, wait_finish, q);
                if (copy_timestamp)
-                       b->timestamp = ktime_get_ns();;
+                       vb->timestamp = ktime_get_ns();
                if (!threadio->stop)
-                       ret = vb2_core_qbuf(q, b->index, b);
+                       ret = vb2_core_qbuf(q, vb->index, NULL);
                call_void_qop(q, wait_prepare, q);
                if (ret || threadio->stop)
                        break;
index c9a28605511a71166af35a4ef11fb95ff0eb8fbd..91f552124050d2b3b91d2190af16fbdf2f9d7243 100644 (file)
@@ -625,7 +625,7 @@ static int vb2_internal_dqbuf(struct vb2_queue *q, struct v4l2_buffer *b,
                return -EINVAL;
        }
 
-       ret = vb2_core_dqbuf(q, b, nonblocking);
+       ret = vb2_core_dqbuf(q, NULL, b, nonblocking);
 
        return ret;
 }
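
The videobuf2 change above retires the fileio->b shadow buffer entirely: instead of round-tripping type/memory/index through a heap-allocated struct, vb2_core_dqbuf() now reports the dequeued index through an optional *pindex out-parameter, callers pass pb == NULL when no user-visible buffer structure exists, and every fill/copy helper is guarded by an if (pb) test. A small sketch of the out-parameter-plus-optional-payload convention (illustrative names only):

#include <errno.h>
#include <stdio.h>

struct user_buf { unsigned index; };    /* stand-in for struct v4l2_buffer */

static unsigned queue[4] = { 7, 1, 3, 0 };
static unsigned head;

/* Either output may be NULL: internal callers (the file-io emulator)
 * want just the index, ioctl-path callers want the payload too. */
static int core_dqbuf(unsigned *pindex, struct user_buf *pb)
{
        if (head >= 4)
                return -EAGAIN;

        unsigned idx = queue[head++];

        if (pindex)
                *pindex = idx;
        if (pb)
                pb->index = idx;        /* fill user state only on request */
        return 0;
}

int main(void)
{
        unsigned idx;
        struct user_buf b;

        core_dqbuf(&idx, NULL);         /* kernel-internal caller */
        core_dqbuf(NULL, &b);           /* ioctl-style caller     */
        printf("internal=%u ioctl=%u\n", idx, b.index);
        return 0;
}
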
index e6e4bacb09ee5d2636576c3363c97a1a5c5f72c8..12099b09a9a71ee5f49e61608b24734e42ca58db 100644 (file)
@@ -2048,6 +2048,7 @@ int db8500_prcmu_config_hotmon(u8 low, u8 high)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(db8500_prcmu_config_hotmon);
 
 static int config_hot_period(u16 val)
 {
@@ -2074,11 +2075,13 @@ int db8500_prcmu_start_temp_sense(u16 cycles32k)
 
        return config_hot_period(cycles32k);
 }
+EXPORT_SYMBOL_GPL(db8500_prcmu_start_temp_sense);
 
 int db8500_prcmu_stop_temp_sense(void)
 {
        return config_hot_period(0xFFFF);
 }
+EXPORT_SYMBOL_GPL(db8500_prcmu_stop_temp_sense);
 
 static int prcmu_a9wdog(u8 cmd, u8 d0, u8 d1, u8 d2, u8 d3)
 {
index 677d0362f334e842abb2c8c2439260978249b871..80f9afcb13823282a9859e08a96c36f55901efa6 100644 (file)
@@ -458,7 +458,11 @@ static int mei_ioctl_client_notify_request(struct file *file, u32 request)
 {
        struct mei_cl *cl = file->private_data;
 
-       return mei_cl_notify_request(cl, file, request);
+       if (request != MEI_HBM_NOTIFICATION_START &&
+           request != MEI_HBM_NOTIFICATION_STOP)
+               return -EINVAL;
+
+       return mei_cl_notify_request(cl, file, (u8)request);
 }
 
 /**
index 5914263090fc81447e26130baab143802504c088..fe207e5420324af3db9411902bcbedeb19e6f60f 100644 (file)
 #include "queue.h"
 
 MODULE_ALIAS("mmc:block");
-
-#ifdef KERNEL
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX
 #endif
 #define MODULE_PARAM_PREFIX "mmcblk."
-#endif
 
 #define INAND_CMD38_ARG_EXT_CSD  113
 #define INAND_CMD38_ARG_ERASE    0x00
@@ -655,8 +652,10 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
        }
 
        md = mmc_blk_get(bdev->bd_disk);
-       if (!md)
+       if (!md) {
+               err = -EINVAL;
                goto cmd_err;
+       }
 
        card = md->queue.card;
        if (IS_ERR(card)) {
index 154aced0b91b63e33cb95a9ba1a82e386395392d..65cc0ac9b82d2b9e1eb928131c7a1dea074f2d02 100644 (file)
@@ -170,7 +170,7 @@ static int mmc_ios_show(struct seq_file *s, void *data)
                str = "invalid";
                break;
        }
-       seq_printf(s, "signal voltage:\t%u (%s)\n", ios->chip_select, str);
+       seq_printf(s, "signal voltage:\t%u (%s)\n", ios->signal_voltage, str);
 
        switch (ios->drv_type) {
        case MMC_SET_DRIVER_TYPE_A:
index 2b16263458af001723636f7770bbf4906b16b07c..aba786daebcadd9e193ffabe71e3a0124dd8ec62 100644 (file)
@@ -29,15 +29,18 @@ struct mmc_pwrseq_simple {
 static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
                                              int value)
 {
-       int i;
        struct gpio_descs *reset_gpios = pwrseq->reset_gpios;
-       int values[reset_gpios->ndescs];
 
-       for (i = 0; i < reset_gpios->ndescs; i++)
-               values[i] = value;
+       if (!IS_ERR(reset_gpios)) {
+               int i;
+               int values[reset_gpios->ndescs];
 
-       gpiod_set_array_value_cansleep(reset_gpios->ndescs, reset_gpios->desc,
-                                      values);
+               for (i = 0; i < reset_gpios->ndescs; i++)
+                       values[i] = value;
+
+               gpiod_set_array_value_cansleep(
+                       reset_gpios->ndescs, reset_gpios->desc, values);
+       }
 }
 
 static void mmc_pwrseq_simple_pre_power_on(struct mmc_host *host)
@@ -79,7 +82,8 @@ static void mmc_pwrseq_simple_free(struct mmc_host *host)
        struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
                                        struct mmc_pwrseq_simple, pwrseq);
 
-       gpiod_put_array(pwrseq->reset_gpios);
+       if (!IS_ERR(pwrseq->reset_gpios))
+               gpiod_put_array(pwrseq->reset_gpios);
 
        if (!IS_ERR(pwrseq->ext_clk))
                clk_put(pwrseq->ext_clk);
@@ -112,7 +116,9 @@ struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
        }
 
        pwrseq->reset_gpios = gpiod_get_array(dev, "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(pwrseq->reset_gpios)) {
+       if (IS_ERR(pwrseq->reset_gpios) &&
+           PTR_ERR(pwrseq->reset_gpios) != -ENOENT &&
+           PTR_ERR(pwrseq->reset_gpios) != -ENOSYS) {
                ret = PTR_ERR(pwrseq->reset_gpios);
                goto clk_put;
        }
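
The pwrseq_simple hunks make the reset GPIOs genuinely optional: gpiod_get_array() failing with -ENOENT or -ENOSYS (line absent, or GPIOLIB not built in) is no longer fatal, and every later use is fenced with IS_ERR(). The general pattern is acquire-optional: treat "not there" as a distinct, acceptable outcome and keep the error pointer around as the absent marker. A hedged userspace sketch, with toy stand-ins for the kernel's ERR_PTR convention:

#include <errno.h>
#include <stdio.h>

/* Userspace stand-ins for ERR_PTR/PTR_ERR/IS_ERR. */
#define ERR_PTR(e)  ((void *)(long)(e))
#define PTR_ERR(p)  ((long)(p))
#define IS_ERR(p)   ((unsigned long)(p) >= (unsigned long)-4095)

static void *get_reset_gpios(void) { return ERR_PTR(-ENOENT); }

static void *acquire_optional(int *fatal)
{
        void *g = get_reset_gpios();

        /* Absent (-ENOENT) or unsupported (-ENOSYS) is fine; any other
         * error (-EPROBE_DEFER, -EINVAL, ...) must still fail the probe. */
        if (IS_ERR(g) && PTR_ERR(g) != -ENOENT && PTR_ERR(g) != -ENOSYS)
                *fatal = 1;
        return g;
}

int main(void)
{
        int fatal = 0;
        void *g = acquire_optional(&fatal);

        if (fatal)
                return 1;
        if (!IS_ERR(g))
                puts("toggle reset lines");
        else
                puts("no reset lines, carry on");
        return 0;
}
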
index f2b164b214ae289dd313a0945cbbd5dd7e1dc532..bb39a29b2db68d72c5e34d7e408cce0105c110a7 100644 (file)
@@ -329,6 +329,7 @@ static int mmc_read_switch(struct mmc_card *card)
                card->sw_caps.sd3_bus_mode = status[13];
                /* Driver Strengths supported by the card */
                card->sw_caps.sd3_drv_type = status[9];
+               card->sw_caps.sd3_curr_limit = status[7] | status[6] << 8;
        }
 
 out:
@@ -545,14 +546,25 @@ static int sd_set_current_limit(struct mmc_card *card, u8 *status)
         * when we set current limit to 200ma, the card will draw 200ma, and
         * when we set current limit to 400/600/800ma, the card will draw its
         * maximum 300ma from the host.
+        *
+        * The above is incorrect: if we try to set a current limit that is
+        * not supported by the card, the card can rightfully error out the
+        * attempt, and remain at the default current limit.  This results
+        * in a 300mA card being limited to 200mA even though the host
+        * supports 800mA. Failures seen with SanDisk 8GB UHS cards with
+        * an iMX6 host. --rmk
         */
-       if (max_current >= 800)
+       if (max_current >= 800 &&
+           card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_800)
                current_limit = SD_SET_CURRENT_LIMIT_800;
-       else if (max_current >= 600)
+       else if (max_current >= 600 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_600)
                current_limit = SD_SET_CURRENT_LIMIT_600;
-       else if (max_current >= 400)
+       else if (max_current >= 400 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_400)
                current_limit = SD_SET_CURRENT_LIMIT_400;
-       else if (max_current >= 200)
+       else if (max_current >= 200 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_200)
                current_limit = SD_SET_CURRENT_LIMIT_200;
 
        if (current_limit != SD_SET_CURRENT_NO_CHANGE) {
@@ -626,9 +638,9 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
         * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
         */
        if (!mmc_host_is_spi(card->host) &&
-               (card->sd_bus_speed == UHS_SDR50_BUS_SPEED ||
-                card->sd_bus_speed == UHS_DDR50_BUS_SPEED ||
-                card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) {
+               (card->host->ios.timing == MMC_TIMING_UHS_SDR50 ||
+                card->host->ios.timing == MMC_TIMING_UHS_DDR50 ||
+                card->host->ios.timing == MMC_TIMING_UHS_SDR104)) {
                err = mmc_execute_tuning(card);
 
                /*
@@ -638,7 +650,7 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
                 * difference between v3.00 and 3.01 spec means that CMD19
                 * tuning is also available for DDR50 mode.
                 */
-               if (err && card->sd_bus_speed == UHS_DDR50_BUS_SPEED) {
+               if (err && card->host->ios.timing == MMC_TIMING_UHS_DDR50) {
                        pr_warn("%s: ddr50 tuning failed\n",
                                mmc_hostname(card->host));
                        err = 0;
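
The current-limit hunk stops assuming the card will politely clamp an unsupported request: the switch status now captures the card's advertised limits (sd3_curr_limit), and the host only proposes the highest limit both sides support, walking down 800, 600, 400, 200 mA. That negotiate-downward scan is easy to isolate; a sketch with hypothetical bit names:

#include <stdio.h>

/* Hypothetical capability bits, one per current limit. */
#define CAP_800  (1u << 3)
#define CAP_600  (1u << 2)
#define CAP_400  (1u << 1)
#define CAP_200  (1u << 0)

/* Pick the highest limit that the host can source AND the card
 * advertises, mirroring the chained tests in the hunk above. */
static int pick_limit(unsigned host_ma, unsigned card_caps)
{
        if (host_ma >= 800 && (card_caps & CAP_800)) return 800;
        if (host_ma >= 600 && (card_caps & CAP_600)) return 600;
        if (host_ma >= 400 && (card_caps & CAP_400)) return 400;
        if (host_ma >= 200 && (card_caps & CAP_200)) return 200;
        return 0;                       /* leave the card's default alone */
}

int main(void)
{
        /* Host can source 800 mA, card only advertises up to 400 mA:
         * the old code would have requested 800 and been refused. */
        printf("negotiated %d mA\n", pick_limit(800, CAP_200 | CAP_400));
        return 0;
}
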
index d61ba1a0495ebe828ba8c110da3d9053b506079b..467b3cf80c44549c704080d78540c46599eb6882 100644 (file)
@@ -535,8 +535,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card)
         * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
         */
        if (!mmc_host_is_spi(card->host) &&
-           ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) ||
-            (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)))
+           ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) ||
+             (card->host->ios.timing == MMC_TIMING_UHS_SDR104)))
                err = mmc_execute_tuning(card);
 out:
        return err;
index 8e94e555b788d4bc56364c488f1af0894e4db50a..6f6fc527a263384817316afe478803634429633a 100644 (file)
@@ -223,6 +223,7 @@ static const struct cis_tpl cis_tpl_list[] = {
        {       0x20,   4,      cistpl_manfid           },
        {       0x21,   2,      /* cistpl_funcid */     },
        {       0x22,   0,      cistpl_funce            },
+       {       0x91,   2,      /* cistpl_sdio_std */   },
 };
 
 static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func)
index 1c1b45ef3faf847d4087d61904ed18c1a6f7ce5e..3446097a43c01ca6cf7f8e163cc75dbbb3911f3f 100644 (file)
@@ -925,6 +925,10 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
 
                        dma_addr = dma_map_page(dma_dev, sg_page(sg), 0,
                                                PAGE_SIZE, dir);
+                       if (dma_mapping_error(dma_dev, dma_addr)) {
+                               data->error = -EFAULT;
+                               break;
+                       }
                        if (direction == DMA_TO_DEVICE)
                                t->tx_dma = dma_addr + sg->offset;
                        else
@@ -1393,10 +1397,12 @@ static int mmc_spi_probe(struct spi_device *spi)
                host->dma_dev = dev;
                host->ones_dma = dma_map_single(dev, ones,
                                MMC_SPI_BLOCKSIZE, DMA_TO_DEVICE);
+               if (dma_mapping_error(dev, host->ones_dma))
+                       goto fail_ones_dma;
                host->data_dma = dma_map_single(dev, host->data,
                                sizeof(*host->data), DMA_BIDIRECTIONAL);
-
-               /* REVISIT in theory those map operations can fail... */
+               if (dma_mapping_error(dev, host->data_dma))
+                       goto fail_data_dma;
 
                dma_sync_single_for_cpu(host->dma_dev,
                                host->data_dma, sizeof(*host->data),
@@ -1462,6 +1468,11 @@ fail_glue_init:
        if (host->dma_dev)
                dma_unmap_single(host->dma_dev, host->data_dma,
                                sizeof(*host->data), DMA_BIDIRECTIONAL);
+fail_data_dma:
+       if (host->dma_dev)
+               dma_unmap_single(host->dma_dev, host->ones_dma,
+                               MMC_SPI_BLOCKSIZE, DMA_TO_DEVICE);
+fail_ones_dma:
        kfree(host->data);
 
 fail_nobuf1:
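
mmc_spi finally checks the dma_map_single()/dma_map_page() return values instead of the old "REVISIT in theory those map operations can fail" comment; each new failure branch jumps to a label that unwinds exactly the mappings made so far. That goto-ladder unwind is the canonical kernel error-handling shape; a compact userspace sketch of the structure:

#include <stdio.h>
#include <stdlib.h>

/* Three staged acquisitions; failure at stage N unwinds stages N-1..1.
 * Labels are ordered so each one falls through into the cleanups below it. */
static int setup(void)
{
        void *a = malloc(16), *b = NULL, *c = NULL;

        if (!a)
                goto fail_a;
        b = malloc(16);
        if (!b)
                goto fail_b;
        c = malloc(16);
        if (!c)
                goto fail_c;

        puts("all stages up");
        free(c); free(b); free(a);      /* normal teardown */
        return 0;

fail_c:
        free(b);
fail_b:
        free(a);
fail_a:
        return -1;
}

int main(void) { return setup() ? 1 : 0; }
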
index fb266745f8240d603956b8b791370ac4764f5a23..0d6ca4116f3dca7fe3170e53809e5e300193cca0 100644 (file)
@@ -151,6 +151,7 @@ static struct variant_data variant_nomadik = {
        .fifosize               = 16 * 4,
        .fifohalfsize           = 8 * 4,
        .clkreg                 = MCI_CLK_ENABLE,
+       .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
        .datalength_bits        = 24,
        .datactrl_mask_sdio     = MCI_ST_DPSM_SDIOEN,
        .st_sdio                = true,
@@ -1886,7 +1887,7 @@ static struct amba_id mmci_ids[] = {
        {
                .id     = 0x00280180,
                .mask   = 0x00ffffff,
-               .data   = &variant_u300,
+               .data   = &variant_nomadik,
        },
        {
                .id     = 0x00480180,
index b6639ea0bf18dbd37ccf0d639073119353b418b6..f6e4d97180359c32066ffa36d1ae99fd0f612d3d 100644 (file)
@@ -2232,6 +2232,7 @@ err_irq:
                dma_release_channel(host->tx_chan);
        if (host->rx_chan)
                dma_release_channel(host->rx_chan);
+       pm_runtime_dont_use_autosuspend(host->dev);
        pm_runtime_put_sync(host->dev);
        pm_runtime_disable(host->dev);
        if (host->dbclk)
@@ -2253,6 +2254,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
        dma_release_channel(host->tx_chan);
        dma_release_channel(host->rx_chan);
 
+       pm_runtime_dont_use_autosuspend(host->dev);
        pm_runtime_put_sync(host->dev);
        pm_runtime_disable(host->dev);
        device_init_wakeup(&pdev->dev, false);
index ce08896b9d696b00440fe7807aeddf72b922f320..da824772bbb4a37e29c33cb6c44a1be38b10c3a5 100644 (file)
@@ -86,7 +86,7 @@ struct pxamci_host {
 static inline void pxamci_init_ocr(struct pxamci_host *host)
 {
 #ifdef CONFIG_REGULATOR
-       host->vcc = regulator_get_optional(mmc_dev(host->mmc), "vmmc");
+       host->vcc = devm_regulator_get_optional(mmc_dev(host->mmc), "vmmc");
 
        if (IS_ERR(host->vcc))
                host->vcc = NULL;
@@ -654,12 +654,8 @@ static int pxamci_probe(struct platform_device *pdev)
 
        r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        irq = platform_get_irq(pdev, 0);
-       if (!r || irq < 0)
-               return -ENXIO;
-
-       r = request_mem_region(r->start, SZ_4K, DRIVER_NAME);
-       if (!r)
-               return -EBUSY;
+       if (irq < 0)
+               return irq;
 
        mmc = mmc_alloc_host(sizeof(struct pxamci_host), &pdev->dev);
        if (!mmc) {
@@ -695,7 +691,7 @@ static int pxamci_probe(struct platform_device *pdev)
        host->pdata = pdev->dev.platform_data;
        host->clkrt = CLKRT_OFF;
 
-       host->clk = clk_get(&pdev->dev, NULL);
+       host->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(host->clk)) {
                ret = PTR_ERR(host->clk);
                host->clk = NULL;
@@ -727,9 +723,9 @@ static int pxamci_probe(struct platform_device *pdev)
        host->irq = irq;
        host->imask = MMC_I_MASK_ALL;
 
-       host->base = ioremap(r->start, SZ_4K);
-       if (!host->base) {
-               ret = -ENOMEM;
+       host->base = devm_ioremap_resource(&pdev->dev, r);
+       if (IS_ERR(host->base)) {
+               ret = PTR_ERR(host->base);
                goto out;
        }
 
@@ -742,7 +738,8 @@ static int pxamci_probe(struct platform_device *pdev)
        writel(64, host->base + MMC_RESTO);
        writel(host->imask, host->base + MMC_I_MASK);
 
-       ret = request_irq(host->irq, pxamci_irq, 0, DRIVER_NAME, host);
+       ret = devm_request_irq(&pdev->dev, host->irq, pxamci_irq, 0,
+                              DRIVER_NAME, host);
        if (ret)
                goto out;
 
@@ -804,7 +801,7 @@ static int pxamci_probe(struct platform_device *pdev)
                dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_ro);
                goto out;
        } else {
-               mmc->caps |= host->pdata->gpio_card_ro_invert ?
+               mmc->caps2 |= host->pdata->gpio_card_ro_invert ?
                        0 : MMC_CAP2_RO_ACTIVE_HIGH;
        }
 
@@ -833,14 +830,9 @@ out:
                        dma_release_channel(host->dma_chan_rx);
                if (host->dma_chan_tx)
                        dma_release_channel(host->dma_chan_tx);
-               if (host->base)
-                       iounmap(host->base);
-               if (host->clk)
-                       clk_put(host->clk);
        }
        if (mmc)
                mmc_free_host(mmc);
-       release_resource(r);
        return ret;
 }
 
@@ -859,9 +851,6 @@ static int pxamci_remove(struct platform_device *pdev)
                        gpio_ro = host->pdata->gpio_card_ro;
                        gpio_power = host->pdata->gpio_power;
                }
-               if (host->vcc)
-                       regulator_put(host->vcc);
-
                if (host->pdata && host->pdata->exit)
                        host->pdata->exit(&pdev->dev, mmc);
 
@@ -870,16 +859,10 @@ static int pxamci_remove(struct platform_device *pdev)
                       END_CMD_RES|PRG_DONE|DATA_TRAN_DONE,
                       host->base + MMC_I_MASK);
 
-               free_irq(host->irq, host);
                dmaengine_terminate_all(host->dma_chan_rx);
                dmaengine_terminate_all(host->dma_chan_tx);
                dma_release_channel(host->dma_chan_rx);
                dma_release_channel(host->dma_chan_tx);
-               iounmap(host->base);
-
-               clk_put(host->clk);
-
-               release_resource(host->res);
 
                mmc_free_host(mmc);
        }
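
The pxamci hunks are a straight conversion to managed (devm_*) resources: the regulator, clock, MMIO mapping and IRQ are now bound to the device's lifetime, so every hand-written iounmap/clk_put/free_irq/release_resource in the probe-error and remove paths simply disappears. The idea is registering a destructor at acquisition time; a toy userspace analog (nothing here is the kernel's devres API):

#include <stdio.h>
#include <stdlib.h>

/* Toy devres: remember a destructor per resource, run them LIFO. */
#define MAX_RES 8

static void (*dtors[MAX_RES])(void *);
static void *objs[MAX_RES];
static int nres;

static void *managed(void *obj, void (*dtor)(void *))
{
        objs[nres] = obj;
        dtors[nres++] = dtor;           /* cleanup is now automatic */
        return obj;
}

static void device_teardown(void)
{
        while (nres--)
                dtors[nres](objs[nres]);
}

static void free_mem(void *p) { free(p); puts("freed"); }

int main(void)
{
        managed(malloc(32), free_mem);  /* no matching free() at call sites */
        managed(malloc(64), free_mem);
        device_teardown();              /* one call unwinds everything */
        return 0;
}
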
index f6047fc9406204d6ee672b74544d0ec6866308cd..a5cda926d38eb23be20eece31c6b1ef0c44723a3 100644 (file)
@@ -146,6 +146,33 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = {
        .ops = &sdhci_acpi_ops_int,
 };
 
+static int bxt_get_cd(struct mmc_host *mmc)
+{
+       int gpio_cd = mmc_gpio_get_cd(mmc);
+       struct sdhci_host *host = mmc_priv(mmc);
+       unsigned long flags;
+       int ret = 0;
+
+       if (!gpio_cd)
+               return 0;
+
+       pm_runtime_get_sync(mmc->parent);
+
+       spin_lock_irqsave(&host->lock, flags);
+
+       if (host->flags & SDHCI_DEVICE_DEAD)
+               goto out;
+
+       ret = !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
+out:
+       spin_unlock_irqrestore(&host->lock, flags);
+
+       pm_runtime_mark_last_busy(mmc->parent);
+       pm_runtime_put_autosuspend(mmc->parent);
+
+       return ret;
+}
+
 static int sdhci_acpi_emmc_probe_slot(struct platform_device *pdev,
                                      const char *hid, const char *uid)
 {
@@ -196,6 +223,9 @@ static int sdhci_acpi_sd_probe_slot(struct platform_device *pdev,
 
        /* Platform specific code during sd probe slot goes here */
 
+       if (hid && !strcmp(hid, "80865ACA"))
+               host->mmc_host_ops.get_cd = bxt_get_cd;
+
        return 0;
 }
 
index 7e7d8f0c9438fe4ac41bfb1f1759ff4fde776089..9cb86fb25976f380530fe0029015bdfaf408d743 100644 (file)
@@ -217,6 +217,7 @@ static int sdhci_at91_probe(struct platform_device *pdev)
 pm_runtime_disable:
        pm_runtime_disable(&pdev->dev);
        pm_runtime_set_suspended(&pdev->dev);
+       pm_runtime_put_noidle(&pdev->dev);
 clocks_disable_unprepare:
        clk_disable_unprepare(priv->gck);
        clk_disable_unprepare(priv->mainck);
index cc851b065d0ae685d5297f7e4b8ac7a40b3e3465..df3b8eced8c4a50a727774404a066086d4ebe0f7 100644 (file)
@@ -330,6 +330,33 @@ static void spt_read_drive_strength(struct sdhci_host *host)
        sdhci_pci_spt_drive_strength = 0x10 | ((val >> 12) & 0xf);
 }
 
+static int bxt_get_cd(struct mmc_host *mmc)
+{
+       int gpio_cd = mmc_gpio_get_cd(mmc);
+       struct sdhci_host *host = mmc_priv(mmc);
+       unsigned long flags;
+       int ret = 0;
+
+       if (!gpio_cd)
+               return 0;
+
+       pm_runtime_get_sync(mmc->parent);
+
+       spin_lock_irqsave(&host->lock, flags);
+
+       if (host->flags & SDHCI_DEVICE_DEAD)
+               goto out;
+
+       ret = !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
+out:
+       spin_unlock_irqrestore(&host->lock, flags);
+
+       pm_runtime_mark_last_busy(mmc->parent);
+       pm_runtime_put_autosuspend(mmc->parent);
+
+       return ret;
+}
+
 static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
 {
        slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
@@ -362,6 +389,10 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot)
        slot->cd_con_id = NULL;
        slot->cd_idx = 0;
        slot->cd_override_level = true;
+       if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD ||
+           slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD)
+               slot->host->mmc_host_ops.get_cd = bxt_get_cd;
+
        return 0;
 }
 
index d622435d1bcc71e47e54cdb42cb847660b009a55..add9fdfd1d8feff619f70d58d581b877a668a88c 100644 (file)
@@ -1360,7 +1360,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
        sdhci_runtime_pm_get(host);
 
        /* Firstly check card presence */
-       present = sdhci_do_get_cd(host);
+       present = mmc->ops->get_cd(mmc);
 
        spin_lock_irqsave(&host->lock, flags);
 
@@ -2849,6 +2849,8 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev,
 
        host = mmc_priv(mmc);
        host->mmc = mmc;
+       host->mmc_host_ops = sdhci_ops;
+       mmc->ops = &host->mmc_host_ops;
 
        return host;
 }
@@ -3037,7 +3039,6 @@ int sdhci_add_host(struct sdhci_host *host)
        /*
         * Set host parameters.
         */
-       mmc->ops = &sdhci_ops;
        max_clk = host->max_clk;
 
        if (host->ops->get_min_clock)
index 7654ae5d2b4e11b8b1814fa99b514223d4475d50..0115e9907bf8243aa394de60ac959e5876d5a3a9 100644 (file)
@@ -430,6 +430,7 @@ struct sdhci_host {
 
        /* Internal data */
        struct mmc_host *mmc;   /* MMC structure */
+       struct mmc_host_ops mmc_host_ops;       /* MMC host ops */
        u64 dma_mask;           /* custom DMA mask */
 
 #if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
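
The sdhci core change is what makes the two bxt_get_cd() overrides above possible: instead of pointing every host at the shared const sdhci_ops table, sdhci_alloc_host() now copies it into a per-host mmc_host_ops so a platform probe can patch a single callback. Copy-then-override of a shared ops table looks like this in miniature (illustrative types, not sdhci's):

#include <stdio.h>

struct host_ops { int (*get_cd)(void); };

static int generic_get_cd(void) { return 1; }
static int quirky_get_cd(void)  { return 0; }   /* platform-specific */

static const struct host_ops default_ops = { .get_cd = generic_get_cd };

struct host { struct host_ops ops; };           /* per-instance copy */

int main(void)
{
        struct host a = { .ops = default_ops };
        struct host b = { .ops = default_ops };

        b.ops.get_cd = quirky_get_cd;           /* override one host only */

        printf("a: %d, b: %d\n", a.ops.get_cd(), b.ops.get_cd());
        return 0;
}
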
index 1ca8a1359cbc1efd6a35cefb596c1c4534c77920..6234eab38ff3efe8b22b8386f4d635496dad0d08 100644 (file)
@@ -445,7 +445,7 @@ static void sh_mmcif_request_dma(struct sh_mmcif_host *host)
                                                        pdata->slave_id_rx);
        } else {
                host->chan_tx = dma_request_slave_channel(dev, "tx");
-               host->chan_tx = dma_request_slave_channel(dev, "rx");
+               host->chan_rx = dma_request_slave_channel(dev, "rx");
        }
        dev_dbg(dev, "%s: got channel TX %p RX %p\n", __func__, host->chan_tx,
                host->chan_rx);
index e4b05dbb9ca822f003f566d07a60c508721bc68c..4a0d6b80eaa374e7749081c7f966ce85379ad3e2 100644 (file)
@@ -94,9 +94,9 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
                        desc = NULL;
                        ret = cookie;
                }
+               dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+                       __func__, host->sg_len, ret, cookie, host->mrq);
        }
-       dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-               __func__, host->sg_len, ret, cookie, host->mrq);
 
 pio:
        if (!desc) {
@@ -116,8 +116,8 @@ pio:
                         "DMA failed: %d, falling back to PIO\n", ret);
        }
 
-       dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
-               desc, cookie, host->sg_len);
+       dev_dbg(&host->pdev->dev, "%s(): desc %p, sg[%d]\n", __func__,
+               desc, host->sg_len);
 }
 
 static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
@@ -174,9 +174,9 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
                        desc = NULL;
                        ret = cookie;
                }
+               dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+                       __func__, host->sg_len, ret, cookie, host->mrq);
        }
-       dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-               __func__, host->sg_len, ret, cookie, host->mrq);
 
 pio:
        if (!desc) {
@@ -196,8 +196,7 @@ pio:
                         "DMA failed: %d, falling back to PIO\n", ret);
        }
 
-       dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__,
-               desc, cookie);
+       dev_dbg(&host->pdev->dev, "%s(): desc %p\n", __func__, desc);
 }
 
 void tmio_mmc_start_dma(struct tmio_mmc_host *host,
index 44093699859321ce4164ed63d368ec8b72c5d49b..cec3188a170d6a504aad659c45cdd9613002c394 100644 (file)
@@ -24,6 +24,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bcm963xx_tag.h>
 #include <linux/crc32.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mtd/partitions.h>
 
 #include <asm/mach-bcm63xx/bcm63xx_nvram.h>
-#include <asm/mach-bcm63xx/bcm963xx_tag.h>
 #include <asm/mach-bcm63xx/board_bcm963xx.h>
 
-#define BCM63XX_EXTENDED_SIZE  0xBFC00000      /* Extended flash address */
-
 #define BCM63XX_CFE_BLOCK_SIZE SZ_64K          /* always at least 64KiB */
 
 #define BCM63XX_CFE_MAGIC_OFFSET 0x4e0
@@ -123,8 +121,8 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
                pr_info("CFE boot tag found with version %s and board type %s\n",
                        tagversion, boardid);
 
-               kerneladdr = kerneladdr - BCM63XX_EXTENDED_SIZE;
-               rootfsaddr = rootfsaddr - BCM63XX_EXTENDED_SIZE;
+               kerneladdr = kerneladdr - BCM963XX_EXTENDED_SIZE;
+               rootfsaddr = rootfsaddr - BCM963XX_EXTENDED_SIZE;
                spareaddr = roundup(totallen, master->erasesize) + cfelen;
 
                if (rootfsaddr < kerneladdr) {
index 54e056d3be0250910f464732bacc13bfd7eee50b..ee2b74d1d1b5e388a7d60880e5ef0e1f07676e70 100644 (file)
@@ -174,9 +174,9 @@ static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end,
        struct ubi_device *ubi = desc->vol->ubi;
        struct inode *inode = file_inode(file);
        int err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = ubi_sync(ubi->ubi_num);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 56b560558884dc6d87a0081d7d99c96ac4e99a67..b7f1a991903363cd6b3888722cb5742a70f0c765 100644 (file)
@@ -214,6 +214,8 @@ static void bond_uninit(struct net_device *bond_dev);
 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
                                                struct rtnl_link_stats64 *stats);
 static void bond_slave_arr_handler(struct work_struct *work);
+static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+                                 int mod);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -2127,6 +2129,7 @@ static void bond_miimon_commit(struct bonding *bond)
                        continue;
 
                case BOND_LINK_UP:
+                       bond_update_speed_duplex(slave);
                        bond_set_slave_link_state(slave, BOND_LINK_UP,
                                                  BOND_SLAVE_NOTIFY_NOW);
                        slave->last_link_up = jiffies;
@@ -2459,7 +2462,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
                 struct slave *slave)
 {
        struct arphdr *arp = (struct arphdr *)skb->data;
-       struct slave *curr_active_slave;
+       struct slave *curr_active_slave, *curr_arp_slave;
        unsigned char *arp_ptr;
        __be32 sip, tip;
        int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
@@ -2506,26 +2509,41 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
                     &sip, &tip);
 
        curr_active_slave = rcu_dereference(bond->curr_active_slave);
+       curr_arp_slave = rcu_dereference(bond->current_arp_slave);
 
-       /* Backup slaves won't see the ARP reply, but do come through
-        * here for each ARP probe (so we swap the sip/tip to validate
-        * the probe).  In a "redundant switch, common router" type of
-        * configuration, the ARP probe will (hopefully) travel from
-        * the active, through one switch, the router, then the other
-        * switch before reaching the backup.
+       /* We 'trust' the received ARP enough to validate it if:
+        *
+        * (a) the slave receiving the ARP is active (which includes the
+        * current ARP slave, if any), or
+        *
+        * (b) the receiving slave isn't active, but there is a currently
+        * active slave and it received valid arp reply(s) after it became
+        * the currently active slave, or
+        *
+        * (c) there is an ARP slave that sent an ARP during the prior ARP
+        * interval, and we receive an ARP reply on any slave.  We accept
+        * these because switch FDB update delays may deliver the ARP
+        * reply to a slave other than the sender of the ARP request.
         *
-        * We 'trust' the arp requests if there is an active slave and
-        * it received valid arp reply(s) after it became active. This
-        * is done to avoid endless looping when we can't reach the
+        * Note: for (b), backup slaves are receiving the broadcast ARP
+        * request, not a reply.  This request passes from the sending
+        * slave through the L2 switch(es) to the receiving slave.  Since
+        * this is checking the request, sip/tip are swapped for
+        * validation.
+        *
+        * This is done to avoid endless looping when we can't reach the
         * arp_ip_target and fool ourselves with our own arp requests.
         */
-
        if (bond_is_active_slave(slave))
                bond_validate_arp(bond, slave, sip, tip);
        else if (curr_active_slave &&
                 time_after(slave_last_rx(bond, curr_active_slave),
                            curr_active_slave->last_link_up))
                bond_validate_arp(bond, slave, tip, sip);
+       else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
+                bond_time_in_interval(bond,
+                                      dev_trans_start(curr_arp_slave->dev), 1))
+               bond_validate_arp(bond, slave, sip, tip);
 
 out_unlock:
        if (arp != (struct arphdr *)skb->data)
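
The rewritten comment enumerates three trust conditions for validating a received ARP; case (c) is the new one, covering FDB-update races where the reply lands on a slave other than the one that sent the probe, and sip/tip are swapped only for case (b) because there the backup slave is seeing the broadcast request rather than the reply. A boiled-down predicate, with the conditions labelled as in the comment (hypothetical helpers, not the bonding API):

#include <stdbool.h>
#include <stdio.h>

enum validate { NONE, AS_REPLY, AS_REQUEST };

/* (a) receiving slave is active: trust the frame as-is.
 * (b) backup slave, but the active slave has seen valid replies since
 *     it went active: it's our own broadcast request, so swap sip/tip.
 * (c) an ARP slave probed within the last interval and this frame is a
 *     reply: accept it on any slave (the switch FDB may have rerouted it). */
static enum validate classify(bool rx_is_active, bool active_fresh,
                              bool arp_slave_probed, bool is_reply)
{
        if (rx_is_active)
                return AS_REPLY;                        /* (a) */
        if (active_fresh)
                return AS_REQUEST;                      /* (b), swapped */
        if (arp_slave_probed && is_reply)
                return AS_REPLY;                        /* (c) */
        return NONE;
}

int main(void)
{
        printf("%d\n", classify(false, false, true, true)); /* case (c) */
        return 0;
}
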
index fc5b75675cd8b6dbebbbc79d357001676900fefa..eb7192fab5932bcde88f31f0b90604997e27a524 100644 (file)
@@ -117,6 +117,9 @@ MODULE_LICENSE("GPL v2");
  */
 #define EMS_USB_ARM7_CLOCK 8000000
 
+#define CPC_TX_QUEUE_TRIGGER_LOW       25
+#define CPC_TX_QUEUE_TRIGGER_HIGH      35
+
 /*
  * CAN-Message representation in a CPC_MSG. Message object type is
  * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or
@@ -278,6 +281,11 @@ static void ems_usb_read_interrupt_callback(struct urb *urb)
        switch (urb->status) {
        case 0:
                dev->free_slots = dev->intr_in_buffer[1];
+               if (dev->free_slots > CPC_TX_QUEUE_TRIGGER_HIGH) {
+                       if (netif_queue_stopped(netdev)) {
+                               netif_wake_queue(netdev);
+                       }
+               }
                break;
 
        case -ECONNRESET: /* unlink */
@@ -526,8 +534,6 @@ static void ems_usb_write_bulk_callback(struct urb *urb)
        /* Release context */
        context->echo_index = MAX_TX_URBS;
 
-       if (netif_queue_stopped(netdev))
-               netif_wake_queue(netdev);
 }
 
 /*
@@ -587,7 +593,7 @@ static int ems_usb_start(struct ems_usb *dev)
        int err, i;
 
        dev->intr_in_buffer[0] = 0;
-       dev->free_slots = 15; /* initial size */
+       dev->free_slots = 50; /* initial size */
 
        for (i = 0; i < MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
@@ -835,7 +841,7 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne
 
                /* Slow down tx path */
                if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS ||
-                   dev->free_slots < 5) {
+                   dev->free_slots < CPC_TX_QUEUE_TRIGGER_LOW) {
                        netif_stop_queue(netdev);
                }
        }
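
The ems_usb change replaces a wake-on-every-completed-URB scheme with watermark hysteresis on the device's free_slots counter: stop the queue when free slots drop below 25, wake it only once the interrupt endpoint reports more than 35, so the queue doesn't thrash around a single threshold. The watermark pair in isolation (constants as in the hunk, the rest illustrative):

#include <stdbool.h>
#include <stdio.h>

#define TRIGGER_LOW  25
#define TRIGGER_HIGH 35

static bool stopped;

static void on_tx_submit(int free_slots)
{
        if (free_slots < TRIGGER_LOW)
                stopped = true;                 /* apply back-pressure */
}

static void on_device_report(int free_slots)
{
        /* Wake only above the HIGH mark: the 10-slot gap is the
         * hysteresis that prevents stop/wake ping-pong near 25. */
        if (stopped && free_slots > TRIGGER_HIGH)
                stopped = false;
}

int main(void)
{
        on_tx_submit(20);
        on_device_report(30);   /* still stopped: inside the gap */
        printf("stopped=%d\n", stopped);
        on_device_report(40);   /* above HIGH: queue wakes       */
        printf("stopped=%d\n", stopped);
        return 0;
}
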
index cc6c5455341839eb91ba55dfd23e96e954c41fb7..a47f52f44b0d6b5f847a417f682c193ce127e29d 100644 (file)
@@ -25,6 +25,7 @@
 static const struct mv88e6xxx_switch_id mv88e6352_table[] = {
        { PORT_SWITCH_ID_6172, "Marvell 88E6172" },
        { PORT_SWITCH_ID_6176, "Marvell 88E6176" },
+       { PORT_SWITCH_ID_6240, "Marvell 88E6240" },
        { PORT_SWITCH_ID_6320, "Marvell 88E6320" },
        { PORT_SWITCH_ID_6320_A1, "Marvell 88E6320 (A1)" },
        { PORT_SWITCH_ID_6320_A2, "Marvell 88e6320 (A2)" },
index 9fe33fc3c2b9a54ddbd0db4e8a03d750553b6d7c..512c8c0be1b4cbf029d4cb0bc692f88e9bf64660 100644 (file)
@@ -1532,7 +1532,7 @@ int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
 
        /* no PVID with ranges, otherwise it's a bug */
        if (pvid)
-               err = _mv88e6xxx_port_pvid_set(ds, port, vid);
+               err = _mv88e6xxx_port_pvid_set(ds, port, vlan->vid_end);
 unlock:
        mutex_unlock(&ps->smi_mutex);
 
@@ -1555,7 +1555,7 @@ static int _mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, u16 vid)
 
        if (vlan.vid != vid || !vlan.valid ||
            vlan.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
-               return -ENOENT;
+               return -EOPNOTSUPP;
 
        vlan.data[port] = GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER;
 
@@ -1582,6 +1582,7 @@ int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port,
                            const struct switchdev_obj_port_vlan *vlan)
 {
        struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       const u16 defpvid = 4000 + ds->index * DSA_MAX_PORTS + port;
        u16 pvid, vid;
        int err = 0;
 
@@ -1597,7 +1598,8 @@ int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port,
                        goto unlock;
 
                if (vid == pvid) {
-                       err = _mv88e6xxx_port_pvid_set(ds, port, 0);
+                       /* restore reserved VLAN ID */
+                       err = _mv88e6xxx_port_pvid_set(ds, port, defpvid);
                        if (err)
                                goto unlock;
                }
@@ -1889,26 +1891,20 @@ unlock:
 
 int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, u32 members)
 {
-       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-       const u16 pvid = 4000 + ds->index * DSA_MAX_PORTS + port;
-       int err;
-
-       /* The port joined a bridge, so leave its reserved VLAN */
-       mutex_lock(&ps->smi_mutex);
-       err = _mv88e6xxx_port_vlan_del(ds, port, pvid);
-       if (!err)
-               err = _mv88e6xxx_port_pvid_set(ds, port, 0);
-       mutex_unlock(&ps->smi_mutex);
-       return err;
+       return 0;
 }
 
 int mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, u32 members)
+{
+       return 0;
+}
+
+static int mv88e6xxx_setup_port_default_vlan(struct dsa_switch *ds, int port)
 {
        struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
        const u16 pvid = 4000 + ds->index * DSA_MAX_PORTS + port;
        int err;
 
-       /* The port left the bridge, so join its reserved VLAN */
        mutex_lock(&ps->smi_mutex);
        err = _mv88e6xxx_port_vlan_add(ds, port, pvid, true);
        if (!err)
@@ -2163,7 +2159,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
         * database, and allow every port to egress frames on all other ports.
         */
        reg = BIT(ps->num_ports) - 1; /* all ports */
-       ret = _mv88e6xxx_port_vlan_map_set(ds, port, reg & ~port);
+       reg &= ~BIT(port); /* except itself */
+       ret = _mv88e6xxx_port_vlan_map_set(ds, port, reg);
        if (ret)
                goto abort;
 
@@ -2191,8 +2188,7 @@ int mv88e6xxx_setup_ports(struct dsa_switch *ds)
                if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i))
                        continue;
 
-               /* setup the unbridged state */
-               ret = mv88e6xxx_port_bridge_leave(ds, i, 0);
+               ret = mv88e6xxx_setup_port_default_vlan(ds, i);
                if (ret < 0)
                        return ret;
        }
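
The mv88e6xxx hunks above replace the bridge join/leave VLAN juggling with a single reserved VLAN per (switch, port) pair, programmed once at setup time, and tighten the port-based VLAN map so a port can egress to every port except itself. A minimal userspace sketch of the two calculations, assuming DSA_MAX_PORTS is 12 as in kernels of this era (the helper names and main() harness are illustrative, not driver code):

#include <stdio.h>

#define DSA_MAX_PORTS   12

/* One unique reserved VID per (switch, port) pair, starting at 4000 */
static unsigned short default_pvid(int sw_index, int port)
{
        return 4000 + sw_index * DSA_MAX_PORTS + port;
}

/* This port may egress to every port except itself */
static unsigned int vlan_map(int num_ports, int port)
{
        unsigned int reg = (1u << num_ports) - 1;       /* all ports */

        return reg & ~(1u << port);                     /* except itself */
}

int main(void)
{
        printf("pvid=%u map=0x%x\n", default_pvid(0, 3), vlan_map(7, 3));
        return 0;
}

Note the map fix itself: the old code masked with ~port, an integer complement, where the new code correctly masks with ~BIT(port).
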
index 2777289a26c0419f855926ef028942074ca62a2f..2f79d29f17f2f0fae388f9cd4b4d9da201793775 100644 (file)
@@ -1501,6 +1501,7 @@ static const struct pcmcia_device_id pcnet_ids[] = {
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x030a),
        PCMCIA_DEVICE_MANF_CARD(0x0274, 0x1103),
        PCMCIA_DEVICE_MANF_CARD(0x0274, 0x1121),
+       PCMCIA_DEVICE_MANF_CARD(0xc001, 0x0009),
        PCMCIA_DEVICE_PROD_ID12("2408LAN", "Ethernet", 0x352fff7f, 0x00b2e941),
        PCMCIA_DEVICE_PROD_ID1234("Socket", "CF 10/100 Ethernet Card", "Revision B", "05/11/06", 0xb38bcc2e, 0x4de88352, 0xeaca6c8d, 0x7e57c22e),
        PCMCIA_DEVICE_PROD_ID123("Cardwell", "PCMCIA", "ETHERNET", 0x9533672e, 0x281f1c5d, 0x3ff7175b),
index 3f3bcbea15bd7a6caf7c8296e6c5bc0bef14c6f1..0907ab6ff309e65ac4ff56250443dc23d6904b1c 100644 (file)
@@ -2380,7 +2380,7 @@ static int et131x_tx_dma_memory_alloc(struct et131x_adapter *adapter)
                                                    sizeof(u32),
                                                    &tx_ring->tx_status_pa,
                                                    GFP_KERNEL);
-       if (!tx_ring->tx_status_pa) {
+       if (!tx_ring->tx_status) {
                dev_err(&adapter->pdev->dev,
                        "Cannot alloc memory for Tx status block\n");
                return -ENOMEM;
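
The et131x fix above is the classic dma_alloc_coherent() idiom: failure is signalled by a NULL CPU pointer return value, while the dma_addr_t written through the third argument is only meaningful on success, so tx_status (not tx_status_pa) is what must be tested. A kernel-style sketch of the idiom, with illustrative names:

/* needs <linux/dma-mapping.h>; alloc_status_block() is a made-up name */
static int alloc_status_block(struct device *dev, void **cpu, dma_addr_t *bus)
{
        *cpu = dma_alloc_coherent(dev, sizeof(u32), bus, GFP_KERNEL);
        if (!*cpu)      /* NULL CPU pointer means the allocation failed */
                return -ENOMEM;
        return 0;
}
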
index 87e727b921dc0a20f99a6bdb084d2a6e3b168f9e..fcdf5dda448f9c7acf5a0a537cbbcb0de76b09df 100644 (file)
@@ -50,8 +50,8 @@ static const char version[] =
 static void write_rreg(u_long base, u_int reg, u_int val)
 {
        asm volatile(
-       "str%?h %1, [%2]        @ NET_RAP\n\t"
-       "str%?h %0, [%2, #-4]   @ NET_RDP"
+       "strh   %1, [%2]        @ NET_RAP\n\t"
+       "strh   %0, [%2, #-4]   @ NET_RDP"
        :
        : "r" (val), "r" (reg), "r" (ISAIO_BASE + 0x0464));
 }
@@ -60,8 +60,8 @@ static inline unsigned short read_rreg(u_long base_addr, u_int reg)
 {
        unsigned short v;
        asm volatile(
-       "str%?h %1, [%2]        @ NET_RAP\n\t"
-       "ldr%?h %0, [%2, #-4]   @ NET_RDP"
+       "strh   %1, [%2]        @ NET_RAP\n\t"
+       "ldrh   %0, [%2, #-4]   @ NET_RDP"
        : "=r" (v)
        : "r" (reg), "r" (ISAIO_BASE + 0x0464));
        return v;
@@ -70,8 +70,8 @@ static inline unsigned short read_rreg(u_long base_addr, u_int reg)
 static inline void write_ireg(u_long base, u_int reg, u_int val)
 {
        asm volatile(
-       "str%?h %1, [%2]        @ NET_RAP\n\t"
-       "str%?h %0, [%2, #8]    @ NET_IDP"
+       "strh   %1, [%2]        @ NET_RAP\n\t"
+       "strh   %0, [%2, #8]    @ NET_IDP"
        :
        : "r" (val), "r" (reg), "r" (ISAIO_BASE + 0x0464));
 }
@@ -80,8 +80,8 @@ static inline unsigned short read_ireg(u_long base_addr, u_int reg)
 {
        u_short v;
        asm volatile(
-       "str%?h %1, [%2]        @ NAT_RAP\n\t"
-       "ldr%?h %0, [%2, #8]    @ NET_IDP\n\t"
+       "strh   %1, [%2]        @ NAT_RAP\n\t"
+       "ldrh   %0, [%2, #8]    @ NET_IDP\n\t"
        : "=r" (v)
        : "r" (reg), "r" (ISAIO_BASE + 0x0464));
        return v;
@@ -96,7 +96,7 @@ am_writebuffer(struct net_device *dev, u_int offset, unsigned char *buf, unsigne
        offset = ISAMEM_BASE + (offset << 1);
        length = (length + 1) & ~1;
        if ((int)buf & 2) {
-               asm volatile("str%?h    %2, [%0], #4"
+               asm volatile("strh      %2, [%0], #4"
                 : "=&r" (offset) : "0" (offset), "r" (buf[0] | (buf[1] << 8)));
                buf += 2;
                length -= 2;
@@ -104,20 +104,20 @@ am_writebuffer(struct net_device *dev, u_int offset, unsigned char *buf, unsigne
        while (length > 8) {
                register unsigned int tmp asm("r2"), tmp2 asm("r3");
                asm volatile(
-                       "ldm%?ia        %0!, {%1, %2}"
+                       "ldmia  %0!, {%1, %2}"
                        : "+r" (buf), "=&r" (tmp), "=&r" (tmp2));
                length -= 8;
                asm volatile(
-                       "str%?h %1, [%0], #4\n\t"
-                       "mov%?  %1, %1, lsr #16\n\t"
-                       "str%?h %1, [%0], #4\n\t"
-                       "str%?h %2, [%0], #4\n\t"
-                       "mov%?  %2, %2, lsr #16\n\t"
-                       "str%?h %2, [%0], #4"
+                       "strh   %1, [%0], #4\n\t"
+                       "mov    %1, %1, lsr #16\n\t"
+                       "strh   %1, [%0], #4\n\t"
+                       "strh   %2, [%0], #4\n\t"
+                       "mov    %2, %2, lsr #16\n\t"
+                       "strh   %2, [%0], #4"
                : "+r" (offset), "=&r" (tmp), "=&r" (tmp2));
        }
        while (length > 0) {
-               asm volatile("str%?h    %2, [%0], #4"
+               asm volatile("strh      %2, [%0], #4"
                 : "=&r" (offset) : "0" (offset), "r" (buf[0] | (buf[1] << 8)));
                buf += 2;
                length -= 2;
@@ -132,23 +132,23 @@ am_readbuffer(struct net_device *dev, u_int offset, unsigned char *buf, unsigned
        if ((int)buf & 2) {
                unsigned int tmp;
                asm volatile(
-                       "ldr%?h %2, [%0], #4\n\t"
-                       "str%?b %2, [%1], #1\n\t"
-                       "mov%?  %2, %2, lsr #8\n\t"
-                       "str%?b %2, [%1], #1"
+                       "ldrh   %2, [%0], #4\n\t"
+                       "strb   %2, [%1], #1\n\t"
+                       "mov    %2, %2, lsr #8\n\t"
+                       "strb   %2, [%1], #1"
                : "=&r" (offset), "=&r" (buf), "=r" (tmp): "0" (offset), "1" (buf));
                length -= 2;
        }
        while (length > 8) {
                register unsigned int tmp asm("r2"), tmp2 asm("r3"), tmp3;
                asm volatile(
-                       "ldr%?h %2, [%0], #4\n\t"
-                       "ldr%?h %4, [%0], #4\n\t"
-                       "ldr%?h %3, [%0], #4\n\t"
-                       "orr%?  %2, %2, %4, lsl #16\n\t"
-                       "ldr%?h %4, [%0], #4\n\t"
-                       "orr%?  %3, %3, %4, lsl #16\n\t"
-                       "stm%?ia        %1!, {%2, %3}"
+                       "ldrh   %2, [%0], #4\n\t"
+                       "ldrh   %4, [%0], #4\n\t"
+                       "ldrh   %3, [%0], #4\n\t"
+                       "orr    %2, %2, %4, lsl #16\n\t"
+                       "ldrh   %4, [%0], #4\n\t"
+                       "orr    %3, %3, %4, lsl #16\n\t"
+                       "stmia  %1!, {%2, %3}"
                : "=&r" (offset), "=&r" (buf), "=r" (tmp), "=r" (tmp2), "=r" (tmp3)
                : "0" (offset), "1" (buf));
                length -= 8;
@@ -156,10 +156,10 @@ am_readbuffer(struct net_device *dev, u_int offset, unsigned char *buf, unsigned
        while (length > 0) {
                unsigned int tmp;
                asm volatile(
-                       "ldr%?h %2, [%0], #4\n\t"
-                       "str%?b %2, [%1], #1\n\t"
-                       "mov%?  %2, %2, lsr #8\n\t"
-                       "str%?b %2, [%1], #1"
+                       "ldrh   %2, [%0], #4\n\t"
+                       "strb   %2, [%1], #1\n\t"
+                       "mov    %2, %2, lsr #8\n\t"
+                       "strb   %2, [%1], #1"
                : "=&r" (offset), "=&r" (buf), "=r" (tmp) : "0" (offset), "1" (buf));
                length -= 2;
        }
index 256f590f6bb1a6db167f4375b3ed2719d2ad5073..3a7ebfdda57dee33cbde71cf3440e5eb70aa262a 100644 (file)
@@ -547,8 +547,8 @@ static int __init lance_probe1(struct net_device *dev, int ioaddr, int irq, int
        /* Make certain the data structures used by the LANCE are aligned and DMAble. */
 
        lp = kzalloc(sizeof(*lp), GFP_DMA | GFP_KERNEL);
-       if(lp==NULL)
-               return -ENODEV;
+       if (!lp)
+               return -ENOMEM;
        if (lance_debug > 6) printk(" (#0x%05lx)", (unsigned long)lp);
        dev->ml_priv = lp;
        lp->name = chipname;
index a4799c1fc7d4de55c38a0e1a99c1aa42985d5143..5eb9b20c0eeab09954a909290ac5bd9bf3fdcb06 100644 (file)
@@ -628,6 +628,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
        int ret;
 
        ring = pdata->rx_ring;
+       irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
        ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
                               IRQF_SHARED, ring->irq_name, ring);
        if (ret)
@@ -635,6 +636,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 
        if (pdata->cq_cnt) {
                ring = pdata->tx_ring->cp_ring;
+               irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
                ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
                                       IRQF_SHARED, ring->irq_name, ring);
                if (ret) {
@@ -649,15 +651,19 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 static void xgene_enet_free_irq(struct net_device *ndev)
 {
        struct xgene_enet_pdata *pdata;
+       struct xgene_enet_desc_ring *ring;
        struct device *dev;
 
        pdata = netdev_priv(ndev);
        dev = ndev_to_dev(ndev);
-       devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring);
+       ring = pdata->rx_ring;
+       irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
+       devm_free_irq(dev, ring->irq, ring);
 
        if (pdata->cq_cnt) {
-               devm_free_irq(dev, pdata->tx_ring->cp_ring->irq,
-                             pdata->tx_ring->cp_ring);
+               ring = pdata->tx_ring->cp_ring;
+               irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
+               devm_free_irq(dev, ring->irq, ring);
        }
 }
 
index 70d5b62c125a489ceb5fafaf21b96988cecaab5c..248dfc40a7611a0b298336c0aa552665da165ba7 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/efi.h>
+#include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/of_platform.h>
 #include <linux/of_net.h>
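
The xgene hunks pair IRQ_DISABLE_UNLAZY with each ring IRQ: the flag makes disable_irq() mask the line at the interrupt controller immediately rather than lazily on the next interrupt, and it must be set before the IRQ is requested and cleared again before it is freed, which is exactly the ordering the driver now follows (hence the new <linux/irq.h> include). A condensed sketch of that pairing, with illustrative names:

/* needs <linux/irq.h> and <linux/interrupt.h>; ring_irq_* are made up */
static int ring_irq_init(struct device *dev, unsigned int irq,
                         irq_handler_t handler, void *data)
{
        irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY);  /* before request */
        return devm_request_irq(dev, irq, handler, IRQF_SHARED,
                                "ring", data);
}

static void ring_irq_exit(struct device *dev, unsigned int irq, void *data)
{
        irq_clear_status_flags(irq, IRQ_DISABLE_UNLAZY); /* before free */
        devm_free_irq(dev, irq, data);
}
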
index abe1eabc017177ede3d1bdc7df6bec5a8ded86da..6446af1403f70be8101625ed9a8b03d3de17b792 100644 (file)
@@ -163,7 +163,7 @@ static void arc_emac_tx_clean(struct net_device *ndev)
                struct sk_buff *skb = tx_buff->skb;
                unsigned int info = le32_to_cpu(txbd->info);
 
-               if ((info & FOR_EMAC) || !txbd->data)
+               if ((info & FOR_EMAC) || !txbd->data || !skb)
                        break;
 
                if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) {
@@ -191,6 +191,7 @@ static void arc_emac_tx_clean(struct net_device *ndev)
 
                txbd->data = 0;
                txbd->info = 0;
+               tx_buff->skb = NULL;
 
                *txbd_dirty = (*txbd_dirty + 1) % TX_BD_NUM;
        }
@@ -446,6 +447,9 @@ static int arc_emac_open(struct net_device *ndev)
                *last_rx_bd = (*last_rx_bd + 1) % RX_BD_NUM;
        }
 
+       priv->txbd_curr = 0;
+       priv->txbd_dirty = 0;
+
        /* Clean Tx BD's */
        memset(priv->txbd, 0, TX_RING_SZ);
 
@@ -513,6 +517,64 @@ static void arc_emac_set_rx_mode(struct net_device *ndev)
        }
 }
 
+/**
+ * arc_free_tx_queue - free skb from tx queue
+ * @ndev:      Pointer to the network device.
+ *
+ * This function must be called while the EMAC is disabled.
+ */
+static void arc_free_tx_queue(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       unsigned int i;
+
+       for (i = 0; i < TX_BD_NUM; i++) {
+               struct arc_emac_bd *txbd = &priv->txbd[i];
+               struct buffer_state *tx_buff = &priv->tx_buff[i];
+
+               if (tx_buff->skb) {
+                       dma_unmap_single(&ndev->dev, dma_unmap_addr(tx_buff, addr),
+                                        dma_unmap_len(tx_buff, len), DMA_TO_DEVICE);
+
+                       /* return the sk_buff to system */
+                       dev_kfree_skb_irq(tx_buff->skb);
+               }
+
+               txbd->info = 0;
+               txbd->data = 0;
+               tx_buff->skb = NULL;
+       }
+}
+
+/**
+ * arc_free_rx_queue - free skb from rx queue
+ * @ndev:      Pointer to the network device.
+ *
+ * This function must be called while the EMAC is disabled.
+ */
+static void arc_free_rx_queue(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       unsigned int i;
+
+       for (i = 0; i < RX_BD_NUM; i++) {
+               struct arc_emac_bd *rxbd = &priv->rxbd[i];
+               struct buffer_state *rx_buff = &priv->rx_buff[i];
+
+               if (rx_buff->skb) {
+                       dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
+                                       dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
+
+                       /* return the sk_buff to system */
+                       dev_kfree_skb_irq(rx_buff->skb);
+               }
+
+               rxbd->info = 0;
+               rxbd->data = 0;
+               rx_buff->skb = NULL;
+       }
+}
+
 /**
  * arc_emac_stop - Close the network device.
  * @ndev:      Pointer to the network device.
@@ -534,6 +596,10 @@ static int arc_emac_stop(struct net_device *ndev)
        /* Disable EMAC */
        arc_reg_clr(priv, R_CTRL, EN_MASK);
 
+       /* Return the sk_buff to system */
+       arc_free_tx_queue(ndev);
+       arc_free_rx_queue(ndev);
+
        return 0;
 }
 
@@ -610,7 +676,6 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
        dma_unmap_addr_set(&priv->tx_buff[*txbd_curr], addr, addr);
        dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len);
 
-       priv->tx_buff[*txbd_curr].skb = skb;
        priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
 
        /* Make sure pointer to data buffer is set */
@@ -620,6 +685,11 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
 
        *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
 
+       /* Make sure info word is set */
+       wmb();
+
+       priv->tx_buff[*txbd_curr].skb = skb;
+
        /* Increment index to point to the next BD */
        *txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
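
The arc_emac hunks above fix a submit/clean race by ordering publication: the transmit path fills in the descriptor, issues wmb(), and only then stores the skb pointer, while the cleaner bails out on a NULL skb and clears the pointer once the slot is recycled. A generic C11 model of that release/acquire publication pattern, not driver code:

#include <stdatomic.h>

struct slot {
        unsigned int info;              /* descriptor word, written first */
        _Atomic(void *) skb;            /* published last */
};

static void submit(struct slot *s, unsigned int info, void *skb)
{
        s->info = info;                                 /* step 1 */
        atomic_store_explicit(&s->skb, skb,
                              memory_order_release);    /* step 2 */
}

static void *poll_slot(struct slot *s)
{
        /* NULL means the producer has not finished publishing the slot */
        return atomic_load_explicit(&s->skb, memory_order_acquire);
}
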
 
index ecc4a334c50727c7a93b07576b540fc4b5b53d58..f71ab2647a3bc1f62f6b7450b34dd325c710549d 100644 (file)
@@ -302,7 +302,7 @@ static int nb8800_poll(struct napi_struct *napi, int budget)
        nb8800_tx_done(dev);
 
 again:
-       while (work < budget) {
+       do {
                struct nb8800_rx_buf *rxb;
                unsigned int len;
 
@@ -330,7 +330,7 @@ again:
                rxd->report = 0;
                last = next;
                work++;
-       }
+       } while (work < budget);
 
        if (work) {
                priv->rx_descs[last].desc.config |= DESC_EOC;
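
The nb8800 change from while to do/while means the receive loop body runs at least once each time control reaches it, including after the goto again re-entry, so a descriptor that became ready just as the ring was re-armed is still consumed in the same poll. A minimal sketch of the control flow, with hypothetical helpers:

extern int rx_ready(void);      /* hypothetical: is the next descriptor done? */
extern void consume_one(void);  /* hypothetical: process it */

static int poll_once(int budget)
{
        int work = 0;

        do {
                if (!rx_ready())
                        break;
                consume_one();
                work++;
        } while (work < budget);

        return work;
}
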
index 8550df189ceb709290c8b1caad4a3132dcb6a810..19f7cd02e08527ec234b1e8de426bad1fd37cadb 100644 (file)
@@ -151,8 +151,11 @@ config BNX2X_VXLAN
 
 config BGMAC
        tristate "BCMA bus GBit core support"
-       depends on BCMA_HOST_SOC && HAS_DMA && (BCM47XX || ARCH_BCM_5301X)
+       depends on BCMA && BCMA_HOST_SOC
+       depends on HAS_DMA
+       depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
        select PHYLIB
+       select FIXED_PHY
        ---help---
          This driver supports GBit MAC and BCM4706 GBit MAC cores on BCMA bus.
          They can be found on BCM47xx SoCs and provide gigabit ethernet.
index d946bba43726f94b0d8a62973978a19d07959390..1fb80100e5e7d753b2ddd8ae741e0120ac161f42 100644 (file)
@@ -6185,26 +6185,80 @@ static int bnx2x_format_ver(u32 num, u8 *str, u16 *len)
                shift -= 4;
                digit = ((num & mask) >> shift);
                if (digit == 0 && remove_leading_zeros) {
-                       mask = mask >> 4;
-                       continue;
-               } else if (digit < 0xa)
-                       *str_ptr = digit + '0';
-               else
-                       *str_ptr = digit - 0xa + 'a';
-               remove_leading_zeros = 0;
-               str_ptr++;
-               (*len)--;
+                       *str_ptr = '0';
+               } else {
+                       if (digit < 0xa)
+                               *str_ptr = digit + '0';
+                       else
+                               *str_ptr = digit - 0xa + 'a';
+
+                       remove_leading_zeros = 0;
+                       str_ptr++;
+                       (*len)--;
+               }
                mask = mask >> 4;
                if (shift == 4*4) {
+                       if (remove_leading_zeros) {
+                               str_ptr++;
+                               (*len)--;
+                       }
                        *str_ptr = '.';
                        str_ptr++;
                        (*len)--;
                        remove_leading_zeros = 1;
                }
        }
+       if (remove_leading_zeros)
+               (*len)--;
        return 0;
 }
 
+static int bnx2x_3_seq_format_ver(u32 num, u8 *str, u16 *len)
+{
+       u8 *str_ptr = str;
+       u32 mask = 0x00f00000;
+       u8 shift = 8*3;
+       u8 digit;
+       u8 remove_leading_zeros = 1;
+
+       if (*len < 10) {
+               /* Need at least 10 chars for this format */
+               *str_ptr = '\0';
+               (*len)--;
+               return -EINVAL;
+       }
+
+       while (shift > 0) {
+               shift -= 4;
+               digit = ((num & mask) >> shift);
+               if (digit == 0 && remove_leading_zeros) {
+                       *str_ptr = '0';
+               } else {
+                       if (digit < 0xa)
+                               *str_ptr = digit + '0';
+                       else
+                               *str_ptr = digit - 0xa + 'a';
+
+                       remove_leading_zeros = 0;
+                       str_ptr++;
+                       (*len)--;
+               }
+               mask = mask >> 4;
+               if ((shift == 4*4) || (shift == 4*2)) {
+                       if (remove_leading_zeros) {
+                               str_ptr++;
+                               (*len)--;
+                       }
+                       *str_ptr = '.';
+                       str_ptr++;
+                       (*len)--;
+                       remove_leading_zeros = 1;
+               }
+       }
+       if (remove_leading_zeros)
+               (*len)--;
+       return 0;
+}
 
 static int bnx2x_null_format_ver(u32 spirom_ver, u8 *str, u16 *len)
 {
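
The reworked bnx2x_format_ver() and the new bnx2x_3_seq_format_ver() both print dotted hex groups, suppressing leading zeros inside each group while still emitting a bare "0" when a whole group is zero; the two-sequence variant splits the 32-bit word at bit 16, and the three-sequence variant prints the low three bytes of num. A standalone illustration of the intended output (snprintf's %x happens to give the same per-group behaviour; this is not the driver routine):

#include <stdio.h>

static void format_two_groups(unsigned int num, char *buf, size_t len)
{
        /* e.g. 0x00012034 -> "1.2034", 0 -> "0.0" */
        snprintf(buf, len, "%x.%x", (num >> 16) & 0xffff, num & 0xffff);
}

int main(void)
{
        char buf[16];

        format_two_groups(0x00012034, buf, sizeof(buf));
        puts(buf);      /* prints "1.2034" */
        return 0;
}
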
@@ -9677,8 +9731,9 @@ static void bnx2x_save_848xx_spirom_version(struct bnx2x_phy *phy,
 
        if (bnx2x_is_8483x_8485x(phy)) {
                bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD, 0x400f, &fw_ver1);
-               bnx2x_save_spirom_version(bp, port, fw_ver1 & 0xfff,
-                               phy->ver_addr);
+               if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+                       fw_ver1 &= 0xfff;
+               bnx2x_save_spirom_version(bp, port, fw_ver1, phy->ver_addr);
        } else {
                /* For 32-bit registers in 848xx, access via MDIO2ARM i/f. */
                /* (1) set reg 0xc200_0014(SPI_BRIDGE_CTRL_2) to 0x03000000 */
@@ -9732,16 +9787,32 @@ static void bnx2x_save_848xx_spirom_version(struct bnx2x_phy *phy,
 static void bnx2x_848xx_set_led(struct bnx2x *bp,
                                struct bnx2x_phy *phy)
 {
-       u16 val, offset, i;
+       u16 val, led3_blink_rate, offset, i;
        static struct bnx2x_reg_set reg_set[] = {
                {MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED1_MASK, 0x0080},
                {MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED2_MASK, 0x0018},
                {MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED3_MASK, 0x0006},
-               {MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED3_BLINK, 0x0000},
                {MDIO_PMA_DEVAD, MDIO_PMA_REG_84823_CTL_SLOW_CLK_CNT_HIGH,
                        MDIO_PMA_REG_84823_BLINK_RATE_VAL_15P9HZ},
                {MDIO_AN_DEVAD, 0xFFFB, 0xFFFD}
        };
+
+       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+               /* Set LED5 source */
+               bnx2x_cl45_write(bp, phy,
+                                MDIO_PMA_DEVAD,
+                                MDIO_PMA_REG_8481_LED5_MASK,
+                                0x90);
+               led3_blink_rate = 0x000f;
+       } else {
+               led3_blink_rate = 0x0000;
+       }
+       /* Set LED3 BLINK */
+       bnx2x_cl45_write(bp, phy,
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_8481_LED3_BLINK,
+                        led3_blink_rate);
+
        /* PHYC_CTL_LED_CTL */
        bnx2x_cl45_read(bp, phy,
                        MDIO_PMA_DEVAD,
@@ -9749,6 +9820,9 @@ static void bnx2x_848xx_set_led(struct bnx2x *bp,
        val &= 0xFE00;
        val |= 0x0092;
 
+       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+               val |= 2 << 12; /* LED5 ON based on source */
+
        bnx2x_cl45_write(bp, phy,
                         MDIO_PMA_DEVAD,
                         MDIO_PMA_REG_8481_LINK_SIGNAL, val);
@@ -9762,10 +9836,17 @@ static void bnx2x_848xx_set_led(struct bnx2x *bp,
        else
                offset = MDIO_PMA_REG_84823_CTL_LED_CTL_1;
 
-       /* stretch_en for LED3*/
+       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+               val = MDIO_PMA_REG_84858_ALLOW_GPHY_ACT |
+                     MDIO_PMA_REG_84823_LED3_STRETCH_EN;
+       else
+               val = MDIO_PMA_REG_84823_LED3_STRETCH_EN;
+
+       /* stretch_en for LEDs */
        bnx2x_cl45_read_or_write(bp, phy,
-                                MDIO_PMA_DEVAD, offset,
-                                MDIO_PMA_REG_84823_LED3_STRETCH_EN);
+                                MDIO_PMA_DEVAD,
+                                offset,
+                                val);
 }
 
 static void bnx2x_848xx_specific_func(struct bnx2x_phy *phy,
@@ -9775,7 +9856,7 @@ static void bnx2x_848xx_specific_func(struct bnx2x_phy *phy,
        struct bnx2x *bp = params->bp;
        switch (action) {
        case PHY_INIT:
-               if (!bnx2x_is_8483x_8485x(phy)) {
+               if (bnx2x_is_8483x_8485x(phy)) {
                        /* Save spirom version */
                        bnx2x_save_848xx_spirom_version(phy, bp, params->port);
                }
@@ -10036,15 +10117,20 @@ static int bnx2x_84858_cmd_hdlr(struct bnx2x_phy *phy,
 
 static int bnx2x_84833_cmd_hdlr(struct bnx2x_phy *phy,
                                struct link_params *params, u16 fw_cmd,
-                               u16 cmd_args[], int argc)
+                               u16 cmd_args[], int argc, int process)
 {
        int idx;
        u16 val;
        struct bnx2x *bp = params->bp;
-       /* Write CMD_OPEN_OVERRIDE to STATUS reg */
-       bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-                       MDIO_848xx_CMD_HDLR_STATUS,
-                       PHY84833_STATUS_CMD_OPEN_OVERRIDE);
+       int rc = 0;
+
+       if (process == PHY84833_MB_PROCESS2) {
+               /* Write CMD_OPEN_OVERRIDE to STATUS reg */
+               bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+                                MDIO_848xx_CMD_HDLR_STATUS,
+                                PHY84833_STATUS_CMD_OPEN_OVERRIDE);
+       }
+
        for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
                bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
                                MDIO_848xx_CMD_HDLR_STATUS, &val);
@@ -10054,15 +10140,27 @@ static int bnx2x_84833_cmd_hdlr(struct bnx2x_phy *phy,
        }
        if (idx >= PHY848xx_CMDHDLR_WAIT) {
                DP(NETIF_MSG_LINK, "FW cmd: FW not ready.\n");
+               /* if the status is CMD_COMPLETE_PASS or CMD_COMPLETE_ERROR
+                * clear the status to CMD_CLEAR_COMPLETE
+                */
+               if (val == PHY84833_STATUS_CMD_COMPLETE_PASS ||
+                   val == PHY84833_STATUS_CMD_COMPLETE_ERROR) {
+                       bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+                                        MDIO_848xx_CMD_HDLR_STATUS,
+                                        PHY84833_STATUS_CMD_CLEAR_COMPLETE);
+               }
                return -EINVAL;
        }
-
-       /* Prepare argument(s) and issue command */
-       for (idx = 0; idx < argc; idx++) {
-               bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-                               MDIO_848xx_CMD_HDLR_DATA1 + idx,
-                               cmd_args[idx]);
+       if (process == PHY84833_MB_PROCESS1 ||
+           process == PHY84833_MB_PROCESS2) {
+               /* Prepare argument(s) */
+               for (idx = 0; idx < argc; idx++) {
+                       bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+                                        MDIO_848xx_CMD_HDLR_DATA1 + idx,
+                                        cmd_args[idx]);
+               }
        }
+
        bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
                        MDIO_848xx_CMD_HDLR_COMMAND, fw_cmd);
        for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
@@ -10076,24 +10174,30 @@ static int bnx2x_84833_cmd_hdlr(struct bnx2x_phy *phy,
        if ((idx >= PHY848xx_CMDHDLR_WAIT) ||
            (val == PHY84833_STATUS_CMD_COMPLETE_ERROR)) {
                DP(NETIF_MSG_LINK, "FW cmd failed.\n");
-               return -EINVAL;
+               rc = -EINVAL;
        }
-       /* Gather returning data */
-       for (idx = 0; idx < argc; idx++) {
-               bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
-                               MDIO_848xx_CMD_HDLR_DATA1 + idx,
-                               &cmd_args[idx]);
+       if (process == PHY84833_MB_PROCESS3 && rc == 0) {
+               /* Gather returning data */
+               for (idx = 0; idx < argc; idx++) {
+                       bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
+                                       MDIO_848xx_CMD_HDLR_DATA1 + idx,
+                                       &cmd_args[idx]);
+               }
        }
-       bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-                       MDIO_848xx_CMD_HDLR_STATUS,
-                       PHY84833_STATUS_CMD_CLEAR_COMPLETE);
-       return 0;
+       if (val == PHY84833_STATUS_CMD_COMPLETE_ERROR ||
+           val == PHY84833_STATUS_CMD_COMPLETE_PASS) {
+               bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+                                MDIO_848xx_CMD_HDLR_STATUS,
+                                PHY84833_STATUS_CMD_CLEAR_COMPLETE);
+       }
+       return rc;
 }
 
 static int bnx2x_848xx_cmd_hdlr(struct bnx2x_phy *phy,
                                struct link_params *params,
                                u16 fw_cmd,
-                               u16 cmd_args[], int argc)
+                                          u16 cmd_args[], int argc,
+                                          int process)
 {
        struct bnx2x *bp = params->bp;
 
@@ -10106,7 +10210,7 @@ static int bnx2x_848xx_cmd_hdlr(struct bnx2x_phy *phy,
                                            argc);
        } else {
                return bnx2x_84833_cmd_hdlr(phy, params, fw_cmd, cmd_args,
-                                           argc);
+                                           argc, process);
        }
 }
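
The new process argument threads a three-mode mailbox protocol through bnx2x_84833_cmd_hdlr(): PROCESS2 forces the mailbox open with CMD_OPEN_OVERRIDE and writes arguments, PROCESS1 writes arguments without the override, and PROCESS3 writes none but reads the DATA registers back on success; in every mode a latched PASS/ERROR status is cleared so it cannot be mistaken for the next command's result. A sketch of just that timeout-plus-clear wait, with placeholder constants and hypothetical register accessors:

#include <stdbool.h>
#include <stdint.h>

#define ST_READY        0x0010  /* placeholder values, not bnx2x's */
#define ST_PASS         0x0008
#define ST_ERROR        0x0004
#define ST_CLEAR        0x0080

extern uint16_t mb_read_status(void);           /* hypothetical MMIO read */
extern void mb_write_status(uint16_t v);        /* hypothetical MMIO write */

static bool mb_wait_ready(int tries)
{
        uint16_t val = 0;

        while (tries--) {
                val = mb_read_status();
                if (val & ST_READY)
                        return true;
        }
        /* Timed out: clear a latched completion so it cannot be
         * misread as the result of the next command. */
        if (val == ST_PASS || val == ST_ERROR)
                mb_write_status(ST_CLEAR);
        return false;
}
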
 
@@ -10133,7 +10237,7 @@ static int bnx2x_848xx_pair_swap_cfg(struct bnx2x_phy *phy,
 
        status = bnx2x_848xx_cmd_hdlr(phy, params,
                                      PHY848xx_CMD_SET_PAIR_SWAP, data,
-                                     PHY848xx_CMDHDLR_MAX_ARGS);
+                                     2, PHY84833_MB_PROCESS2);
        if (status == 0)
                DP(NETIF_MSG_LINK, "Pairswap OK, val=0x%x\n", data[1]);
 
@@ -10222,8 +10326,8 @@ static int bnx2x_8483x_disable_eee(struct bnx2x_phy *phy,
        DP(NETIF_MSG_LINK, "Don't Advertise 10GBase-T EEE\n");
 
        /* Prevent Phy from working in EEE and advertising it */
-       rc = bnx2x_848xx_cmd_hdlr(phy, params,
-                                 PHY848xx_CMD_SET_EEE_MODE, &cmd_args, 1);
+       rc = bnx2x_848xx_cmd_hdlr(phy, params, PHY848xx_CMD_SET_EEE_MODE,
+                                 &cmd_args, 1, PHY84833_MB_PROCESS1);
        if (rc) {
                DP(NETIF_MSG_LINK, "EEE disable failed.\n");
                return rc;
@@ -10240,8 +10344,8 @@ static int bnx2x_8483x_enable_eee(struct bnx2x_phy *phy,
        struct bnx2x *bp = params->bp;
        u16 cmd_args = 1;
 
-       rc = bnx2x_848xx_cmd_hdlr(phy, params,
-                                 PHY848xx_CMD_SET_EEE_MODE, &cmd_args, 1);
+       rc = bnx2x_848xx_cmd_hdlr(phy, params, PHY848xx_CMD_SET_EEE_MODE,
+                                 &cmd_args, 1, PHY84833_MB_PROCESS1);
        if (rc) {
                DP(NETIF_MSG_LINK, "EEE enable failed.\n");
                return rc;
@@ -10362,7 +10466,7 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
                cmd_args[3] = PHY84833_CONSTANT_LATENCY;
                rc = bnx2x_848xx_cmd_hdlr(phy, params,
                                          PHY848xx_CMD_SET_EEE_MODE, cmd_args,
-                                         PHY848xx_CMDHDLR_MAX_ARGS);
+                                         4, PHY84833_MB_PROCESS1);
                if (rc)
                        DP(NETIF_MSG_LINK, "Cfg AutogrEEEn failed.\n");
        }
@@ -10416,6 +10520,32 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
                vars->eee_status &= ~SHMEM_EEE_SUPPORTED_MASK;
        }
 
+       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+               /* Additional settings for jumbo packets in 1000BASE-T mode */
+               /* Allow rx extended length */
+               bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+                               MDIO_AN_REG_8481_AUX_CTRL, &val);
+               val |= 0x4000;
+               bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+                                MDIO_AN_REG_8481_AUX_CTRL, val);
+               /* TX FIFO Elasticity LSB */
+               bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+                               MDIO_AN_REG_8481_1G_100T_EXT_CTRL, &val);
+               val |= 0x1;
+               bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+                                MDIO_AN_REG_8481_1G_100T_EXT_CTRL, val);
+               /* TX FIFO Elasticity MSB */
+               /* Enable expansion register 0x46 (Pattern Generator status) */
+               bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+                                MDIO_AN_REG_8481_EXPANSION_REG_ACCESS, 0xf46);
+
+               bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+                               MDIO_AN_REG_8481_EXPANSION_REG_RD_RW, &val);
+               val |= 0x4000;
+               bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+                                MDIO_AN_REG_8481_EXPANSION_REG_RD_RW, val);
+       }
+
        if (bnx2x_is_8483x_8485x(phy)) {
                /* Bring PHY out of super isolate mode as the final step. */
                bnx2x_cl45_read_and_write(bp, phy,
@@ -10555,6 +10685,17 @@ static u8 bnx2x_848xx_read_status(struct bnx2x_phy *phy,
        return link_up;
 }
 
+static int bnx2x_8485x_format_ver(u32 raw_ver, u8 *str, u16 *len)
+{
+       int status = 0;
+       u32 num;
+
+       num = ((raw_ver & 0xF80) >> 7) << 16 | ((raw_ver & 0x7F) << 8) |
+             ((raw_ver & 0xF000) >> 12);
+       status = bnx2x_3_seq_format_ver(num, str, len);
+       return status;
+}
+
 static int bnx2x_848xx_format_ver(u32 raw_ver, u8 *str, u16 *len)
 {
        int status = 0;
@@ -10651,10 +10792,25 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy,
                                        0x0);
 
                } else {
+                       /* LED 1 OFF */
                        bnx2x_cl45_write(bp, phy,
                                         MDIO_PMA_DEVAD,
                                         MDIO_PMA_REG_8481_LED1_MASK,
                                         0x0);
+
+                       if (phy->type ==
+                               PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+                               /* LED 2 OFF */
+                               bnx2x_cl45_write(bp, phy,
+                                                MDIO_PMA_DEVAD,
+                                                MDIO_PMA_REG_8481_LED2_MASK,
+                                                0x0);
+                               /* LED 3 OFF */
+                               bnx2x_cl45_write(bp, phy,
+                                                MDIO_PMA_DEVAD,
+                                                MDIO_PMA_REG_8481_LED3_MASK,
+                                                0x0);
+                       }
                }
                break;
        case LED_MODE_FRONT_PANEL_OFF:
@@ -10713,6 +10869,19 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy,
                                                 MDIO_PMA_REG_8481_SIGNAL_MASK,
                                                 0x0);
                        }
+                       if (phy->type ==
+                               PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+                               /* LED 2 OFF */
+                               bnx2x_cl45_write(bp, phy,
+                                                MDIO_PMA_DEVAD,
+                                                MDIO_PMA_REG_8481_LED2_MASK,
+                                                0x0);
+                               /* LED 3 OFF */
+                               bnx2x_cl45_write(bp, phy,
+                                                MDIO_PMA_DEVAD,
+                                                MDIO_PMA_REG_8481_LED3_MASK,
+                                                0x0);
+                       }
                }
                break;
        case LED_MODE_ON:
@@ -10776,6 +10945,25 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy,
                                                params->port*4,
                                                NIG_MASK_MI_INT);
                                }
+                       }
+                       if (phy->type ==
+                           PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+                               /* Tell LED3 to constant on */
+                               bnx2x_cl45_read(bp, phy,
+                                               MDIO_PMA_DEVAD,
+                                               MDIO_PMA_REG_8481_LINK_SIGNAL,
+                                               &val);
+                               val &= ~(7<<6);
+                               val |= (2<<6);  /* A83B[8:6]= 2 */
+                               bnx2x_cl45_write(bp, phy,
+                                                MDIO_PMA_DEVAD,
+                                                MDIO_PMA_REG_8481_LINK_SIGNAL,
+                                                val);
+                               bnx2x_cl45_write(bp, phy,
+                                                MDIO_PMA_DEVAD,
+                                                MDIO_PMA_REG_8481_LED3_MASK,
+                                                0x20);
+                       } else {
                                bnx2x_cl45_write(bp, phy,
                                                 MDIO_PMA_DEVAD,
                                                 MDIO_PMA_REG_8481_SIGNAL_MASK,
@@ -10853,6 +11041,17 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy,
                                         MDIO_PMA_DEVAD,
                                         MDIO_PMA_REG_8481_LINK_SIGNAL,
                                         val);
+                       if (phy->type ==
+                           PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+                               bnx2x_cl45_write(bp, phy,
+                                                MDIO_PMA_DEVAD,
+                                                MDIO_PMA_REG_8481_LED2_MASK,
+                                                0x18);
+                               bnx2x_cl45_write(bp, phy,
+                                                MDIO_PMA_DEVAD,
+                                                MDIO_PMA_REG_8481_LED3_MASK,
+                                                0x06);
+                       }
                        if (phy->type ==
                            PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834) {
                                /* Restore LED4 source to external link,
@@ -11982,7 +12181,7 @@ static const struct bnx2x_phy phy_84858 = {
        .read_status    = (read_status_t)bnx2x_848xx_read_status,
        .link_reset     = (link_reset_t)bnx2x_848x3_link_reset,
        .config_loopback = (config_loopback_t)NULL,
-       .format_fw_ver  = (format_fw_ver_t)bnx2x_848xx_format_ver,
+       .format_fw_ver  = (format_fw_ver_t)bnx2x_8485x_format_ver,
        .hw_reset       = (hw_reset_t)bnx2x_84833_hw_reset_phy,
        .set_link_led   = (set_link_led_t)bnx2x_848xx_set_link_led,
        .phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func
@@ -13807,8 +14006,10 @@ void bnx2x_period_func(struct link_params *params, struct link_vars *vars)
        if (CHIP_IS_E3(bp)) {
                struct bnx2x_phy *phy = &params->phy[INT_PHY];
                bnx2x_set_aer_mmd(params, phy);
-               if ((phy->supported & SUPPORTED_20000baseKR2_Full) &&
-                   (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G))
+               if (((phy->req_line_speed == SPEED_AUTO_NEG) &&
+                    (phy->speed_cap_mask &
+                     PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) ||
+                   (phy->req_line_speed == SPEED_20000))
                        bnx2x_check_kr2_wa(params, vars, phy);
                bnx2x_check_over_curr(params, vars);
                if (vars->rx_tx_asic_rst)
index 4dead49bd5cb0866ff89db2c5f2e149536b1ea3c..a43dea259b1290168c43f2713f51a2ba1c62652b 100644 (file)
@@ -7296,6 +7296,8 @@ The other bits are reserved and should be zero*/
 #define MDIO_PMA_REG_84823_CTL_LED_CTL_1                       0xa8e3
 #define MDIO_PMA_REG_84833_CTL_LED_CTL_1                       0xa8ec
 #define MDIO_PMA_REG_84823_LED3_STRETCH_EN                     0x0080
+/* BCM84858 only */
+#define MDIO_PMA_REG_84858_ALLOW_GPHY_ACT                      0x8000
 
 /* BCM84833 only */
 #define MDIO_84833_TOP_CFG_FW_REV                      0x400f
@@ -7337,6 +7339,10 @@ Theotherbitsarereservedandshouldbezero*/
 #define PHY84833_STATUS_CMD_NOT_OPEN_FOR_CMDS          0x0040
 #define PHY84833_STATUS_CMD_CLEAR_COMPLETE             0x0080
 #define PHY84833_STATUS_CMD_OPEN_OVERRIDE              0xa5a5
+/* Mailbox Process */
+#define PHY84833_MB_PROCESS1                           1
+#define PHY84833_MB_PROCESS2                           2
+#define PHY84833_MB_PROCESS3                           3
 
 /* Mailbox status set used by 84858 only */
 #define PHY84858_STATUS_CMD_RECEIVED                   0x0001
index df835f5e46d85f374cb1e0d9adf1fd8e16cb40f3..8ab000dd52d958317323c8486fc0538576727803 100644 (file)
@@ -69,7 +69,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
 #define BNXT_RX_DMA_OFFSET NET_SKB_PAD
 #define BNXT_RX_COPY_THRESH 256
 
-#define BNXT_TX_PUSH_THRESH 92
+#define BNXT_TX_PUSH_THRESH 164
 
 enum board_idx {
        BCM57301,
@@ -223,11 +223,12 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
-               struct tx_push_bd *push = txr->tx_push;
-               struct tx_bd *tx_push = &push->txbd1;
-               struct tx_bd_ext *tx_push1 = &push->txbd2;
-               void *pdata = tx_push1 + 1;
-               int j;
+               struct tx_push_buffer *tx_push_buf = txr->tx_push;
+               struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
+               struct tx_bd_ext *tx_push1 = &tx_push->txbd2;
+               void *pdata = tx_push_buf->data;
+               u64 *end;
+               int j, push_len;
 
                /* Set COAL_NOW to be ready quickly for the next push */
                tx_push->tx_bd_len_flags_type =
@@ -247,6 +248,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
                tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
                tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
 
+               end = PTR_ALIGN(pdata + length + 1, 8) - 1;
+               *end = 0;
+
                skb_copy_from_linear_data(skb, pdata, len);
                pdata += len;
                for (j = 0; j < last_frag; j++) {
@@ -261,22 +265,29 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
                        pdata += skb_frag_size(frag);
                }
 
-               memcpy(txbd, tx_push, sizeof(*txbd));
+               txbd->tx_bd_len_flags_type = tx_push->tx_bd_len_flags_type;
+               txbd->tx_bd_haddr = txr->data_mapping;
                prod = NEXT_TX(prod);
                txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
                memcpy(txbd, tx_push1, sizeof(*txbd));
                prod = NEXT_TX(prod);
-               push->doorbell =
+               tx_push->doorbell =
                        cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod);
                txr->tx_prod = prod;
 
                netdev_tx_sent_queue(txq, skb->len);
 
-               __iowrite64_copy(txr->tx_doorbell, push,
-                                (length + sizeof(*push) + 8) / 8);
+               push_len = (length + sizeof(*tx_push) + 7) / 8;
+               if (push_len > 16) {
+                       __iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
+                       __iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+                                        push_len - 16);
+               } else {
+                       __iowrite64_copy(txr->tx_doorbell, tx_push_buf,
+                                        push_len);
+               }
 
                tx_buf->is_push = 1;
-
                goto tx_done;
        }
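
Two details in the bnxt push-mode hunks are worth unpacking: __iowrite64_copy() moves whole 64-bit words, so before copying the packet into the push buffer the driver zeroes the quadword the copy's tail will cover, guaranteeing the device never sees stale slack bytes; and the doorbell write is split so at most 16 quadwords go to the base address before the remainder continues at an offset. A userspace sketch of the tail-padding arithmetic only (PTR_ALIGN(x, 8) is (x + 7) & ~7; the + 1 keeps the chosen word past a payload that already ends on an 8-byte boundary):

#include <stddef.h>
#include <stdint.h>

static void pad_tail_qword(void *pdata, size_t length)
{
        /* Equivalent of: end = PTR_ALIGN(pdata + length + 1, 8) - 1 */
        uintptr_t endp = ((uintptr_t)pdata + length + 8) & ~(uintptr_t)7;
        uint64_t *end = (uint64_t *)endp - 1;

        *end = 0;       /* zero the slack first; the payload copy follows */
}
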
 
@@ -1490,10 +1501,11 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
 
                        last = tx_buf->nr_frags;
                        j += 2;
-                       for (k = 0; k < last; k++, j = NEXT_TX(j)) {
+                       for (k = 0; k < last; k++, j++) {
+                               int ring_idx = j & bp->tx_ring_mask;
                                skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
 
-                               tx_buf = &txr->tx_buf_ring[j];
+                               tx_buf = &txr->tx_buf_ring[ring_idx];
                                dma_unmap_page(
                                        &pdev->dev,
                                        dma_unmap_addr(tx_buf, mapping),
@@ -1752,7 +1764,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
                push_size  = L1_CACHE_ALIGN(sizeof(struct tx_push_bd) +
                                        bp->tx_push_thresh);
 
-               if (push_size > 128) {
+               if (push_size > 256) {
                        push_size = 0;
                        bp->tx_push_thresh = 0;
                }
@@ -1771,7 +1783,6 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
                        return rc;
 
                if (bp->tx_push_size) {
-                       struct tx_bd *txbd;
                        dma_addr_t mapping;
 
                        /* One pre-allocated DMA buffer to backup
@@ -1785,13 +1796,11 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
                        if (!txr->tx_push)
                                return -ENOMEM;
 
-                       txbd = &txr->tx_push->txbd1;
-
                        mapping = txr->tx_push_mapping +
                                sizeof(struct tx_push_bd);
-                       txbd->tx_bd_haddr = cpu_to_le64(mapping);
+                       txr->data_mapping = cpu_to_le64(mapping);
 
-                       memset(txbd + 1, 0, sizeof(struct tx_bd_ext));
+                       memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
                }
                ring->queue_id = bp->q_info[j].queue_id;
                if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
@@ -3406,7 +3415,7 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp,
        struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
        u16 error_code;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, -1, -1);
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1);
        req.ring_type = ring_type;
        req.ring_id = cpu_to_le16(ring->fw_ring_id);
 
@@ -4545,20 +4554,18 @@ static int bnxt_update_phy_setting(struct bnxt *bp)
        if (!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) &&
            link_info->force_pause_setting != link_info->req_flow_ctrl)
                update_pause = true;
-       if (link_info->req_duplex != link_info->duplex_setting)
-               update_link = true;
        if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) {
                if (BNXT_AUTO_MODE(link_info->auto_mode))
                        update_link = true;
                if (link_info->req_link_speed != link_info->force_link_speed)
                        update_link = true;
+               if (link_info->req_duplex != link_info->duplex_setting)
+                       update_link = true;
        } else {
                if (link_info->auto_mode == BNXT_LINK_AUTO_NONE)
                        update_link = true;
                if (link_info->advertising != link_info->auto_link_speeds)
                        update_link = true;
-               if (link_info->req_link_speed != link_info->auto_link_speed)
-                       update_link = true;
        }
 
        if (update_link)
@@ -4635,7 +4642,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        if (link_re_init) {
                rc = bnxt_update_phy_setting(bp);
                if (rc)
-                       goto open_err;
+                       netdev_warn(bp->dev, "failed to update phy settings\n");
        }
 
        if (irq_re_init) {
@@ -4653,6 +4660,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        /* Enable TX queues */
        bnxt_tx_enable(bp);
        mod_timer(&bp->timer, jiffies + bp->current_interval);
+       bnxt_update_link(bp, true);
 
        return 0;
 
@@ -4819,8 +4827,6 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
                stats->multicast += le64_to_cpu(hw_stats->rx_mcast_pkts);
 
-               stats->rx_dropped += le64_to_cpu(hw_stats->rx_drop_pkts);
-
                stats->tx_dropped += le64_to_cpu(hw_stats->tx_drop_pkts);
        }
 
@@ -5671,22 +5677,16 @@ static int bnxt_probe_phy(struct bnxt *bp)
        }
 
        /* Initialize the ethtool settings copy with NVM settings */
-       if (BNXT_AUTO_MODE(link_info->auto_mode))
-               link_info->autoneg |= BNXT_AUTONEG_SPEED;
-
-       if (link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) {
-               if (link_info->auto_pause_setting == BNXT_LINK_PAUSE_BOTH)
-                       link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
+       if (BNXT_AUTO_MODE(link_info->auto_mode)) {
+               link_info->autoneg = BNXT_AUTONEG_SPEED |
+                                    BNXT_AUTONEG_FLOW_CTRL;
+               link_info->advertising = link_info->auto_link_speeds;
                link_info->req_flow_ctrl = link_info->auto_pause_setting;
-       } else if (link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) {
+       } else {
+               link_info->req_link_speed = link_info->force_link_speed;
+               link_info->req_duplex = link_info->duplex_setting;
                link_info->req_flow_ctrl = link_info->force_pause_setting;
        }
-       link_info->req_duplex = link_info->duplex_setting;
-       if (link_info->autoneg & BNXT_AUTONEG_SPEED)
-               link_info->req_link_speed = link_info->auto_link_speed;
-       else
-               link_info->req_link_speed = link_info->force_link_speed;
-       link_info->advertising = link_info->auto_link_speeds;
        snprintf(phy_ver, PHY_VER_STR_LEN, " ph %d.%d.%d",
                 link_info->phy_ver[0],
                 link_info->phy_ver[1],
index 8af3ca8efcef9ac275f2daac4e44a06d108eed61..2be51b332652b0d0fd63629795599c10e6c25dea 100644 (file)
@@ -411,8 +411,8 @@ struct rx_tpa_end_cmp_ext {
 
 #define BNXT_NUM_TESTS(bp)     0
 
-#define BNXT_DEFAULT_RX_RING_SIZE      1023
-#define BNXT_DEFAULT_TX_RING_SIZE      512
+#define BNXT_DEFAULT_RX_RING_SIZE      511
+#define BNXT_DEFAULT_TX_RING_SIZE      511
 
 #define MAX_TPA                64
 
@@ -523,10 +523,16 @@ struct bnxt_ring_struct {
 
 struct tx_push_bd {
        __le32                  doorbell;
-       struct tx_bd            txbd1;
+       __le32                  tx_bd_len_flags_type;
+       u32                     tx_bd_opaque;
        struct tx_bd_ext        txbd2;
 };
 
+struct tx_push_buffer {
+       struct tx_push_bd       push_bd;
+       u32                     data[25];
+};
+
 struct bnxt_tx_ring_info {
        struct bnxt_napi        *bnapi;
        u16                     tx_prod;
@@ -538,8 +544,9 @@ struct bnxt_tx_ring_info {
 
        dma_addr_t              tx_desc_mapping[MAX_TX_PAGES];
 
-       struct tx_push_bd       *tx_push;
+       struct tx_push_buffer   *tx_push;
        dma_addr_t              tx_push_mapping;
+       __le64                  data_mapping;
 
 #define BNXT_DEV_STATE_CLOSING 0x1
        u32                     dev_state;
index 922b898e7a320e39da102a27818a68c84af08e79..3238817dfd5f79e2bd6a13b97f2483ce7eb4d663 100644 (file)
@@ -486,15 +486,8 @@ static u32 bnxt_fw_to_ethtool_support_spds(struct bnxt_link_info *link_info)
                speed_mask |= SUPPORTED_2500baseX_Full;
        if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
                speed_mask |= SUPPORTED_10000baseT_Full;
-       /* TODO: support 25GB, 50GB with different cable type */
-       if (fw_speeds & BNXT_LINK_SPEED_MSK_20GB)
-               speed_mask |= SUPPORTED_20000baseMLD2_Full |
-                       SUPPORTED_20000baseKR2_Full;
        if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
-               speed_mask |= SUPPORTED_40000baseKR4_Full |
-                       SUPPORTED_40000baseCR4_Full |
-                       SUPPORTED_40000baseSR4_Full |
-                       SUPPORTED_40000baseLR4_Full;
+               speed_mask |= SUPPORTED_40000baseCR4_Full;
 
        return speed_mask;
 }
@@ -514,15 +507,8 @@ static u32 bnxt_fw_to_ethtool_advertised_spds(struct bnxt_link_info *link_info)
                speed_mask |= ADVERTISED_2500baseX_Full;
        if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
                speed_mask |= ADVERTISED_10000baseT_Full;
-       /* TODO: how to advertise 20, 25, 40, 50GB with different cable type ?*/
-       if (fw_speeds & BNXT_LINK_SPEED_MSK_20GB)
-               speed_mask |= ADVERTISED_20000baseMLD2_Full |
-                             ADVERTISED_20000baseKR2_Full;
        if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
-               speed_mask |= ADVERTISED_40000baseKR4_Full |
-                             ADVERTISED_40000baseCR4_Full |
-                             ADVERTISED_40000baseSR4_Full |
-                             ADVERTISED_40000baseLR4_Full;
+               speed_mask |= ADVERTISED_40000baseCR4_Full;
        return speed_mask;
 }
 
@@ -557,11 +543,12 @@ static int bnxt_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        u16 ethtool_speed;
 
        cmd->supported = bnxt_fw_to_ethtool_support_spds(link_info);
+       cmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 
        if (link_info->auto_link_speeds)
                cmd->supported |= SUPPORTED_Autoneg;
 
-       if (BNXT_AUTO_MODE(link_info->auto_mode)) {
+       if (link_info->autoneg) {
                cmd->advertising =
                        bnxt_fw_to_ethtool_advertised_spds(link_info);
                cmd->advertising |= ADVERTISED_Autoneg;
@@ -570,28 +557,16 @@ static int bnxt_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                cmd->autoneg = AUTONEG_DISABLE;
                cmd->advertising = 0;
        }
-       if (link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) {
+       if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) {
                if ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) ==
                    BNXT_LINK_PAUSE_BOTH) {
                        cmd->advertising |= ADVERTISED_Pause;
-                       cmd->supported |= SUPPORTED_Pause;
                } else {
                        cmd->advertising |= ADVERTISED_Asym_Pause;
-                       cmd->supported |= SUPPORTED_Asym_Pause;
                        if (link_info->auto_pause_setting &
                            BNXT_LINK_PAUSE_RX)
                                cmd->advertising |= ADVERTISED_Pause;
                }
-       } else if (link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) {
-               if ((link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) ==
-                   BNXT_LINK_PAUSE_BOTH) {
-                       cmd->supported |= SUPPORTED_Pause;
-               } else {
-                       cmd->supported |= SUPPORTED_Asym_Pause;
-                       if (link_info->force_pause_setting &
-                           BNXT_LINK_PAUSE_RX)
-                               cmd->supported |= SUPPORTED_Pause;
-               }
        }
 
        cmd->port = PORT_NONE;
@@ -670,6 +645,9 @@ static u16 bnxt_get_fw_auto_link_speeds(u32 advertising)
        if (advertising & ADVERTISED_10000baseT_Full)
                fw_speed_mask |= BNXT_LINK_SPEED_MSK_10GB;
 
+       if (advertising & ADVERTISED_40000baseCR4_Full)
+               fw_speed_mask |= BNXT_LINK_SPEED_MSK_40GB;
+
        return fw_speed_mask;
 }
 
@@ -729,7 +707,7 @@ static int bnxt_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                speed = ethtool_cmd_speed(cmd);
                link_info->req_link_speed = bnxt_get_fw_speed(dev, speed);
                link_info->req_duplex = BNXT_LINK_DUPLEX_FULL;
-               link_info->autoneg &= ~BNXT_AUTONEG_SPEED;
+               link_info->autoneg = 0;
                link_info->advertising = 0;
        }
 
@@ -748,8 +726,7 @@ static void bnxt_get_pauseparam(struct net_device *dev,
 
        if (BNXT_VF(bp))
                return;
-       epause->autoneg = !!(link_info->auto_pause_setting &
-                            BNXT_LINK_PAUSE_BOTH);
+       epause->autoneg = !!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL);
        epause->rx_pause = ((link_info->pause & BNXT_LINK_PAUSE_RX) != 0);
        epause->tx_pause = ((link_info->pause & BNXT_LINK_PAUSE_TX) != 0);
 }
@@ -765,6 +742,9 @@ static int bnxt_set_pauseparam(struct net_device *dev,
                return rc;
 
        if (epause->autoneg) {
+               if (!(link_info->autoneg & BNXT_AUTONEG_SPEED))
+                       return -EINVAL;
+
                link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
                link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_BOTH;
        } else {
index b15a60d787c70354b780d9214e026dd2fbaebdec..d7e01a74e9275b29a915e4c5417c7116ffddcabf 100644 (file)
@@ -2445,8 +2445,7 @@ static void bcmgenet_irq_task(struct work_struct *work)
        }
 
        /* Link UP/DOWN event */
-       if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
-           (priv->irq0_stat & UMAC_IRQ_LINK_EVENT)) {
+       if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
                phy_mac_interrupt(priv->phydev,
                                  !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
                priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
index 0d775964b06016bfcc66352c99c0164c6957d773..457c3bc8cfff49654b45e879495e5cbfa88428d6 100644 (file)
@@ -401,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
         * Ethernet MAC ISRs
         */
        if (priv->internal_phy)
-               priv->mii_bus->irq[phydev->mdio.addr] = PHY_IGNORE_INTERRUPT;
+               priv->phydev->irq = PHY_IGNORE_INTERRUPT;
 
        return 0;
 }
index 9293675df7ba99c44b2f77c739fbe12da0e86093..3010080cfeee350a2e523ce0c26aeb427953f21a 100644 (file)
@@ -7831,6 +7831,14 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
        return ret;
 }
 
+static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb)
+{
+       /* Check if we will never have enough descriptors,
+        * as gso_segs can be more than current ring size
+        */
+       return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3;
+}
+
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
 
 /* Use GSO to workaround all TSO packets that meet HW bug conditions
@@ -7934,14 +7942,19 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
                 * vlan encapsulated.
                 */
                if (skb->protocol == htons(ETH_P_8021Q) ||
-                   skb->protocol == htons(ETH_P_8021AD))
-                       return tg3_tso_bug(tp, tnapi, txq, skb);
+                   skb->protocol == htons(ETH_P_8021AD)) {
+                       if (tg3_tso_bug_gso_check(tnapi, skb))
+                               return tg3_tso_bug(tp, tnapi, txq, skb);
+                       goto drop;
+               }
 
                if (!skb_is_gso_v6(skb)) {
                        if (unlikely((ETH_HLEN + hdr_len) > 80) &&
-                           tg3_flag(tp, TSO_BUG))
-                               return tg3_tso_bug(tp, tnapi, txq, skb);
-
+                           tg3_flag(tp, TSO_BUG)) {
+                               if (tg3_tso_bug_gso_check(tnapi, skb))
+                                       return tg3_tso_bug(tp, tnapi, txq, skb);
+                               goto drop;
+                       }
                        ip_csum = iph->check;
                        ip_tot_len = iph->tot_len;
                        iph->check = 0;
@@ -8073,7 +8086,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
        if (would_hit_hwbug) {
                tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
 
-               if (mss) {
+               if (mss && tg3_tso_bug_gso_check(tnapi, skb)) {
                        /* If it's a TSO packet, do GSO instead of
                         * allocating and copying to a large linear SKB
                         */
@@ -12016,7 +12029,7 @@ static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
        int ret;
        u32 offset, len, b_offset, odd_len;
        u8 *buf;
-       __be32 start, end;
+       __be32 start = 0, end;
 
        if (tg3_flag(tp, NO_NVRAM) ||
            eeprom->magic != TG3_EEPROM_MAGIC)
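
The tg3 hunks above gate the GSO software fallback on tg3_tso_bug_gso_check(). A standalone sketch of that budget test; reading the /3 divisor as "up to three descriptors per segment" is an editorial assumption, not stated in the diff.

#include <stdbool.h>
#include <stdio.h>

/* A segmented packet is only worth handing to GSO if the segments could
 * ever fit in the tx ring; otherwise the packet is dropped up front. */
static bool gso_fits(unsigned int gso_segs, unsigned int tx_pending)
{
	return gso_segs < tx_pending / 3;
}

int main(void)
{
	printf("%d\n", gso_fits(100, 512));	/* 1: fall back to GSO */
	printf("%d\n", gso_fits(200, 512));	/* 0: drop the packet */
	return 0;
}
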
index 9d9984a87d4227f0b79c01e5ff1a3877da41b7e3..50c94104f19cc5d90b5fb3d4643bae1d4624e127 100644 (file)
@@ -2823,7 +2823,7 @@ static int macb_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct device_node *phy_node;
        const struct macb_config *macb_config = NULL;
-       struct clk *pclk, *hclk, *tx_clk;
+       struct clk *pclk, *hclk = NULL, *tx_clk = NULL;
        unsigned int queue_mask, num_queues;
        struct macb_platform_data *pdata;
        bool native_io;
index b89504405b7207f12663aee0d6d3ecf78d392235..34d269cd5579b7e0f1c1afd29bffb04c8711d71d 100644 (file)
@@ -1526,7 +1526,6 @@ static int liquidio_ptp_gettime(struct ptp_clock_info *ptp,
                                struct timespec64 *ts)
 {
        u64 ns;
-       u32 remainder;
        unsigned long flags;
        struct lio *lio = container_of(ptp, struct lio, ptp_info);
        struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
@@ -1536,8 +1535,7 @@ static int liquidio_ptp_gettime(struct ptp_clock_info *ptp,
        ns += lio->ptp_adjust;
        spin_unlock_irqrestore(&lio->ptp_lock, flags);
 
-       ts->tv_sec = div_u64_rem(ns, 1000000000ULL, &remainder);
-       ts->tv_nsec = remainder;
+       *ts = ns_to_timespec64(ns);
 
        return 0;
 }
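
For non-negative timestamps, the ns_to_timespec64() call that replaces the open-coded div_u64_rem() above reduces to one division with remainder. A user-space sketch (the kernel helper additionally copes with negative values):

#include <inttypes.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

struct ts64_sketch {
	int64_t tv_sec;
	long tv_nsec;
};

static struct ts64_sketch ns_to_ts(uint64_t ns)
{
	struct ts64_sketch ts;

	ts.tv_sec = ns / NSEC_PER_SEC;	/* whole seconds */
	ts.tv_nsec = ns % NSEC_PER_SEC;	/* leftover nanoseconds */
	return ts;
}

int main(void)
{
	struct ts64_sketch ts = ns_to_ts(1500000042ULL);

	printf("%" PRId64 ".%09ld\n", ts.tv_sec, ts.tv_nsec);	/* 1.500000042 */
	return 0;
}
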
@@ -1685,7 +1683,7 @@ static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
        dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
        /* droq creation and local register settings. */
        ret_val = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
-       if (ret_val == -1)
+       if (ret_val < 0)
                return ret_val;
 
        if (ret_val == 1) {
@@ -2526,7 +2524,7 @@ static void handle_timestamp(struct octeon_device *oct,
 
        octeon_swap_8B_data(&resp->timestamp, 1);
 
-       if (unlikely((skb_shinfo(skb)->tx_flags | SKBTX_IN_PROGRESS) != 0)) {
+       if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) != 0)) {
                struct skb_shared_hwtstamps ts;
                u64 ns = resp->timestamp;
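
Why the one-character handle_timestamp() fix above matters: OR-ing the flag in makes the test unconditionally true, while AND actually checks whether the flag is set. Demonstrable in isolation (the flag value below is a stand-in, not the real SKBTX_IN_PROGRESS):

#include <stdio.h>

#define IN_PROGRESS 0x4	/* hypothetical stand-in flag */

int main(void)
{
	unsigned int tx_flags = 0;	/* no timestamp was requested */

	printf("%d\n", (tx_flags | IN_PROGRESS) != 0);	/* 1: always fires */
	printf("%d\n", (tx_flags & IN_PROGRESS) != 0);	/* 0: correct test */
	return 0;
}
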
 
index 4dba86eaa04559649b012cbeff8707c47a176927..174072b3740b4a15f86f292a51a28d8239ee8c0a 100644 (file)
@@ -983,5 +983,5 @@ int octeon_create_droq(struct octeon_device *oct,
 
 create_droq_fail:
        octeon_delete_droq(oct, q_no);
-       return -1;
+       return -ENOMEM;
 }
index c24cb2a86a42f997c31536947ff0ad7f37bd577c..a009bc30dc4dd36e51c9a1269c28dcb10b8de23f 100644 (file)
@@ -574,8 +574,7 @@ static inline void nicvf_set_rxhash(struct net_device *netdev,
 
 static void nicvf_rcv_pkt_handler(struct net_device *netdev,
                                  struct napi_struct *napi,
-                                 struct cmp_queue *cq,
-                                 struct cqe_rx_t *cqe_rx, int cqe_type)
+                                 struct cqe_rx_t *cqe_rx)
 {
        struct sk_buff *skb;
        struct nicvf *nic = netdev_priv(netdev);
@@ -591,7 +590,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
        }
 
        /* Check for errors */
-       err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
+       err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
        if (err && !cqe_rx->rb_cnt)
                return;
 
@@ -682,8 +681,7 @@ loop:
                           cq_idx, cq_desc->cqe_type);
                switch (cq_desc->cqe_type) {
                case CQE_TYPE_RX:
-                       nicvf_rcv_pkt_handler(netdev, napi, cq,
-                                             cq_desc, CQE_TYPE_RX);
+                       nicvf_rcv_pkt_handler(netdev, napi, cq_desc);
                        work_done++;
                break;
                case CQE_TYPE_SEND:
@@ -1125,7 +1123,6 @@ int nicvf_stop(struct net_device *netdev)
 
        /* Clear multiqset info */
        nic->pnicvf = nic;
-       nic->sqs_count = 0;
 
        return 0;
 }
@@ -1354,6 +1351,9 @@ void nicvf_update_stats(struct nicvf *nic)
        drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
                                  stats->tx_bcast_frames_ok +
                                  stats->tx_mcast_frames_ok;
+       drv_stats->rx_frames_ok = stats->rx_ucast_frames +
+                                 stats->rx_bcast_frames +
+                                 stats->rx_mcast_frames;
        drv_stats->rx_drops = stats->rx_drop_red +
                              stats->rx_drop_overrun;
        drv_stats->tx_drops = stats->tx_drops;
@@ -1538,6 +1538,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        nicvf_send_vf_struct(nic);
 
+       if (!pass1_silicon(nic->pdev))
+               nic->hw_tso = true;
+
        /* Check if this VF is in QS only mode */
        if (nic->sqs_mode)
                return 0;
@@ -1557,9 +1560,6 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 
-       if (!pass1_silicon(nic->pdev))
-               nic->hw_tso = true;
-
        netdev->netdev_ops = &nicvf_netdev_ops;
        netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
 
index d0d1b54900610046955a390f6f5c87ce45cbf1df..767347b1f63122271a74363421cb78f717218948 100644 (file)
@@ -1329,16 +1329,12 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
 }
 
 /* Check for errors in the receive cmp.queue entry */
-int nicvf_check_cqe_rx_errs(struct nicvf *nic,
-                           struct cmp_queue *cq, struct cqe_rx_t *cqe_rx)
+int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 {
        struct nicvf_hw_stats *stats = &nic->hw_stats;
-       struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
 
-       if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
-               drv_stats->rx_frames_ok++;
+       if (!cqe_rx->err_level && !cqe_rx->err_opcode)
                return 0;
-       }
 
        if (netif_msg_rx_err(nic))
                netdev_err(nic->netdev,
index c5030a7f213ae57e9799142958e5d4fe64a8fbf5..6673e1133523881a4c1719e7d6ca4c123f7a8e1e 100644 (file)
@@ -338,8 +338,7 @@ u64  nicvf_queue_reg_read(struct nicvf *nic,
 /* Stats */
 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx);
 void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx);
-int nicvf_check_cqe_rx_errs(struct nicvf *nic,
-                           struct cmp_queue *cq, struct cqe_rx_t *cqe_rx);
+int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
 int nicvf_check_cqe_tx_errs(struct nicvf *nic,
                            struct cmp_queue *cq, struct cqe_send_t *cqe_tx);
 #endif /* NICVF_QUEUES_H */
index ee04caa6c4d8522d0cc34ec15ec7575bfbdc4afa..a89721fad633ec75a3ed46a7fc22d8988cbdada4 100644 (file)
@@ -681,6 +681,24 @@ int t3_seeprom_wp(struct adapter *adapter, int enable)
        return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0);
 }
 
+static int vpdstrtouint(char *s, int len, unsigned int base, unsigned int *val)
+{
+       char tok[len + 1];
+
+       memcpy(tok, s, len);
+       tok[len] = 0;
+       return kstrtouint(strim(tok), base, val);
+}
+
+static int vpdstrtou16(char *s, int len, unsigned int base, u16 *val)
+{
+       char tok[len + 1];
+
+       memcpy(tok, s, len);
+       tok[len] = 0;
+       return kstrtou16(strim(tok), base, val);
+}
+
 /**
  *     get_vpd_params - read VPD parameters from VPD EEPROM
  *     @adapter: adapter to read
@@ -709,19 +727,19 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p)
                        return ret;
        }
 
-       ret = kstrtouint(vpd.cclk_data, 10, &p->cclk);
+       ret = vpdstrtouint(vpd.cclk_data, vpd.cclk_len, 10, &p->cclk);
        if (ret)
                return ret;
-       ret = kstrtouint(vpd.mclk_data, 10, &p->mclk);
+       ret = vpdstrtouint(vpd.mclk_data, vpd.mclk_len, 10, &p->mclk);
        if (ret)
                return ret;
-       ret = kstrtouint(vpd.uclk_data, 10, &p->uclk);
+       ret = vpdstrtouint(vpd.uclk_data, vpd.uclk_len, 10, &p->uclk);
        if (ret)
                return ret;
-       ret = kstrtouint(vpd.mdc_data, 10, &p->mdc);
+       ret = vpdstrtouint(vpd.mdc_data, vpd.mdc_len, 10, &p->mdc);
        if (ret)
                return ret;
-       ret = kstrtouint(vpd.mt_data, 10, &p->mem_timing);
+       ret = vpdstrtouint(vpd.mt_data, vpd.mt_len, 10, &p->mem_timing);
        if (ret)
                return ret;
        memcpy(p->sn, vpd.sn_data, SERNUM_LEN);
@@ -733,10 +751,12 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p)
        } else {
                p->port_type[0] = hex_to_bin(vpd.port0_data[0]);
                p->port_type[1] = hex_to_bin(vpd.port1_data[0]);
-               ret = kstrtou16(vpd.xaui0cfg_data, 16, &p->xauicfg[0]);
+               ret = vpdstrtou16(vpd.xaui0cfg_data, vpd.xaui0cfg_len, 16,
+                                 &p->xauicfg[0]);
                if (ret)
                        return ret;
-               ret = kstrtou16(vpd.xaui1cfg_data, 16, &p->xauicfg[1]);
+               ret = vpdstrtou16(vpd.xaui1cfg_data, vpd.xaui1cfg_len, 16,
+                                 &p->xauicfg[1]);
                if (ret)
                        return ret;
        }
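
The vpdstrtouint()/vpdstrtou16() helpers above exist because VPD fields are fixed-width and not NUL-terminated, so handing them straight to kstrtouint() reads past the field. The same idea in portable C, with strtoul() standing in for the kernel parser and trailing whitespace tolerated rather than trimmed:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int vpd_field_to_ulong(const char *field, size_t len,
			      int base, unsigned long *val)
{
	char tok[32];
	char *end;

	if (len >= sizeof(tok))
		return -EINVAL;
	memcpy(tok, field, len);
	tok[len] = '\0';	/* bound the field before parsing */
	errno = 0;
	*val = strtoul(tok, &end, base);
	return (errno || end == tok) ? -EINVAL : 0;
}

int main(void)
{
	/* the clock value occupies only the first 4 bytes of the buffer */
	const char raw[] = "200 MCLK";
	unsigned long cclk;

	if (!vpd_field_to_ulong(raw, 4, 10, &cclk))
		printf("cclk=%lu\n", cclk);	/* cclk=200 */
	return 0;
}
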
index a8dda635456dc1f8fb0d85799c3f605ea7cda39a..06bc2d2e7a7315ceb0f544aac1fcaec1af51fbc6 100644 (file)
@@ -165,6 +165,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
        CH_PCI_ID_TABLE_FENTRY(0x5098), /* Custom 2x40G QSFP */
        CH_PCI_ID_TABLE_FENTRY(0x5099), /* Custom 2x40G QSFP */
        CH_PCI_ID_TABLE_FENTRY(0x509a), /* Custom T520-CR */
+       CH_PCI_ID_TABLE_FENTRY(0x509b), /* Custom T540-CR LOM */
 
        /* T6 adapters:
         */
index 1671fa3332c2d3011db8c308a17ea5a9bf8b3fb2..7ba6d530b0c0ab6e3d1c3d4ea21b0b950ebe3c3f 100644 (file)
@@ -33,7 +33,7 @@
 
 #define DRV_NAME               "enic"
 #define DRV_DESCRIPTION                "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION            "2.3.0.12"
+#define DRV_VERSION            "2.3.0.20"
 #define DRV_COPYRIGHT          "Copyright 2008-2013 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX          6
index 1ffd1050860bb5cde576fd291e0651b351d578ff..1fdf5fe12a9562251e3bcee9fe635872053357a2 100644 (file)
@@ -298,7 +298,8 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
                          int wait)
 {
        struct devcmd2_controller *dc2c = vdev->devcmd2;
-       struct devcmd2_result *result = dc2c->result + dc2c->next_result;
+       struct devcmd2_result *result;
+       u8 color;
        unsigned int i;
        int delay, err;
        u32 fetch_index, new_posted;
@@ -336,13 +337,17 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
        if (dc2c->cmd_ring[posted].flags & DEVCMD2_FNORESULT)
                return 0;
 
+       result = dc2c->result + dc2c->next_result;
+       color = dc2c->color;
+
+       dc2c->next_result++;
+       if (dc2c->next_result == dc2c->result_size) {
+               dc2c->next_result = 0;
+               dc2c->color = dc2c->color ? 0 : 1;
+       }
+
        for (delay = 0; delay < wait; delay++) {
-               if (result->color == dc2c->color) {
-                       dc2c->next_result++;
-                       if (dc2c->next_result == dc2c->result_size) {
-                               dc2c->next_result = 0;
-                               dc2c->color = dc2c->color ? 0 : 1;
-                       }
+               if (result->color == color) {
                        if (result->error) {
                                err = result->error;
                                if (err != ERR_ECMDUNKNOWN ||
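
The _vnic_dev_cmd2() hunk above snapshots the result slot and its expected phase bit, then advances the consumer index before polling, so a wrapped ring flips the expected "color". A minimal single-threaded model of such a phase-bit result ring:

#include <stdio.h>

#define RING_SIZE 4

struct result {
	int color;	/* phase bit written by the producer */
	int value;
};

int main(void)
{
	struct result ring[RING_SIZE] = { { 0, 0 } };
	int next = 0, color = 1;

	/* consumer: remember which slot and phase to poll, then advance */
	struct result *slot = &ring[next];
	int want = color;

	if (++next == RING_SIZE) {
		next = 0;
		color = !color;	/* expected phase flips on wrap */
	}

	/* producer: complete the command by publishing value and phase */
	slot->value = 42;
	slot->color = 1;

	if (slot->color == want)
		printf("result=%d\n", slot->value);	/* result=42 */
	return 0;
}
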
index cf94b72dbacd942b9c56d2bc669854bea7c2ac18..48d91941408d8fe3aa148c632ff33c1ea4201335 100644 (file)
@@ -128,7 +128,6 @@ struct board_info {
        struct resource *data_res;
        struct resource *addr_req;   /* resources requested */
        struct resource *data_req;
-       struct resource *irq_res;
 
        int              irq_wake;
 
@@ -1300,22 +1299,16 @@ static int
 dm9000_open(struct net_device *dev)
 {
        struct board_info *db = netdev_priv(dev);
-       unsigned long irqflags = db->irq_res->flags & IRQF_TRIGGER_MASK;
 
        if (netif_msg_ifup(db))
                dev_dbg(db->dev, "enabling %s\n", dev->name);
 
-       /* If there is no IRQ type specified, default to something that
-        * may work, and tell the user that this is a problem */
-
-       if (irqflags == IRQF_TRIGGER_NONE)
-               irqflags = irq_get_trigger_type(dev->irq);
-
-       if (irqflags == IRQF_TRIGGER_NONE)
+       /* If there is no IRQ type specified, tell the user that this is a
+        * problem
+        */
+       if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE)
                dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
 
-       irqflags |= IRQF_SHARED;
-
        /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
        iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
        mdelay(1); /* delay needed by DM9000B */
@@ -1323,7 +1316,8 @@ dm9000_open(struct net_device *dev)
        /* Initialize DM9000 board */
        dm9000_init_dm9000(dev);
 
-       if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev))
+       if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED,
+                       dev->name, dev))
                return -EAGAIN;
        /* Now that we have an interrupt handler hooked up we can unmask
         * our interrupts
@@ -1500,15 +1494,22 @@ dm9000_probe(struct platform_device *pdev)
 
        db->addr_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        db->data_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       db->irq_res  = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 
-       if (db->addr_res == NULL || db->data_res == NULL ||
-           db->irq_res == NULL) {
-               dev_err(db->dev, "insufficient resources\n");
+       if (!db->addr_res || !db->data_res) {
+               dev_err(db->dev, "insufficient resources addr=%p data=%p\n",
+                       db->addr_res, db->data_res);
                ret = -ENOENT;
                goto out;
        }
 
+       ndev->irq = platform_get_irq(pdev, 0);
+       if (ndev->irq < 0) {
+               dev_err(db->dev, "interrupt resource unavailable: %d\n",
+                       ndev->irq);
+               ret = ndev->irq;
+               goto out;
+       }
+
        db->irq_wake = platform_get_irq(pdev, 1);
        if (db->irq_wake >= 0) {
                dev_dbg(db->dev, "wakeup irq %d\n", db->irq_wake);
@@ -1570,7 +1571,6 @@ dm9000_probe(struct platform_device *pdev)
 
        /* fill in parameters for net-dev structure */
        ndev->base_addr = (unsigned long)db->io_addr;
-       ndev->irq       = db->irq_res->start;
 
        /* ensure at least we have a default set of IO routines */
        dm9000_set_io(db, iosize);
index 48ecbc8aaaea2983fdce3b2c9bb08d0578059f41..b423ad380b6a3ff0b31621cf98152143030c2ed6 100644 (file)
@@ -18,6 +18,7 @@ if NET_VENDOR_EZCHIP
 config EZCHIP_NPS_MANAGEMENT_ENET
        tristate "EZchip NPS management enet support"
        depends on OF_IRQ && OF_NET
+       depends on HAS_IOMEM
        ---help---
          Simple LAN device for debug or management purposes.
          Device supports interrupts for RX and TX(completion).
index 4097c58d17a7527a3654fc6664e575d97575d4fe..cbe21dc7e37ee41dab11425d673ba0d20409074a 100644 (file)
@@ -4,6 +4,9 @@
 
 obj-$(CONFIG_FEC) += fec.o
 fec-objs :=fec_main.o fec_ptp.o
+CFLAGS_fec_main.o := -D__CHECK_ENDIAN__
+CFLAGS_fec_ptp.o := -D__CHECK_ENDIAN__
+
 obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx.o
 ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y)
        obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx_phy.o
index 99d33e2d35e6c2c219fbd06f34546619a01a586d..2106d72c91dc610fdf7b368e6dbf0a99c98c0a3d 100644 (file)
@@ -19,8 +19,7 @@
 #include <linux/timecounter.h>
 
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-    defined(CONFIG_M520x) || defined(CONFIG_M532x) || \
-    defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+    defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
 /*
  *     Just figures, Motorola would have to change the offsets for
  *     registers in the same peripheral device on different models
 
 /*
  *     Define the buffer descriptor structure.
+ *
+ *     Evidently, ARM SoCs have the FEC block generated in a
+ *     little endian mode so adjust endianness accordingly.
  */
-#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+#if defined(CONFIG_ARM)
+#define fec32_to_cpu le32_to_cpu
+#define fec16_to_cpu le16_to_cpu
+#define cpu_to_fec32 cpu_to_le32
+#define cpu_to_fec16 cpu_to_le16
+#define __fec32 __le32
+#define __fec16 __le16
+
 struct bufdesc {
-       unsigned short cbd_datlen;      /* Data length */
-       unsigned short cbd_sc;  /* Control and status info */
-       unsigned long cbd_bufaddr;      /* Buffer address */
+       __fec16 cbd_datlen;     /* Data length */
+       __fec16 cbd_sc;         /* Control and status info */
+       __fec32 cbd_bufaddr;    /* Buffer address */
 };
 #else
+#define fec32_to_cpu be32_to_cpu
+#define fec16_to_cpu be16_to_cpu
+#define cpu_to_fec32 cpu_to_be32
+#define cpu_to_fec16 cpu_to_be16
+#define __fec32 __be32
+#define __fec16 __be16
+
 struct bufdesc {
-       unsigned short  cbd_sc;                 /* Control and status info */
-       unsigned short  cbd_datlen;             /* Data length */
-       unsigned long   cbd_bufaddr;            /* Buffer address */
+       __fec16 cbd_sc;         /* Control and status info */
+       __fec16 cbd_datlen;     /* Data length */
+       __fec32 cbd_bufaddr;    /* Buffer address */
 };
 #endif
 
 struct bufdesc_ex {
        struct bufdesc desc;
-       unsigned long cbd_esc;
-       unsigned long cbd_prot;
-       unsigned long cbd_bdu;
-       unsigned long ts;
-       unsigned short res0[4];
+       __fec32 cbd_esc;
+       __fec32 cbd_prot;
+       __fec32 cbd_bdu;
+       __fec32 ts;
+       __fec16 res0[4];
 };
 
 /*
index 502da6f48f95e5e52a43ce5666a5c265fcd9cfe3..41c81f6ec630a289c2c280ae068f429a8da9e21e 100644 (file)
@@ -332,11 +332,13 @@ static void fec_dump(struct net_device *ndev)
        bdp = txq->tx_bd_base;
 
        do {
-               pr_info("%3u %c%c 0x%04x 0x%08lx %4u %p\n",
+               pr_info("%3u %c%c 0x%04x 0x%08x %4u %p\n",
                        index,
                        bdp == txq->cur_tx ? 'S' : ' ',
                        bdp == txq->dirty_tx ? 'H' : ' ',
-                       bdp->cbd_sc, bdp->cbd_bufaddr, bdp->cbd_datlen,
+                       fec16_to_cpu(bdp->cbd_sc),
+                       fec32_to_cpu(bdp->cbd_bufaddr),
+                       fec16_to_cpu(bdp->cbd_datlen),
                        txq->tx_skbuff[index]);
                bdp = fec_enet_get_nextdesc(bdp, fep, 0);
                index++;
@@ -389,7 +391,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
                ebdp = (struct bufdesc_ex *)bdp;
 
-               status = bdp->cbd_sc;
+               status = fec16_to_cpu(bdp->cbd_sc);
                status &= ~BD_ENET_TX_STATS;
                status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
                frag_len = skb_shinfo(skb)->frags[frag].size;
@@ -411,7 +413,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                        if (skb->ip_summed == CHECKSUM_PARTIAL)
                                estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                        ebdp->cbd_bdu = 0;
-                       ebdp->cbd_esc = estatus;
+                       ebdp->cbd_esc = cpu_to_fec32(estatus);
                }
 
                bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
@@ -435,9 +437,9 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                        goto dma_mapping_error;
                }
 
-               bdp->cbd_bufaddr = addr;
-               bdp->cbd_datlen = frag_len;
-               bdp->cbd_sc = status;
+               bdp->cbd_bufaddr = cpu_to_fec32(addr);
+               bdp->cbd_datlen = cpu_to_fec16(frag_len);
+               bdp->cbd_sc = cpu_to_fec16(status);
        }
 
        return bdp;
@@ -445,8 +447,8 @@ dma_mapping_error:
        bdp = txq->cur_tx;
        for (i = 0; i < frag; i++) {
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
-               dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-                               bdp->cbd_datlen, DMA_TO_DEVICE);
+               dma_unmap_single(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr),
+                                fec16_to_cpu(bdp->cbd_datlen), DMA_TO_DEVICE);
        }
        return ERR_PTR(-ENOMEM);
 }
@@ -483,7 +485,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
        /* Fill in a Tx ring entry */
        bdp = txq->cur_tx;
        last_bdp = bdp;
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
 
        /* Set buffer length and buffer pointer */
@@ -539,21 +541,21 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
        index = fec_enet_get_bd_index(txq->tx_bd_base, last_bdp, fep);
        /* Save skb pointer */
        txq->tx_skbuff[index] = skb;
 
-       bdp->cbd_datlen = buflen;
-       bdp->cbd_bufaddr = addr;
+       bdp->cbd_datlen = cpu_to_fec16(buflen);
+       bdp->cbd_bufaddr = cpu_to_fec32(addr);
 
        /* Send it on its way.  Tell FEC it's ready, interrupt when done,
         * it's the last BD of the frame, and to put the CRC on the end.
         */
        status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        /* If this was the last BD in the ring, start at the beginning again. */
        bdp = fec_enet_get_nextdesc(last_bdp, fep, queue);
@@ -585,7 +587,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
        unsigned int estatus = 0;
        dma_addr_t addr;
 
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
 
        status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
@@ -607,8 +609,8 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
                return NETDEV_TX_BUSY;
        }
 
-       bdp->cbd_datlen = size;
-       bdp->cbd_bufaddr = addr;
+       bdp->cbd_datlen = cpu_to_fec16(size);
+       bdp->cbd_bufaddr = cpu_to_fec32(addr);
 
        if (fep->bufdesc_ex) {
                if (fep->quirks & FEC_QUIRK_HAS_AVB)
@@ -616,7 +618,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
        /* Handle the last BD specially */
@@ -625,10 +627,10 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
        if (is_last) {
                status |= BD_ENET_TX_INTR;
                if (fep->bufdesc_ex)
-                       ebdp->cbd_esc |= BD_ENET_TX_INT;
+                       ebdp->cbd_esc |= cpu_to_fec32(BD_ENET_TX_INT);
        }
 
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        return 0;
 }
@@ -647,7 +649,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
        unsigned short status;
        unsigned int estatus = 0;
 
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
        status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
 
@@ -671,8 +673,8 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
                }
        }
 
-       bdp->cbd_bufaddr = dmabuf;
-       bdp->cbd_datlen = hdr_len;
+       bdp->cbd_bufaddr = cpu_to_fec32(dmabuf);
+       bdp->cbd_datlen = cpu_to_fec16(hdr_len);
 
        if (fep->bufdesc_ex) {
                if (fep->quirks & FEC_QUIRK_HAS_AVB)
@@ -680,10 +682,10 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        return 0;
 }
@@ -823,15 +825,15 @@ static void fec_enet_bd_init(struct net_device *dev)
 
                        /* Initialize the BD for every fragment in the page. */
                        if (bdp->cbd_bufaddr)
-                               bdp->cbd_sc = BD_ENET_RX_EMPTY;
+                               bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
                        else
-                               bdp->cbd_sc = 0;
+                               bdp->cbd_sc = cpu_to_fec16(0);
                        bdp = fec_enet_get_nextdesc(bdp, fep, q);
                }
 
                /* Set the last buffer to wrap */
                bdp = fec_enet_get_prevdesc(bdp, fep, q);
-               bdp->cbd_sc |= BD_SC_WRAP;
+               bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 
                rxq->cur_rx = rxq->rx_bd_base;
        }
@@ -844,18 +846,18 @@ static void fec_enet_bd_init(struct net_device *dev)
 
                for (i = 0; i < txq->tx_ring_size; i++) {
                        /* Initialize the BD for every fragment in the page. */
-                       bdp->cbd_sc = 0;
+                       bdp->cbd_sc = cpu_to_fec16(0);
                        if (txq->tx_skbuff[i]) {
                                dev_kfree_skb_any(txq->tx_skbuff[i]);
                                txq->tx_skbuff[i] = NULL;
                        }
-                       bdp->cbd_bufaddr = 0;
+                       bdp->cbd_bufaddr = cpu_to_fec32(0);
                        bdp = fec_enet_get_nextdesc(bdp, fep, q);
                }
 
                /* Set the last buffer to wrap */
                bdp = fec_enet_get_prevdesc(bdp, fep, q);
-               bdp->cbd_sc |= BD_SC_WRAP;
+               bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
                txq->dirty_tx = bdp;
        }
 }
@@ -947,8 +949,10 @@ fec_restart(struct net_device *ndev)
         */
        if (fep->quirks & FEC_QUIRK_ENET_MAC) {
                memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN);
-               writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW);
-               writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH);
+               writel((__force u32)cpu_to_be32(temp_mac[0]),
+                      fep->hwp + FEC_ADDR_LOW);
+               writel((__force u32)cpu_to_be32(temp_mac[1]),
+                      fep->hwp + FEC_ADDR_HIGH);
        }
 
        /* Clear any outstanding interrupt. */
@@ -1222,7 +1226,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
        while (bdp != READ_ONCE(txq->cur_tx)) {
                /* Order the load of cur_tx and cbd_sc */
                rmb();
-               status = READ_ONCE(bdp->cbd_sc);
+               status = fec16_to_cpu(READ_ONCE(bdp->cbd_sc));
                if (status & BD_ENET_TX_READY)
                        break;
 
@@ -1230,10 +1234,12 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 
                skb = txq->tx_skbuff[index];
                txq->tx_skbuff[index] = NULL;
-               if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr))
-                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-                                       bdp->cbd_datlen, DMA_TO_DEVICE);
-               bdp->cbd_bufaddr = 0;
+               if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+                       dma_unmap_single(&fep->pdev->dev,
+                                        fec32_to_cpu(bdp->cbd_bufaddr),
+                                        fec16_to_cpu(bdp->cbd_datlen),
+                                        DMA_TO_DEVICE);
+               bdp->cbd_bufaddr = cpu_to_fec32(0);
                if (!skb) {
                        bdp = fec_enet_get_nextdesc(bdp, fep, queue_id);
                        continue;
@@ -1264,7 +1270,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
                        struct skb_shared_hwtstamps shhwtstamps;
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
-                       fec_enet_hwtstamp(fep, ebdp->ts, &shhwtstamps);
+                       fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps);
                        skb_tstamp_tx(skb, &shhwtstamps);
                }
 
@@ -1324,10 +1330,8 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff
        if (off)
                skb_reserve(skb, fep->rx_align + 1 - off);
 
-       bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, skb->data,
-                                         FEC_ENET_RX_FRSIZE - fep->rx_align,
-                                         DMA_FROM_DEVICE);
-       if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+       bdp->cbd_bufaddr = cpu_to_fec32(dma_map_single(&fep->pdev->dev, skb->data, FEC_ENET_RX_FRSIZE - fep->rx_align, DMA_FROM_DEVICE));
+       if (dma_mapping_error(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr))) {
                if (net_ratelimit())
                        netdev_err(ndev, "Rx DMA memory map failed\n");
                return -ENOMEM;
@@ -1349,7 +1353,8 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
        if (!new_skb)
                return false;
 
-       dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
+       dma_sync_single_for_cpu(&fep->pdev->dev,
+                               fec32_to_cpu(bdp->cbd_bufaddr),
                                FEC_ENET_RX_FRSIZE - fep->rx_align,
                                DMA_FROM_DEVICE);
        if (!swap)
@@ -1396,7 +1401,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
         */
        bdp = rxq->cur_rx;
 
-       while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
+       while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
 
                if (pkt_received >= budget)
                        break;
@@ -1438,7 +1443,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 
                /* Process the incoming frame. */
                ndev->stats.rx_packets++;
-               pkt_len = bdp->cbd_datlen;
+               pkt_len = fec16_to_cpu(bdp->cbd_datlen);
                ndev->stats.rx_bytes += pkt_len;
 
                index = fec_enet_get_bd_index(rxq->rx_bd_base, bdp, fep);
@@ -1456,7 +1461,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                                ndev->stats.rx_dropped++;
                                goto rx_processing_done;
                        }
-                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                       dma_unmap_single(&fep->pdev->dev,
+                                        fec32_to_cpu(bdp->cbd_bufaddr),
                                         FEC_ENET_RX_FRSIZE - fep->rx_align,
                                         DMA_FROM_DEVICE);
                }
@@ -1475,7 +1481,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                /* If this is a VLAN packet remove the VLAN Tag */
                vlan_packet_rcvd = false;
                if ((ndev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-                       fep->bufdesc_ex && (ebdp->cbd_esc & BD_ENET_RX_VLAN)) {
+                   fep->bufdesc_ex &&
+                   (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN))) {
                        /* Push and remove the vlan tag */
                        struct vlan_hdr *vlan_header =
                                        (struct vlan_hdr *) (data + ETH_HLEN);
@@ -1491,12 +1498,12 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 
                /* Get receive timestamp from the skb */
                if (fep->hwts_rx_en && fep->bufdesc_ex)
-                       fec_enet_hwtstamp(fep, ebdp->ts,
+                       fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts),
                                          skb_hwtstamps(skb));
 
                if (fep->bufdesc_ex &&
                    (fep->csum_flags & FLAG_RX_CSUM_ENABLED)) {
-                       if (!(ebdp->cbd_esc & FLAG_RX_CSUM_ERROR)) {
+                       if (!(ebdp->cbd_esc & cpu_to_fec32(FLAG_RX_CSUM_ERROR))) {
                                /* don't check it */
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
                        } else {
@@ -1513,7 +1520,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                napi_gro_receive(&fep->napi, skb);
 
                if (is_copybreak) {
-                       dma_sync_single_for_device(&fep->pdev->dev, bdp->cbd_bufaddr,
+                       dma_sync_single_for_device(&fep->pdev->dev,
+                                                  fec32_to_cpu(bdp->cbd_bufaddr),
                                                   FEC_ENET_RX_FRSIZE - fep->rx_align,
                                                   DMA_FROM_DEVICE);
                } else {
@@ -1527,12 +1535,12 @@ rx_processing_done:
 
                /* Mark the buffer empty */
                status |= BD_ENET_RX_EMPTY;
-               bdp->cbd_sc = status;
+               bdp->cbd_sc = cpu_to_fec16(status);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
-                       ebdp->cbd_esc = BD_ENET_RX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
                        ebdp->cbd_prot = 0;
                        ebdp->cbd_bdu = 0;
                }
@@ -2145,8 +2153,7 @@ static int fec_enet_get_regs_len(struct net_device *ndev)
 
 /* List of registers that can safely be read to dump them with ethtool */
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-       defined(CONFIG_M520x) || defined(CONFIG_M532x) ||               \
-       defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+       defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
 static u32 fec_enet_register_offset[] = {
        FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
        FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
@@ -2662,7 +2669,7 @@ static void fec_enet_free_buffers(struct net_device *ndev)
                        rxq->rx_skbuff[i] = NULL;
                        if (skb) {
                                dma_unmap_single(&fep->pdev->dev,
-                                                bdp->cbd_bufaddr,
+                                                fec32_to_cpu(bdp->cbd_bufaddr),
                                                 FEC_ENET_RX_FRSIZE - fep->rx_align,
                                                 DMA_FROM_DEVICE);
                                dev_kfree_skb(skb);
@@ -2777,11 +2784,11 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue)
                }
 
                rxq->rx_skbuff[i] = skb;
-               bdp->cbd_sc = BD_ENET_RX_EMPTY;
+               bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-                       ebdp->cbd_esc = BD_ENET_RX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
                }
 
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
@@ -2789,7 +2796,7 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue)
 
        /* Set the last buffer to wrap. */
        bdp = fec_enet_get_prevdesc(bdp, fep, queue);
-       bdp->cbd_sc |= BD_SC_WRAP;
+       bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
        return 0;
 
  err_alloc:
@@ -2812,12 +2819,12 @@ fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue)
                if (!txq->tx_bounce[i])
                        goto err_alloc;
 
-               bdp->cbd_sc = 0;
-               bdp->cbd_bufaddr = 0;
+               bdp->cbd_sc = cpu_to_fec16(0);
+               bdp->cbd_bufaddr = cpu_to_fec32(0);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-                       ebdp->cbd_esc = BD_ENET_TX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_TX_INT);
                }
 
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
@@ -2825,7 +2832,7 @@ fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue)
 
        /* Set the last buffer to wrap. */
        bdp = fec_enet_get_prevdesc(bdp, fep, queue);
-       bdp->cbd_sc |= BD_SC_WRAP;
+       bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 
        return 0;
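
The fec.h and fec_main.c changes above replace raw integer descriptor fields with fixed-size, endian-typed ones ('unsigned long' is 32-bit on ARM but 64-bit elsewhere, and the FEC's byte order need not match the host's). A user-space model of the accessor pattern, assuming the little-endian layout used on the ARM SoCs:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* fixed-width descriptor fields, stored little endian in memory */
struct bufdesc_sketch {
	uint16_t cbd_datlen;
	uint16_t cbd_sc;
	uint32_t cbd_bufaddr;
};

static uint16_t cpu_to_fec16(uint16_t v)	/* host -> LE16 */
{
	uint8_t b[2] = { (uint8_t)v, (uint8_t)(v >> 8) };
	uint16_t out;

	memcpy(&out, b, sizeof(out));
	return out;
}

static uint16_t fec16_to_cpu(uint16_t v)	/* LE16 -> host */
{
	uint8_t b[2];

	memcpy(b, &v, sizeof(b));
	return (uint16_t)(b[0] | (b[1] << 8));
}

int main(void)
{
	struct bufdesc_sketch bd;

	bd.cbd_datlen = cpu_to_fec16(1518);
	/* round-trips to 1518 regardless of host byte order */
	printf("%u\n", (unsigned int)fec16_to_cpu(bd.cbd_datlen));
	return 0;
}
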
 
index 52e0091b4fb261f983ab0e043d1cb0d48638dc03..1ba359f17ec6e2103d8b1b126ec84a55d30f006e 100644 (file)
@@ -552,7 +552,7 @@ static void tx_restart(struct net_device *dev)
        cbd_t __iomem *prev_bd;
        cbd_t __iomem *last_tx_bd;
 
-       last_tx_bd = fep->tx_bd_base + ((fpi->tx_ring - 1) * sizeof(cbd_t));
+       last_tx_bd = fep->tx_bd_base + (fpi->tx_ring - 1);
 
        /* get the current bd held in TBPTR  and scan back from this point */
        recheck_bd = curr_tbptr = (cbd_t __iomem *)
index a7139f588ad205e0a9441ce8e7d62a2513e4d814..678f5018d0be1c410c809fd8031a9f6e6601210e 100644 (file)
@@ -469,8 +469,8 @@ static int fmvj18x_config(struct pcmcia_device *link)
                    goto failed;
            }
            /* Read MACID from CIS */
-           for (i = 5; i < 11; i++)
-                   dev->dev_addr[i] = buf[i];
+           for (i = 0; i < 6; i++)
+                   dev->dev_addr[i] = buf[i + 5];
            kfree(buf);
        } else {
            if (pcmcia_get_mac_from_cis(link, dev))
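
The fmvj18x fix above: the MAC address occupies buf[5..10], so the destination index must run 0..5 while the source is offset by 5; the old loop copied buf[5..10] into dev_addr[5..10] and wrote past the six-byte address. In isolation, with made-up bytes:

#include <stdio.h>

int main(void)
{
	unsigned char buf[11] = { [5] = 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	unsigned char mac[6];
	int i;

	for (i = 0; i < 6; i++)	/* dest 0..5, source offset by 5 */
		mac[i] = buf[i + 5];
	printf("%02x:...:%02x\n", mac[0], mac[5]);	/* 00:...:55 */
	return 0;
}
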
index b3645297477e53309918e6762c44a6bfde031880..3bfe36f9405b60b4822185853ca3d3d35ea563a5 100644 (file)
@@ -95,21 +95,17 @@ static struct hnae_buf_ops hnae_bops = {
 static int __ae_match(struct device *dev, const void *data)
 {
        struct hnae_ae_dev *hdev = cls_to_ae_dev(dev);
-       const char *ae_id = data;
 
-       if (!strncmp(ae_id, hdev->name, AE_NAME_SIZE))
-               return 1;
-
-       return 0;
+       return hdev->dev->of_node == data;
 }
 
-static struct hnae_ae_dev *find_ae(const char *ae_id)
+static struct hnae_ae_dev *find_ae(const struct device_node *ae_node)
 {
        struct device *dev;
 
-       WARN_ON(!ae_id);
+       WARN_ON(!ae_node);
 
-       dev = class_find_device(hnae_class, NULL, ae_id, __ae_match);
+       dev = class_find_device(hnae_class, NULL, ae_node, __ae_match);
 
        return dev ? cls_to_ae_dev(dev) : NULL;
 }
@@ -316,7 +312,8 @@ EXPORT_SYMBOL(hnae_reinit_handle);
  * return handle ptr or ERR_PTR
  */
 struct hnae_handle *hnae_get_handle(struct device *owner_dev,
-                                   const char *ae_id, u32 port_id,
+                                   const struct device_node *ae_node,
+                                   u32 port_id,
                                    struct hnae_buf_ops *bops)
 {
        struct hnae_ae_dev *dev;
@@ -324,7 +321,7 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev,
        int i, j;
        int ret;
 
-       dev = find_ae(ae_id);
+       dev = find_ae(ae_node);
        if (!dev)
                return ERR_PTR(-ENODEV);
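
The hnae lookup above now keys on the device_node pointer resolved from a phandle rather than on a name string, so two accelerator engines can no longer alias by name. Pointer identity versus string comparison in miniature:

#include <stdio.h>
#include <string.h>

struct node {
	const char *name;
};

static int match_by_name(const struct node *n, const char *id)
{
	return !strncmp(id, n->name, 16);
}

static int match_by_node(const struct node *n, const struct node *want)
{
	return n == want;	/* a phandle resolves to a unique pointer */
}

int main(void)
{
	/* two distinct devices that happen to share a name */
	struct node a = { "dsaf" }, b = { "dsaf" };

	printf("%d %d\n", match_by_name(&b, a.name), match_by_node(&b, &a));
	/* prints "1 0": name matching confuses them, pointer matching does not */
	return 0;
}
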
 
index 6ca94dc3dda3c2447d59e369fee4eadeba6e0f52..1cbcb9fa3fb56b5ea4fee252f3f35294e6e7b6d3 100644 (file)
@@ -524,8 +524,11 @@ struct hnae_handle {
 
 #define ring_to_dev(ring) ((ring)->q->dev->dev)
 
-struct hnae_handle *hnae_get_handle(struct device *owner_dev, const char *ae_id,
-                                   u32 port_id, struct hnae_buf_ops *bops);
+struct hnae_handle *hnae_get_handle(struct device *owner_dev,
+                                   const struct device_node *ae_node,
+                                   u32 port_id,
+                                   struct hnae_buf_ops *bops);
+
 void hnae_put_handle(struct hnae_handle *handle);
 int hnae_ae_register(struct hnae_ae_dev *dev, struct module *owner);
 void hnae_ae_unregister(struct hnae_ae_dev *dev);
index 522b264866b42e68b65e40cd93e28f991a3e678c..a0070d0e740dae5b3de8f2b2a92a772854b08321 100644 (file)
@@ -847,6 +847,7 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev)
 {
        struct hnae_ae_dev *ae_dev = &dsaf_dev->ae_dev;
+       static atomic_t id = ATOMIC_INIT(-1);
 
        switch (dsaf_dev->dsaf_ver) {
        case AE_VERSION_1:
@@ -858,6 +859,9 @@ int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev)
        default:
                break;
        }
+
+       snprintf(ae_dev->name, AE_NAME_SIZE, "%s%d", DSAF_DEVICE_NAME,
+                (int)atomic_inc_return(&id));
        ae_dev->ops = &hns_dsaf_ops;
        ae_dev->dev = dsaf_dev->dev;
 
index 1c33bd06bd5cc484ba320e60ceb5bee113493014..9439f04962e1d96bf480d8aedf6472c912e28813 100644 (file)
@@ -35,7 +35,7 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
        int ret, i;
        u32 desc_num;
        u32 buf_size;
-       const char *name, *mode_str;
+       const char *mode_str;
        struct device_node *np = dsaf_dev->dev->of_node;
 
        if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v1"))
@@ -43,14 +43,6 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
        else
                dsaf_dev->dsaf_ver = AE_VERSION_2;
 
-       ret = of_property_read_string(np, "dsa_name", &name);
-       if (ret) {
-               dev_err(dsaf_dev->dev, "get dsaf name fail, ret=%d!\n", ret);
-               return ret;
-       }
-       strncpy(dsaf_dev->ae_dev.name, name, AE_NAME_SIZE);
-       dsaf_dev->ae_dev.name[AE_NAME_SIZE - 1] = '\0';
-
        ret = of_property_read_string(np, "mode", &mode_str);
        if (ret) {
                dev_err(dsaf_dev->dev, "get dsaf mode fail, ret=%d!\n", ret);
index 31c312f9826e9d8bc644b0aa1c6d6852ea25e5bd..40205b910f80e66a439c14b53e7f2c907b0fbeb1 100644 (file)
@@ -18,6 +18,7 @@ struct hns_mac_cb;
 
 #define DSAF_DRV_NAME "hns_dsaf"
 #define DSAF_MOD_VERSION "v1.0"
+#define DSAF_DEVICE_NAME "dsaf"
 
 #define HNS_DSAF_DEBUG_NW_REG_OFFSET 0x100000
 
index 0e30846a24f89b632028211f60ee82018432a7e0..3f77ff77abbc4594f8aeb0ccc40c6691fc122fac 100644 (file)
@@ -1802,7 +1802,7 @@ static int hns_nic_try_get_ae(struct net_device *ndev)
        int ret;
 
        h = hnae_get_handle(&priv->netdev->dev,
-                           priv->ae_name, priv->port_id, NULL);
+                           priv->ae_node, priv->port_id, NULL);
        if (IS_ERR_OR_NULL(h)) {
                ret = PTR_ERR(h);
                dev_dbg(priv->dev, "has not handle, register notifier!\n");
@@ -1880,13 +1880,16 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
        else
                priv->enet_ver = AE_VERSION_2;
 
-       ret = of_property_read_string(node, "ae-name", &priv->ae_name);
-       if (ret)
-               goto out_read_string_fail;
+       priv->ae_node = (void *)of_parse_phandle(node, "ae-handle", 0);
+       if (IS_ERR_OR_NULL(priv->ae_node)) {
+               ret = PTR_ERR(priv->ae_node);
+               dev_err(dev, "not find ae-handle\n");
+               goto out_read_prop_fail;
+       }
 
        ret = of_property_read_u32(node, "port-id", &priv->port_id);
        if (ret)
-               goto out_read_string_fail;
+               goto out_read_prop_fail;
 
        hns_init_mac_addr(ndev);
 
@@ -1945,7 +1948,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 
 out_notify_fail:
        (void)cancel_work_sync(&priv->service_task);
-out_read_string_fail:
+out_read_prop_fail:
        free_netdev(ndev);
        return ret;
 }
index 4b75270f014e765ba1cc02010c4175cafddda84c..c68ab3d34fc224ef00f43532f929fbd533b2ee80 100644 (file)
@@ -51,7 +51,7 @@ struct hns_nic_ops {
 };
 
 struct hns_nic_priv {
-       const char *ae_name;
+       const struct device_node *ae_node;
        u32 enet_ver;
        u32 port_id;
        int phy_mode;
index 1d5c3e16d8f4f4ad1f3ba580ee3a4a9c48663551..3daf2d4a7ca057a66e9ad797baa581c0d92caeba 100644 (file)
@@ -194,7 +194,6 @@ static const char *hp100_isa_tbl[] = {
 };
 #endif
 
-#ifdef CONFIG_EISA
 static struct eisa_device_id hp100_eisa_tbl[] = {
        { "HWPF180" }, /* HP J2577 rev A */
        { "HWP1920" }, /* HP 27248B */
@@ -205,9 +204,7 @@ static struct eisa_device_id hp100_eisa_tbl[] = {
        { "" }         /* Mandatory final entry ! */
 };
 MODULE_DEVICE_TABLE(eisa, hp100_eisa_tbl);
-#endif
 
-#ifdef CONFIG_PCI
 static const struct pci_device_id hp100_pci_tbl[] = {
        {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585A, PCI_ANY_ID, PCI_ANY_ID,},
        {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585B, PCI_ANY_ID, PCI_ANY_ID,},
@@ -219,7 +216,6 @@ static const struct pci_device_id hp100_pci_tbl[] = {
        {}                      /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(pci, hp100_pci_tbl);
-#endif
 
 static int hp100_rx_ratio = HP100_DEFAULT_RX_RATIO;
 static int hp100_priority_tx = HP100_DEFAULT_PRIORITY_TX;
@@ -2842,7 +2838,6 @@ static void cleanup_dev(struct net_device *d)
        free_netdev(d);
 }
 
-#ifdef CONFIG_EISA
 static int hp100_eisa_probe(struct device *gendev)
 {
        struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
@@ -2884,9 +2879,7 @@ static struct eisa_driver hp100_eisa_driver = {
                .remove  = hp100_eisa_remove,
         }
 };
-#endif
 
-#ifdef CONFIG_PCI
 static int hp100_pci_probe(struct pci_dev *pdev,
                           const struct pci_device_id *ent)
 {
@@ -2955,7 +2948,6 @@ static struct pci_driver hp100_pci_driver = {
        .probe          = hp100_pci_probe,
        .remove         = hp100_pci_remove,
 };
-#endif
 
 /*
  *  module section
@@ -3032,23 +3024,17 @@ static int __init hp100_module_init(void)
        err = hp100_isa_init();
        if (err && err != -ENODEV)
                goto out;
-#ifdef CONFIG_EISA
        err = eisa_driver_register(&hp100_eisa_driver);
        if (err && err != -ENODEV)
                goto out2;
-#endif
-#ifdef CONFIG_PCI
        err = pci_register_driver(&hp100_pci_driver);
        if (err && err != -ENODEV)
                goto out3;
-#endif
  out:
        return err;
  out3:
-#ifdef CONFIG_EISA
        eisa_driver_unregister (&hp100_eisa_driver);
  out2:
-#endif
        hp100_isa_cleanup();
        goto out;
 }
@@ -3057,12 +3043,8 @@ static int __init hp100_module_init(void)
 static void __exit hp100_module_exit(void)
 {
        hp100_isa_cleanup();
-#ifdef CONFIG_EISA
        eisa_driver_unregister (&hp100_eisa_driver);
-#endif
-#ifdef CONFIG_PCI
        pci_unregister_driver (&hp100_pci_driver);
-#endif
 }
 
 module_init(hp100_module_init)
index bb4612c159fd197db5b2a750027400b48cbdec77..8f3b53e0dc46c28965d00ea994940949404361be 100644 (file)
@@ -7117,9 +7117,7 @@ static void i40e_service_task(struct work_struct *work)
        i40e_watchdog_subtask(pf);
        i40e_fdir_reinit_subtask(pf);
        i40e_sync_filters_subtask(pf);
-#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE)
        i40e_sync_udp_filters_subtask(pf);
-#endif
        i40e_clean_adminq_subtask(pf);
 
        i40e_service_event_complete(pf);
@@ -8515,6 +8513,8 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
 }
 
 #endif
+
+#if IS_ENABLED(CONFIG_VXLAN)
 /**
  * i40e_add_vxlan_port - Get notifications about VXLAN ports that come up
  * @netdev: This physical port's netdev
@@ -8524,7 +8524,6 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
 static void i40e_add_vxlan_port(struct net_device *netdev,
                                sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_VXLAN)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8557,7 +8556,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
        pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN;
        pf->pending_udp_bitmap |= BIT_ULL(next_idx);
        pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
-#endif
 }
 
 /**
@@ -8569,7 +8567,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
 static void i40e_del_vxlan_port(struct net_device *netdev,
                                sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_VXLAN)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8592,9 +8589,10 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
                netdev_warn(netdev, "vxlan port %d was not found, not deleting\n",
                            ntohs(port));
        }
-#endif
 }
+#endif
 
+#if IS_ENABLED(CONFIG_GENEVE)
 /**
  * i40e_add_geneve_port - Get notifications about GENEVE ports that come up
  * @netdev: This physical port's netdev
@@ -8604,7 +8602,6 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
 static void i40e_add_geneve_port(struct net_device *netdev,
                                 sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_GENEVE)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8639,7 +8636,6 @@ static void i40e_add_geneve_port(struct net_device *netdev,
        pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
 
        dev_info(&pf->pdev->dev, "adding geneve port %d\n", ntohs(port));
-#endif
 }
 
 /**
@@ -8651,7 +8647,6 @@ static void i40e_add_geneve_port(struct net_device *netdev,
 static void i40e_del_geneve_port(struct net_device *netdev,
                                 sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_GENEVE)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8677,8 +8672,8 @@ static void i40e_del_geneve_port(struct net_device *netdev,
                netdev_warn(netdev, "geneve port %d was not found, not deleting\n",
                            ntohs(port));
        }
-#endif
 }
+#endif
 
 static int i40e_get_phys_port_id(struct net_device *netdev,
                                 struct netdev_phys_item_id *ppid)
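
The i40e hunks above move the IS_ENABLED() guards from inside the VXLAN/GENEVE handlers to around the whole functions, so a disabled feature compiles away instead of leaving empty stubs. The shape of the pattern, with a made-up config symbol:

#include <stdio.h>

#define CONFIG_VXLAN_SKETCH 1	/* flip to 0 to compile the feature out */

#if CONFIG_VXLAN_SKETCH
static void add_vxlan_port(int port)
{
	printf("adding vxlan port %d\n", port);
}
#endif

int main(void)
{
#if CONFIG_VXLAN_SKETCH
	add_vxlan_port(4789);	/* IANA VXLAN port, used as sample data */
#endif
	return 0;
}
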
index 720516b0e8eec59debf3b41195f9a2617eef169b..47bd8b3145a79a9bf2e299fc303100be4bf8289c 100644 (file)
@@ -2313,8 +2313,8 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
        struct iphdr *this_ip_hdr;
        u32 network_hdr_len;
        u8 l4_hdr = 0;
-       struct udphdr *oudph;
-       struct iphdr *oiph;
+       struct udphdr *oudph = NULL;
+       struct iphdr *oiph = NULL;
        u32 l4_tunnel = 0;
 
        if (skb->encapsulation) {
index a0c03834a2f7b3d7494b864db6e96ee590b36c81..55831188bc32431e935550a7da5671fa1d24d89a 100644 (file)
@@ -762,10 +762,10 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq,
 
        if (length <= 8 && (uintptr_t)data & 0x7) {
                /* Copy unaligned small data fragment to TSO header data area */
-               memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE,
+               memcpy(txq->tso_hdrs + tx_index * TSO_HEADER_SIZE,
                       data, length);
                desc->buf_ptr = txq->tso_hdrs_dma
-                       + txq->tx_curr_desc * TSO_HEADER_SIZE;
+                       + tx_index * TSO_HEADER_SIZE;
        } else {
                /* Alignment is okay, map buffer and hand off to hardware */
                txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
index fabc8df40392572d607f3e3417e60b54a67b4bf3..b0ae69f8449369585dfa82de3243f401f8a1022a 100644 (file)
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
 #include <linux/etherdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
-#include <linux/mbus.h>
-#include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
 #include <linux/io.h>
-#include <net/tso.h>
+#include <linux/kernel.h>
+#include <linux/mbus.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/of_address.h>
 #include <linux/phy.h>
-#include <linux/clk.h>
-#include <linux/cpu.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/tso.h>
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
@@ -370,9 +370,16 @@ struct mvneta_port {
        struct net_device *dev;
        struct notifier_block cpu_notifier;
        int rxq_def;
+       /* Protect the access to the percpu interrupt registers,
+        * ensuring that the configuration remains coherent.
+        */
+       spinlock_t lock;
+       bool is_stopped;
 
        /* Core clock */
        struct clk *clk;
+       /* AXI clock */
+       struct clk *clk_bus;
        u8 mcast_count[256];
        u16 tx_ring_size;
        u16 rx_ring_size;
@@ -1036,6 +1043,43 @@ static void mvneta_set_autoneg(struct mvneta_port *pp, int enable)
        }
 }
 
+static void mvneta_percpu_unmask_interrupt(void *arg)
+{
+       struct mvneta_port *pp = arg;
+
+       /* All the queues are unmasked, but actually only the ones
+        * mapped to this CPU will be unmasked
+        */
+       mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+                   MVNETA_RX_INTR_MASK_ALL |
+                   MVNETA_TX_INTR_MASK_ALL |
+                   MVNETA_MISCINTR_INTR_MASK);
+}
+
+static void mvneta_percpu_mask_interrupt(void *arg)
+{
+       struct mvneta_port *pp = arg;
+
+       /* All the queues are masked, but actually only the ones
+        * mapped to this CPU will be masked
+        */
+       mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+       mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
+       mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+}
+
+static void mvneta_percpu_clear_intr_cause(void *arg)
+{
+       struct mvneta_port *pp = arg;
+
+       /* All the queues are cleared, but actually only the ones
+        * mapped to this CPU will be cleared
+        */
+       mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
+       mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+       mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+}
+
 /* This method sets defaults to the NETA port:
  *     Clears interrupt Cause and Mask registers.
  *     Clears all MAC tables.
@@ -1053,14 +1097,10 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
        int max_cpu = num_present_cpus();
 
        /* Clear all Cause registers */
-       mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
-       mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
-       mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+       on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
        /* Mask all interrupts */
-       mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-       mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-       mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+       on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
        mvreg_write(pp, MVNETA_INTR_ENABLE, 0);
 
        /* Enable MBUS Retry bit16 */
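The conversion above replaces open-coded for_each_online_cpu() + smp_call_function_single() loops with a single on_each_cpu() call. A userspace analogy (mine, with threads standing in for CPUs) of the "run a callback everywhere and wait" semantics:

#include <pthread.h>
#include <stdio.h>

#define NCPU 4

struct call { void (*fn)(void *); void *arg; };

static void percpu_mask_interrupt(void *arg)    /* stand-in for the real callback */
{
        __atomic_add_fetch((int *)arg, 1, __ATOMIC_SEQ_CST);
}

static void *cpu_thread(void *p)
{
        struct call *c = p;

        c->fn(c->arg);          /* the "IPI handler" running on this CPU */
        return NULL;
}

static void on_each_cpu_sim(void (*fn)(void *), void *arg)
{
        pthread_t t[NCPU];
        struct call c = { fn, arg };
        int i;

        for (i = 0; i < NCPU; i++)
                pthread_create(&t[i], NULL, cpu_thread, &c);
        for (i = 0; i < NCPU; i++)
                pthread_join(t[i], NULL);       /* the wait == true part */
}

int main(void)
{
        int masked = 0;

        on_each_cpu_sim(percpu_mask_interrupt, &masked);
        printf("callback ran on %d CPUs\n", masked);
        return 0;
}

Compile with -pthread.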
@@ -2526,34 +2566,9 @@ static int mvneta_setup_txqs(struct mvneta_port *pp)
        return 0;
 }
 
-static void mvneta_percpu_unmask_interrupt(void *arg)
-{
-       struct mvneta_port *pp = arg;
-
-       /* All the queue are unmasked, but actually only the ones
-        * maped to this CPU will be unmasked
-        */
-       mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-                   MVNETA_RX_INTR_MASK_ALL |
-                   MVNETA_TX_INTR_MASK_ALL |
-                   MVNETA_MISCINTR_INTR_MASK);
-}
-
-static void mvneta_percpu_mask_interrupt(void *arg)
-{
-       struct mvneta_port *pp = arg;
-
-       /* All the queue are masked, but actually only the ones
-        * maped to this CPU will be masked
-        */
-       mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-       mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-       mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
-}
-
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
-       unsigned int cpu;
+       int cpu;
 
        mvneta_max_rx_size_set(pp, pp->pkt_size);
        mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
@@ -2562,16 +2577,15 @@ static void mvneta_start_dev(struct mvneta_port *pp)
        mvneta_port_enable(pp);
 
        /* Enable polling on the port */
-       for_each_present_cpu(cpu) {
+       for_each_online_cpu(cpu) {
                struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
                napi_enable(&port->napi);
        }
 
        /* Unmask interrupts. It has to be done from each CPU */
-       for_each_online_cpu(cpu)
-               smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt,
-                                        pp, true);
+       on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+
        mvreg_write(pp, MVNETA_INTR_MISC_MASK,
                    MVNETA_CAUSE_PHY_STATUS_CHANGE |
                    MVNETA_CAUSE_LINK_CHANGE |
@@ -2587,7 +2601,7 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 
        phy_stop(pp->phy_dev);
 
-       for_each_present_cpu(cpu) {
+       for_each_online_cpu(cpu) {
                struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
                napi_disable(&port->napi);
@@ -2602,13 +2616,10 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
        mvneta_port_disable(pp);
 
        /* Clear all ethernet port interrupts */
-       mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
-       mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+       on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
        /* Mask all ethernet port interrupts */
-       mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-       mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-       mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+       on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 
        mvneta_tx_reset(pp);
        mvneta_rx_reset(pp);
@@ -2845,11 +2856,20 @@ static void mvneta_percpu_disable(void *arg)
        disable_percpu_irq(pp->dev->irq);
 }
 
+/* Electing a CPU must be done atomically: it must happen either
+ * before or after the removal/insertion of a CPU, and this function
+ * is not reentrant.
+ */
 static void mvneta_percpu_elect(struct mvneta_port *pp)
 {
-       int online_cpu_idx, max_cpu, cpu, i = 0;
+       int elected_cpu = 0, max_cpu, cpu, i = 0;
+
+       /* Use the CPU associated with the rxq when it is online; in all
+        * other cases, use CPU 0, which can't be offline.
+        */
+       if (cpu_online(pp->rxq_def))
+               elected_cpu = pp->rxq_def;
 
-       online_cpu_idx = pp->rxq_def % num_online_cpus();
        max_cpu = num_present_cpus();
 
        for_each_online_cpu(cpu) {
@@ -2860,7 +2880,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
                        if ((rxq % max_cpu) == cpu)
                                rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
 
-               if (i == online_cpu_idx)
+               if (cpu == elected_cpu)
                        /* Map the default receive queue to the
                         * elected CPU
                         */
@@ -2871,7 +2891,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
                 * the CPU bound to the default RX queue
                 */
                if (txq_number == 1)
-                       txq_map = (i == online_cpu_idx) ?
+                       txq_map = (cpu == elected_cpu) ?
                                MVNETA_CPU_TXQ_ACCESS(1) : 0;
                else
                        txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
@@ -2900,6 +2920,14 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb,
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
+               spin_lock(&pp->lock);
+               /* Configuring the driver for a new CPU while the
+                * driver is stopping is racy, so just avoid it.
+                */
+               if (pp->is_stopped) {
+                       spin_unlock(&pp->lock);
+                       break;
+               }
                netif_tx_stop_all_queues(pp->dev);
 
                /* We have to synchronise on the napi of each CPU
@@ -2915,9 +2943,7 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb,
                }
 
                /* Mask all ethernet port interrupts */
-               mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-               mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-               mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+               on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
                napi_enable(&port->napi);
 
 
@@ -2932,27 +2958,25 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb,
                 */
                mvneta_percpu_elect(pp);
 
-               /* Unmask all ethernet port interrupts, as this
-                * notifier is called for each CPU then the CPU to
-                * Queue mapping is applied
-                */
-               mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-                       MVNETA_RX_INTR_MASK(rxq_number) |
-                       MVNETA_TX_INTR_MASK(txq_number) |
-                       MVNETA_MISCINTR_INTR_MASK);
+               /* Unmask all ethernet port interrupts */
+               on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
                mvreg_write(pp, MVNETA_INTR_MISC_MASK,
                        MVNETA_CAUSE_PHY_STATUS_CHANGE |
                        MVNETA_CAUSE_LINK_CHANGE |
                        MVNETA_CAUSE_PSC_SYNC_CHANGE);
                netif_tx_start_all_queues(pp->dev);
+               spin_unlock(&pp->lock);
                break;
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                netif_tx_stop_all_queues(pp->dev);
+               /* Thanks to this lock we are sure that any pending
+                * cpu election is done
+                */
+               spin_lock(&pp->lock);
                /* Mask all ethernet port interrupts */
-               mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-               mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-               mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+               on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+               spin_unlock(&pp->lock);
 
                napi_synchronize(&port->napi);
                napi_disable(&port->napi);
@@ -2966,12 +2990,11 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb,
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                /* Check if a new CPU must be elected now this one is down */
+               spin_lock(&pp->lock);
                mvneta_percpu_elect(pp);
+               spin_unlock(&pp->lock);
                /* Unmask all ethernet port interrupts */
-               mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-                       MVNETA_RX_INTR_MASK(rxq_number) |
-                       MVNETA_TX_INTR_MASK(txq_number) |
-                       MVNETA_MISCINTR_INTR_MASK);
+               on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
                mvreg_write(pp, MVNETA_INTR_MISC_MASK,
                        MVNETA_CAUSE_PHY_STATUS_CHANGE |
                        MVNETA_CAUSE_LINK_CHANGE |
@@ -2986,7 +3009,7 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb,
 static int mvneta_open(struct net_device *dev)
 {
        struct mvneta_port *pp = netdev_priv(dev);
-       int ret, cpu;
+       int ret;
 
        pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
        pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
@@ -3008,22 +3031,12 @@ static int mvneta_open(struct net_device *dev)
                goto err_cleanup_txqs;
        }
 
-       /* Even though the documentation says that request_percpu_irq
-        * doesn't enable the interrupts automatically, it actually
-        * does so on the local CPU.
-        *
-        * Make sure it's disabled.
-        */
-       mvneta_percpu_disable(pp);
-
        /* Enable per-CPU interrupt on all the CPUs to handle our RX
         * queue interrupts
         */
-       for_each_online_cpu(cpu)
-               smp_call_function_single(cpu, mvneta_percpu_enable,
-                                        pp, true);
-
+       on_each_cpu(mvneta_percpu_enable, pp, true);
 
+       pp->is_stopped = false;
        /* Register a CPU notifier to handle the case where our CPU
         * might be taken offline.
         */
@@ -3055,13 +3068,20 @@ err_cleanup_rxqs:
 static int mvneta_stop(struct net_device *dev)
 {
        struct mvneta_port *pp = netdev_priv(dev);
-       int cpu;
 
+       /* Record that we are stopping, so the notifiers don't set up
+        * the driver for new CPUs.
+        */
+       spin_lock(&pp->lock);
+       pp->is_stopped = true;
        mvneta_stop_dev(pp);
        mvneta_mdio_remove(pp);
        unregister_cpu_notifier(&pp->cpu_notifier);
-       for_each_present_cpu(cpu)
-               smp_call_function_single(cpu, mvneta_percpu_disable, pp, true);
+       /* Now that the notifiers are unregistered, we can release the
+        * lock.
+        */
+       spin_unlock(&pp->lock);
+       on_each_cpu(mvneta_percpu_disable, pp, true);
        free_percpu_irq(dev->irq, pp->ports);
        mvneta_cleanup_rxqs(pp);
        mvneta_cleanup_txqs(pp);
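The is_stopped flag plus pp->lock pattern added in this file reduces to the following userspace analogy (my construction): the teardown path raises the flag under the lock, and the hotplug callback re-checks it under the same lock, so a callback can never reconfigure a device that is concurrently stopping.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool is_stopped;

static void cpu_online_callback(void)
{
        pthread_mutex_lock(&lock);
        if (is_stopped) {                       /* device is going away: bail out */
                pthread_mutex_unlock(&lock);
                return;
        }
        puts("reconfiguring for new CPU");      /* ... queue setup would go here ... */
        pthread_mutex_unlock(&lock);
}

static void stop_device(void)
{
        pthread_mutex_lock(&lock);
        is_stopped = true;                      /* callbacks bail out from now on */
        pthread_mutex_unlock(&lock);
        /* ... unregister the notifier, tear down queues ... */
}

int main(void)
{
        cpu_online_callback();                  /* runs */
        stop_device();
        cpu_online_callback();                  /* now a no-op */
        return 0;
}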
@@ -3242,26 +3262,25 @@ static void mvneta_ethtool_update_stats(struct mvneta_port *pp)
        const struct mvneta_statistic *s;
        void __iomem *base = pp->base;
        u32 high, low, val;
+       u64 val64;
        int i;
 
        for (i = 0, s = mvneta_statistics;
             s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics);
             s++, i++) {
-               val = 0;
-
                switch (s->type) {
                case T_REG_32:
                        val = readl_relaxed(base + s->offset);
+                       pp->ethtool_stats[i] += val;
                        break;
                case T_REG_64:
                        /* Docs say to read low 32-bit then high */
                        low = readl_relaxed(base + s->offset);
                        high = readl_relaxed(base + s->offset + 4);
-                       val = (u64)high << 32 | low;
+                       val64 = (u64)high << 32 | low;
+                       pp->ethtool_stats[i] += val64;
                        break;
                }
-
-               pp->ethtool_stats[i] += val;
        }
 }
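The val64 change above fixes a silent truncation: val is a u32, so accumulating a T_REG_64 statistic through it dropped the high word. A tiny standalone demonstration (mine):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t high = 0x1, low = 0x2;
        uint32_t val = (uint64_t)high << 32 | low;      /* old code: truncates to 0x2 */
        uint64_t val64 = (uint64_t)high << 32 | low;    /* fixed: keeps the high word */

        printf("u32: 0x%x, u64: 0x%llx\n", val, (unsigned long long)val64);
        return 0;
}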
 
@@ -3311,9 +3330,7 @@ static int  mvneta_config_rss(struct mvneta_port *pp)
 
        netif_tx_stop_all_queues(pp->dev);
 
-       for_each_online_cpu(cpu)
-               smp_call_function_single(cpu, mvneta_percpu_mask_interrupt,
-                                        pp, true);
+       on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 
        /* We have to synchronise on the napi of each CPU */
        for_each_online_cpu(cpu) {
@@ -3334,7 +3351,9 @@ static int  mvneta_config_rss(struct mvneta_port *pp)
        mvreg_write(pp, MVNETA_PORT_CONFIG, val);
 
        /* Update the elected CPU matching the new rxq_def */
+       spin_lock(&pp->lock);
        mvneta_percpu_elect(pp);
+       spin_unlock(&pp->lock);
 
        /* We have to synchronise on the napi of each CPU */
        for_each_online_cpu(cpu) {
@@ -3605,7 +3624,9 @@ static int mvneta_probe(struct platform_device *pdev)
 
        pp->indir[0] = rxq_def;
 
-       pp->clk = devm_clk_get(&pdev->dev, NULL);
+       pp->clk = devm_clk_get(&pdev->dev, "core");
+       if (IS_ERR(pp->clk))
+               pp->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pp->clk)) {
                err = PTR_ERR(pp->clk);
                goto err_put_phy_node;
@@ -3613,6 +3634,10 @@ static int mvneta_probe(struct platform_device *pdev)
 
        clk_prepare_enable(pp->clk);
 
+       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+       if (!IS_ERR(pp->clk_bus))
+               clk_prepare_enable(pp->clk_bus);
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        pp->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(pp->base)) {
@@ -3724,6 +3749,7 @@ err_free_stats:
 err_free_ports:
        free_percpu(pp->ports);
 err_clk:
+       clk_disable_unprepare(pp->clk_bus);
        clk_disable_unprepare(pp->clk);
 err_put_phy_node:
        of_node_put(phy_node);
@@ -3741,6 +3767,7 @@ static int mvneta_remove(struct platform_device *pdev)
        struct mvneta_port *pp = netdev_priv(dev);
 
        unregister_netdev(dev);
+       clk_disable_unprepare(pp->clk_bus);
        clk_disable_unprepare(pp->clk);
        free_percpu(pp->ports);
        free_percpu(pp->stats);
index a4beccf1fd46e26483dc796829d444a15f77723f..c797971aefabbd89ddd31e8defd7ff9f93508f0c 100644 (file)
@@ -3061,7 +3061,7 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
 
                pe = kzalloc(sizeof(*pe), GFP_KERNEL);
                if (!pe)
-                       return -1;
+                       return -ENOMEM;
                mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
                pe->index = tid;
 
@@ -3077,7 +3077,7 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
        if (pmap == 0) {
                if (add) {
                        kfree(pe);
-                       return -1;
+                       return -EINVAL;
                }
                mvpp2_prs_hw_inv(priv, pe->index);
                priv->prs_shadow[pe->index].valid = false;
index 715de8affcc950e0ea18fd706bc8f04542d34a6f..c7e939945259dc876b66cfedd0d85f9d7e90a914 100644 (file)
@@ -182,10 +182,17 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
                err = mlx4_reset_slave(dev);
        else
                err = mlx4_reset_master(dev);
-       BUG_ON(err != 0);
 
+       if (!err) {
+               mlx4_err(dev, "device was reset successfully\n");
+       } else {
+               /* EEH could have disabled the PCI channel during reset. That's
+                * recoverable and the PCI error flow will handle it.
+                */
+               if (!pci_channel_offline(dev->persist->pdev))
+                       BUG_ON(1);
+       }
        dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
-       mlx4_err(dev, "device was reset successfully\n");
        mutex_unlock(&persist->device_state_mutex);
 
        /* At that step HW was already reset, now notify clients */
index d48d5793407d9ec729fe8548a049a2506d02a934..e94ca1c3fc7c6a83a190a29d4116086dcc9de8ca 100644 (file)
@@ -2429,7 +2429,7 @@ err_thread:
        flush_workqueue(priv->mfunc.master.comm_wq);
        destroy_workqueue(priv->mfunc.master.comm_wq);
 err_slaves:
-       while (--i) {
+       while (i--) {
                for (port = 1; port <= MLX4_MAX_PORTS; port++)
                        kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
        }
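The one-character change above fixes a classic cleanup-loop bug: after setup fails at iteration i, "while (--i)" never frees index 0 (and would wrap around if i were already 0), while "while (i--)" visits exactly i-1 down to 0. A standalone demonstration (mine):

#include <stdio.h>

int main(void)
{
        int i;

        i = 3;                          /* pretend setup failed at i == 3 */
        printf("while (i--):");
        while (i--)
                printf(" %d", i);       /* 2 1 0: every initialized slot */
        printf("\n");

        i = 3;
        printf("while (--i):");
        while (--i)
                printf(" %d", i);       /* 2 1: slot 0 is leaked */
        printf("\n");
        return 0;
}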
index 3348e646db705f41ff1cb3923d4d1533aea80e2d..a849da92f857e5c22cd1ec93158cecbff9c75d90 100644 (file)
@@ -318,7 +318,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
        if (timestamp_en)
                cq_context->flags  |= cpu_to_be32(1 << 19);
 
-       cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
+       cq_context->logsize_usrpage =
+               cpu_to_be32((ilog2(nent) << 24) |
+                           mlx4_to_hw_uar_index(dev, uar->index));
        cq_context->comp_eqn        = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn;
        cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
index 038f9ce391e626f02d3fe2f51a19fbd4b08a2711..1494997c4f7e3bef36141ea439d812ca9bb1a335 100644 (file)
@@ -236,6 +236,24 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = {
        .enable         = mlx4_en_phc_enable,
 };
 
+#define MLX4_EN_WRAP_AROUND_SEC        10ULL
+
+/* This function calculates the max shift that allows the cycles
+ * register to cover MLX4_EN_WRAP_AROUND_SEC seconds of user range.
+ */
+static u32 freq_to_shift(u16 freq)
+{
+       u32 freq_khz = freq * 1000;
+       u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
+       u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ?
+               max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1;
+       /* calculate the max possible multiplier in order to fit in 64 bits */
+       u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded);
+
+       /* This comes from the reverse of clocksource_khz2mult */
+       return ilog2(div_u64(max_mul * freq_khz, 1000000));
+}
+
 void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
 {
        struct mlx4_dev *dev = mdev->dev;
@@ -254,12 +272,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
        memset(&mdev->cycles, 0, sizeof(mdev->cycles));
        mdev->cycles.read = mlx4_en_read_clock;
        mdev->cycles.mask = CLOCKSOURCE_MASK(48);
-       /* Using shift to make calculation more accurate. Since current HW
-        * clock frequency is 427 MHz, and cycles are given using a 48 bits
-        * register, the biggest shift when calculating using u64, is 14
-        * (max_cycles * multiplier < 2^64)
-        */
-       mdev->cycles.shift = 14;
+       mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock);
        mdev->cycles.mult =
                clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
        mdev->nominal_c_mult = mdev->cycles.mult;
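Working freq_to_shift() through for the 427 MHz clock mentioned in the removed comment (my arithmetic; the exact power-of-two corner case is omitted): freq_khz = 427000, max_val_cycles = 4.27e9 cycles in 10 s, rounded up to 2^33 - 1, max_mul = 2^31, and ilog2(2^31 * 427000 / 10^6) = 29, a much finer shift than the old hard-coded 14. A userspace re-computation:

#include <stdio.h>
#include <stdint.h>

static int ilog2_u64(uint64_t v)
{
        int l = -1;

        while (v) {
                v >>= 1;
                l++;
        }
        return l;
}

static uint64_t roundup_pow_of_two_u64(uint64_t v)
{
        uint64_t r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

int main(void)
{
        uint64_t freq_khz = 427 * 1000ULL;                 /* 427 MHz */
        uint64_t max_val_cycles = freq_khz * 1000 * 10;    /* 10 s wraparound */
        uint64_t rounded = roundup_pow_of_two_u64(max_val_cycles) - 1;
        uint64_t max_mul = UINT64_MAX / rounded;

        printf("shift = %d\n", ilog2_u64(max_mul * freq_khz / 1000000));
        return 0;
}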
index 0c7e3f69a73bb6f787b8387efd3d34624a188568..f191a16125893e5b742c5d99ea7bd814dba433a5 100644 (file)
@@ -2344,8 +2344,6 @@ out:
        /* set offloads */
        priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
                                      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
-       priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
-       priv->dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
 }
 
 static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
@@ -2356,8 +2354,6 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
        /* unset offloads */
        priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
                                      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL);
-       priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
-       priv->dev->features    &= ~NETIF_F_GSO_UDP_TUNNEL;
 
        ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
                                  VXLAN_STEER_BY_OUTER_MAC, 0);
@@ -2980,6 +2976,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
                priv->rss_hash_fn = ETH_RSS_HASH_TOP;
        }
 
+       if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+               dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+               dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
+       }
+
        mdev->pndev[port] = dev;
        mdev->upper[port] = NULL;
 
index ee99e67187f5b1cc68fc224b91d5b2bf2d133b05..3904b5fc0b7c904548763ffbb2d31fbcccfe5b6c 100644 (file)
@@ -238,11 +238,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
        stats->collisions = 0;
        stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP);
        stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
-       stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+       stats->rx_over_errors = 0;
        stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
        stats->rx_frame_errors = 0;
        stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
-       stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+       stats->rx_missed_errors = 0;
        stats->tx_aborted_errors = 0;
        stats->tx_carrier_errors = 0;
        stats->tx_fifo_errors = 0;
index 12aab5a659d33e4c098a6b8bedf2d4014798e14c..02e925d6f7348d774fe70c4a91b5038d5a27ba35 100644 (file)
@@ -58,7 +58,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
        } else {
                context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
        }
-       context->usr_page = cpu_to_be32(mdev->priv_uar.index);
+       context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
+                                       mdev->priv_uar.index));
        context->local_qpn = cpu_to_be32(qpn);
        context->pri_path.ackto = 1 & 0x07;
        context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
index 4421bf5463f67159618c3a4c572b4e9ebc04dd7b..e0946ab22010f4d1c28373b33e8a9b8d334c764b 100644 (file)
@@ -213,7 +213,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
        mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
                                ring->cqn, user_prio, &ring->context);
        if (ring->bf_alloced)
-               ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);
+               ring->context.usr_page =
+                       cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
+                                                        ring->bf.uar->index));
 
        err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
                               &ring->qp, &ring->qp_state);
index 4696053165f8ca192d5e8ccb21971d175d267ca7..f613977455e08340995baeb65763086e408479f3 100644 (file)
@@ -940,9 +940,10 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
 
        if (!priv->eq_table.uar_map[index]) {
                priv->eq_table.uar_map[index] =
-                       ioremap(pci_resource_start(dev->persist->pdev, 2) +
-                               ((eq->eqn / 4) << PAGE_SHIFT),
-                               PAGE_SIZE);
+                       ioremap(
+                               pci_resource_start(dev->persist->pdev, 2) +
+                               ((eq->eqn / 4) << (dev->uar_page_shift)),
+                               (1 << (dev->uar_page_shift)));
                if (!priv->eq_table.uar_map[index]) {
                        mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
                                 eq->eqn);
index 2c2baab9d88044d77d910653f882e9815fa57561..d66c690a859773d24db2cc835f8c365135023bf4 100644 (file)
@@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
                [29] = "802.1ad offload support",
                [31] = "Modifying loopback source checks using UPDATE_QP support",
                [32] = "Loopback source checks support",
+               [33] = "RoCEv2 support"
        };
        int i;
 
@@ -626,6 +627,8 @@ out:
        return err;
 }
 
+static void disable_unsupported_roce_caps(void *buf);
+
 int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
        struct mlx4_cmd_mailbox *mailbox;
@@ -738,6 +741,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        if (err)
                goto out;
 
+       if (mlx4_is_mfunc(dev))
+               disable_unsupported_roce_caps(outbox);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
        dev_cap->reserved_qps = 1 << (field & 0xf);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
@@ -905,6 +910,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
        MLX4_GET(dev_cap->bmme_flags, outbox,
                 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+       if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+               dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
        if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
@@ -1161,6 +1168,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
        if (err)
                return err;
 
+       disable_unsupported_roce_caps(outbox->buf);
        /* add port mng change event capability and disable mw type 1
         * unconditionally to slaves
         */
@@ -1258,6 +1266,21 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
        return 0;
 }
 
+static void disable_unsupported_roce_caps(void *buf)
+{
+       u32 flags;
+
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+       flags &= ~(1UL << 31);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+       flags &= ~(1UL << 24);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+       flags &= ~(MLX4_FLAG_ROCE_V1_V2);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+}
+
 int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
                            struct mlx4_vhcr *vhcr,
                            struct mlx4_cmd_mailbox *inbox,
@@ -2239,7 +2262,8 @@ struct mlx4_config_dev {
        __be32  rsvd1[3];
        __be16  vxlan_udp_dport;
        __be16  rsvd2;
-       __be32  rsvd3;
+       __be16  roce_v2_entropy;
+       __be16  roce_v2_udp_dport;
        __be32  roce_flags;
        __be32  rsvd4[25];
        __be16  rsvd5;
@@ -2248,6 +2272,7 @@ struct mlx4_config_dev {
 };
 
 #define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_ROCE_V2_UDP_DPORT BIT(3)
 #define MLX4_DISABLE_RX_PORT BIT(18)
 
 static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
@@ -2365,6 +2390,18 @@ int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
        return mlx4_CONFIG_DEV_set(dev, &config_dev);
 }
 
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port)
+{
+       struct mlx4_config_dev config_dev;
+
+       memset(&config_dev, 0, sizeof(config_dev));
+       config_dev.update_flags    = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT);
+       config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port);
+
+       return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port);
+
 int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
 {
        struct mlx4_cmd_mailbox *mailbox;
index f1b6d219e445431c0dfe4f255b8abe9d3ec5f9b4..2cc3c626c3fea732d0862ff5f0ce520d442a6205 100644 (file)
@@ -168,6 +168,20 @@ struct mlx4_port_config {
 
 static atomic_t pf_loading = ATOMIC_INIT(0);
 
+static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
+                                             struct mlx4_dev_cap *dev_cap)
+{
+       /* reserved_uars is calculated in units of the system page size.
+        * Therefore, an adjustment is needed when the UAR page size is
+        * smaller than the system page size.
+        */
+       dev->caps.reserved_uars =
+               max_t(int,
+                     mlx4_get_num_reserved_uar(dev),
+                     dev_cap->reserved_uars /
+                       (1 << (PAGE_SHIFT - dev->uar_page_shift)));
+}
+
 int mlx4_check_port_params(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_type)
 {
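A quick userspace check (my numbers; min_reserved is a stand-in for whatever mlx4_get_num_reserved_uar() returns) of the scaling in mlx4_set_num_reserved_uars(): with 64 KB system pages and 4 KB UAR pages, the firmware count is divided by 16 before the max is taken.

#include <stdio.h>

int main(void)
{
        int page_shift = 16;            /* assume a 64 KB-page kernel */
        int uar_page_shift = 12;        /* 4 KB UAR pages */
        int fw_reserved_uars = 128;     /* as reported by QUERY_DEV_CAP */
        int min_reserved = 32;          /* stand-in for mlx4_get_num_reserved_uar() */

        int scaled = fw_reserved_uars / (1 << (page_shift - uar_page_shift));
        int reserved = scaled > min_reserved ? scaled : min_reserved;

        printf("reserved_uars = %d\n", reserved);       /* max(32, 8) = 32 */
        return 0;
}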
@@ -386,8 +400,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;
 
-       /* The first 128 UARs are used for EQ doorbells */
-       dev->caps.reserved_uars      = max_t(int, 128, dev_cap->reserved_uars);
        dev->caps.reserved_pds       = dev_cap->reserved_pds;
        dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->reserved_xrcds : 0;
@@ -405,6 +417,15 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
        dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
 
+       /* Save uar page shift */
+       if (!mlx4_is_slave(dev)) {
+               /* A virtual PCI function needs to determine the UAR page size
+                * from firmware. Only the master PCI function can set it.
+                */
+               dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
+               mlx4_set_num_reserved_uars(dev, dev_cap);
+       }
+
        if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
                struct mlx4_init_hca_param hca_param;
 
@@ -815,16 +836,25 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
                return -ENODEV;
        }
 
-       /* slave gets uar page size from QUERY_HCA fw command */
-       dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
+       /* Set uar_page_shift for VF */
+       dev->uar_page_shift = hca_param.uar_page_sz + 12;
 
-       /* TODO: relax this assumption */
-       if (dev->caps.uar_page_size != PAGE_SIZE) {
-               mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
-                        dev->caps.uar_page_size, PAGE_SIZE);
-               return -ENODEV;
+       /* Make sure the master uar page size is valid */
+       if (dev->uar_page_shift > PAGE_SHIFT) {
+               mlx4_err(dev,
+                        "Invalid configuration: uar page size is larger than system page size\n");
+               return  -ENODEV;
        }
 
+       /* Set reserved_uars based on the uar_page_shift */
+       mlx4_set_num_reserved_uars(dev, &dev_cap);
+
+       /* Although the UAR page size in FW differs from the system page size,
+        * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
+        * still work under the assumption that uar page size == system page size
+        */
+       dev->caps.uar_page_size = PAGE_SIZE;
+
        memset(&func_cap, 0, sizeof(func_cap));
        err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
        if (err) {
@@ -2179,8 +2209,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 
                dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
 
-               init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
-               init_hca.uar_page_sz = PAGE_SHIFT - 12;
+               /* Always set the UAR page size to 4KB; set log_uar_sz accordingly */
+               init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
+                                     PAGE_SHIFT -
+                                     DEFAULT_UAR_PAGE_SHIFT;
+               init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
+
                init_hca.mw_enabled = 0;
                if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
                    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
index 2404c22ad2b2ac5d890798dc5bfe7ae608232e2c..7baef52db6b7586cf41df128c8ddecc581bc1568 100644 (file)
@@ -780,7 +780,10 @@ struct mlx4_set_port_general_context {
        u16 reserved1;
        u8 v_ignore_fcs;
        u8 flags;
-       u8 ignore_fcs;
+       union {
+               u8 ignore_fcs;
+               u8 roce_mode;
+       };
        u8 reserved2;
        __be16 mtu;
        u8 pptx;
index 609c59dc854e987a073e32cb4ff2c08736998edb..b3cc3ab63799462709a569cff08f6e4c5de59f43 100644 (file)
@@ -269,9 +269,15 @@ EXPORT_SYMBOL_GPL(mlx4_bf_free);
 
 int mlx4_init_uar_table(struct mlx4_dev *dev)
 {
-       if (dev->caps.num_uars <= 128) {
-               mlx4_err(dev, "Only %d UAR pages (need more than 128)\n",
-                        dev->caps.num_uars);
+       int num_reserved_uar = mlx4_get_num_reserved_uar(dev);
+
+       mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift);
+       mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars);
+
+       if (dev->caps.num_uars <= num_reserved_uar) {
+               mlx4_err(
+                       dev, "Only %d UAR pages (need more than %d)\n",
+                       dev->caps.num_uars, num_reserved_uar);
                mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
                return -ENODEV;
        }
index f2550425c25147e0881fdabfb20d8514cb37a088..787b7bb54d52acd094570283bf6793f1bfb0dab0 100644 (file)
@@ -1520,6 +1520,8 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
        return err;
 }
 
+#define SET_PORT_ROCE_2_FLAGS          0x10
+#define MLX4_SET_PORT_ROCE_V1_V2       0x2
 int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
                          u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
 {
@@ -1539,6 +1541,11 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
        context->pprx = (pprx * (!pfcrx)) << 7;
        context->pfcrx = pfcrx;
 
+       if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+               context->flags |= SET_PORT_ROCE_2_FLAGS;
+               context->roce_mode |=
+                       MLX4_SET_PORT_ROCE_V1_V2 << 4;
+       }
        in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
        err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
                       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
index 168823dde79f3dd48596bdad5a4ea72a95ed0404..d1cd9c32a9ae83912e9b3b552c5680768b8449c3 100644 (file)
@@ -167,6 +167,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
        }
 
+       if ((cur_state == MLX4_QP_STATE_RTR) &&
+           (new_state == MLX4_QP_STATE_RTS) &&
+           dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+               context->roce_entropy =
+                       cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
+
        *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
        memcpy(mailbox->buf + 8, context, sizeof *context);
 
@@ -921,3 +927,23 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
        return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn)
+{
+       struct mlx4_qp_context context;
+       struct mlx4_qp qp;
+       int err;
+
+       qp.qpn = qpn;
+       err = mlx4_qp_query(dev, &qp, &context);
+       if (!err) {
+               u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff;
+               u16 folded_dst = folded_qp(dest_qpn);
+               u16 folded_src = folded_qp(qpn);
+
+               return (dest_qpn != qpn) ?
+                       ((folded_dst ^ folded_src) | 0xC000) :
+                       folded_src | 0xC000;
+       }
+       return 0xdead;
+}
index b46dbe29ef6c8ec72db19c6e8fd841f5cb806a1b..25ce1b030a00f28cb3ea96833d368d7380aaff44 100644 (file)
@@ -915,11 +915,13 @@ static int handle_existing_counter(struct mlx4_dev *dev, u8 slave, int port,
 
        spin_lock_irq(mlx4_tlock(dev));
        r = find_res(dev, counter_index, RES_COUNTER);
-       if (!r || r->owner != slave)
+       if (!r || r->owner != slave) {
                ret = -EINVAL;
-       counter = container_of(r, struct res_counter, com);
-       if (!counter->port)
-               counter->port = port;
+       } else {
+               counter = container_of(r, struct res_counter, com);
+               if (!counter->port)
+                       counter->port = port;
+       }
 
        spin_unlock_irq(mlx4_tlock(dev));
        return ret;
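The restructuring above fixes a NULL dereference: the old flow fell through and ran container_of() plus a dereference even when find_res() returned NULL. A reduced, self-contained version of the fixed shape (my construction):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct com { int owner; };
struct res_counter { struct com com; int port; };

static int handle(struct com *r, int slave, int port)
{
        int ret = 0;

        if (!r || r->owner != slave) {
                ret = -22;                              /* -EINVAL */
        } else {
                struct res_counter *counter =
                        container_of(r, struct res_counter, com);

                if (!counter->port)                     /* safe: r is known valid */
                        counter->port = port;
        }
        return ret;
}

int main(void)
{
        printf("missing resource -> %d\n", handle(NULL, 1, 2));
        return 0;
}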
index 9ea49a8933231808fff014f5a4570b7e380f5b44..aac071a7e830b5fd777da7a5e4d014d802ad70d0 100644 (file)
@@ -39,8 +39,8 @@
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
 #include "wq.h"
-#include "transobj.h"
 #include "mlx5_core.h"
 
 #define MLX5E_MAX_NUM_TC       8
index c56d91a2812b04bf0857be048a7fb11fc8b2c1d0..d4e1c30452009718d9761a8a4949f5195d7f4c59 100644 (file)
@@ -2024,18 +2024,37 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
                                            vf_stats);
 }
 
-static struct net_device_ops mlx5e_netdev_ops = {
+static const struct net_device_ops mlx5e_netdev_ops_basic = {
        .ndo_open                = mlx5e_open,
        .ndo_stop                = mlx5e_close,
        .ndo_start_xmit          = mlx5e_xmit,
        .ndo_get_stats64         = mlx5e_get_stats,
        .ndo_set_rx_mode         = mlx5e_set_rx_mode,
        .ndo_set_mac_address     = mlx5e_set_mac,
-       .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
-       .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
+       .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
        .ndo_set_features        = mlx5e_set_features,
-       .ndo_change_mtu          = mlx5e_change_mtu,
-       .ndo_do_ioctl            = mlx5e_ioctl,
+       .ndo_change_mtu          = mlx5e_change_mtu,
+       .ndo_do_ioctl            = mlx5e_ioctl,
+};
+
+static const struct net_device_ops mlx5e_netdev_ops_sriov = {
+       .ndo_open                = mlx5e_open,
+       .ndo_stop                = mlx5e_close,
+       .ndo_start_xmit          = mlx5e_xmit,
+       .ndo_get_stats64         = mlx5e_get_stats,
+       .ndo_set_rx_mode         = mlx5e_set_rx_mode,
+       .ndo_set_mac_address     = mlx5e_set_mac,
+       .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
+       .ndo_set_features        = mlx5e_set_features,
+       .ndo_change_mtu          = mlx5e_change_mtu,
+       .ndo_do_ioctl            = mlx5e_ioctl,
+       .ndo_set_vf_mac          = mlx5e_set_vf_mac,
+       .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
+       .ndo_get_vf_config       = mlx5e_get_vf_config,
+       .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
+       .ndo_get_vf_stats        = mlx5e_get_vf_stats,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2137,18 +2156,11 @@ static void mlx5e_build_netdev(struct net_device *netdev)
 
        SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
 
-       if (priv->params.num_tc > 1)
-               mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue;
-
-       if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
-               mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac;
-               mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan;
-               mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config;
-               mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state;
-               mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats;
-       }
+       if (MLX5_CAP_GEN(mdev, vport_group_manager))
+               netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
+       else
+               netdev->netdev_ops = &mlx5e_netdev_ops_basic;
 
-       netdev->netdev_ops        = &mlx5e_netdev_ops;
        netdev->watchdog_timeo    = 15 * HZ;
 
        netdev->ethtool_ops       = &mlx5e_ethtool_ops;
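The split into two const ops tables removes runtime patching of a single shared struct. The hazard with the old scheme, reduced to a standalone example (mine): every netdev points at the same writable table, so flipping a member for one device changes all of them, and the struct can never be placed in read-only memory.

#include <stdio.h>

struct ops { const char *select_queue; };

static struct ops shared_ops = { NULL };        /* old scheme: writable, shared */
static const struct ops ops_basic = { NULL };   /* new scheme: immutable variants */
static const struct ops ops_sriov = { "sriov" };

int main(void)
{
        const struct ops *devA = &shared_ops;

        shared_ops.select_queue = "sriov";      /* probing device B ... */
        printf("device A now sees: %s\n", devA->select_queue);  /* ... leaks to A */

        printf("basic: %s, sriov: %s\n",
               ops_basic.select_queue ? ops_basic.select_queue : "(none)",
               ops_sriov.select_queue);
        return 0;
}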
@@ -2241,7 +2253,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
                goto err_unmap_free_uar;
        }
 
-       err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+       err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn);
        if (err) {
                mlx5_core_err(mdev, "alloc td failed, %d\n", err);
                goto err_dealloc_pd;
@@ -2324,7 +2336,7 @@ err_destroy_mkey:
        mlx5_core_destroy_mkey(mdev, &priv->mr);
 
 err_dealloc_transport_domain:
-       mlx5_dealloc_transport_domain(mdev, priv->tdn);
+       mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
 
 err_dealloc_pd:
        mlx5_core_dealloc_pd(mdev, priv->pdn);
@@ -2356,7 +2368,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
        mlx5e_close_drop_rq(priv);
        mlx5e_destroy_tises(priv);
        mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
-       mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
+       mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
        mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
        mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
        free_netdev(netdev);
index 23c244a7e5d73d5fe0042f793d7ca955d945bf7b..647a3ca2c2a925c1d6157febb59fa51ea39872da 100644 (file)
@@ -230,6 +230,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
                case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
                case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
                        rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+                       rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
                        mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
                                      eqe_type_str(eqe->type), eqe->type, rsn);
                        mlx5_rsc_event(dev, rsn, eqe->type);
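A small demonstration (mine; the type values are illustrative, not the real MLX5_EVENT_QUEUE_TYPE_* constants) of the rsn encoding introduced here: the object number keeps the low MLX5_USER_INDEX_LEN bits and the queue type is stored above them, so QPs, RQs and SQs with the same number no longer collide in the shared radix tree.

#include <stdio.h>
#include <stdint.h>

#define MLX5_USER_INDEX_LEN 24

enum { TYPE_QP = 0, TYPE_RQ = 1, TYPE_SQ = 2 };

static uint32_t make_rsn(uint32_t n, int type)
{
        return (n & 0xffffff) | ((uint32_t)type << MLX5_USER_INDEX_LEN);
}

int main(void)
{
        printf("rq5 key 0x%08x, sq5 key 0x%08x\n",
               make_rsn(5, TYPE_RQ), make_rsn(5, TYPE_SQ));
        printf("type of rq5 key: %u\n",
               make_rsn(5, TYPE_RQ) >> MLX5_USER_INDEX_LEN);
        return 0;
}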
index b37749a3730e576ef798d02ae380caad91d44fbf..1545a944c309bf3205ad49fc1de90bd21c3eaacd 100644 (file)
@@ -78,6 +78,11 @@ struct mlx5_device_context {
        void                   *context;
 };
 
+enum {
+       MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+       MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
 static struct mlx5_profile profile[] = {
        [0] = {
                .mask           = 0,
@@ -387,7 +392,7 @@ query_ex:
        return err;
 }
 
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
 {
        u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
        int err;
@@ -395,6 +400,7 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
        memset(out, 0, sizeof(out));
 
        MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+       MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
        err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
        if (err)
                return err;
@@ -404,6 +410,46 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
        return err;
 }
 
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+{
+       void *set_ctx;
+       void *set_hca_cap;
+       int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+       int req_endianness;
+       int err;
+
+       if (MLX5_CAP_GEN(dev, atomic)) {
+               err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
+                                        HCA_CAP_OPMOD_GET_CUR);
+               if (err)
+                       return err;
+       } else {
+               return 0;
+       }
+
+       req_endianness =
+               MLX5_CAP_ATOMIC(dev,
+                               supported_atomic_req_8B_endianess_mode_1);
+
+       if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+               return 0;
+
+       set_ctx = kzalloc(set_sz, GFP_KERNEL);
+       if (!set_ctx)
+               return -ENOMEM;
+
+       set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+
+       /* Set requestor to host endianness */
+       MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
+                MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+       err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+
+       kfree(set_ctx);
+       return err;
+}
+
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
        void *set_ctx = NULL;
@@ -445,7 +491,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 
        MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
 
-       err = set_caps(dev, set_ctx, set_sz);
+       err = set_caps(dev, set_ctx, set_sz,
+                      MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 
 query_ex:
        kfree(set_ctx);
@@ -667,7 +714,6 @@ clean:
        return err;
 }
 
-#ifdef CONFIG_MLX5_CORE_EN
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
        u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
@@ -720,7 +766,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 
        return -ENOTSUPP;
 }
-#endif
 
 static int map_bf_area(struct mlx5_core_dev *dev)
 {
@@ -966,13 +1011,11 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto err_pagealloc_cleanup;
        }
 
-#ifdef CONFIG_MLX5_CORE_EN
        err = mlx5_core_set_issi(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to set issi\n");
                goto err_disable_hca;
        }
-#endif
 
        err = mlx5_satisfy_startup_pages(dev, 1);
        if (err) {
@@ -992,6 +1035,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto reclaim_boot_pages;
        }
 
+       err = handle_hca_cap_atomic(dev);
+       if (err) {
+               dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+               goto reclaim_boot_pages;
+       }
+
        err = mlx5_satisfy_startup_pages(dev, 0);
        if (err) {
                dev_err(&pdev->dev, "failed to allocate init pages\n");
index 30e2ba3f5f16cb871fd20b0d05bb99433ea22ab1..def289375ecbf1a1543dcbbd4cf04e0a9b03ce1b 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/mlx5/cmd.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/transobj.h>
 
 #include "mlx5_core.h"
 
@@ -67,6 +68,52 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
                complete(&common->free);
 }
 
+static u64 qp_allowed_event_types(void)
+{
+       u64 mask;
+
+       mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+              BIT(MLX5_EVENT_TYPE_COMM_EST) |
+              BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+              BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+              BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+              BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+              BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+              BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+       return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+       u64 mask;
+
+       mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+              BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+       return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+       return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+       switch (rsc_type) {
+       case MLX5_EVENT_QUEUE_TYPE_QP:
+               return BIT(event_type) & qp_allowed_event_types();
+       case MLX5_EVENT_QUEUE_TYPE_RQ:
+               return BIT(event_type) & rq_allowed_event_types();
+       case MLX5_EVENT_QUEUE_TYPE_SQ:
+               return BIT(event_type) & sq_allowed_event_types();
+       default:
+               WARN(1, "Event arrived for unknown resource type");
+               return false;
+       }
+}
+
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
 {
        struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
@@ -75,8 +122,16 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
        if (!common)
                return;
 
+       if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
+               mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
+                              event_type, rsn);
+               return;
+       }
+
        switch (common->res) {
        case MLX5_RES_QP:
+       case MLX5_RES_RQ:
+       case MLX5_RES_SQ:
                qp = (struct mlx5_core_qp *)common;
                qp->event(qp, event_type);
                break;
@@ -177,27 +232,56 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
 }
 #endif
 
+static int create_qprqsq_common(struct mlx5_core_dev *dev,
+                               struct mlx5_core_qp *qp,
+                               int rsc_type)
+{
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+       int err;
+
+       qp->common.res = rsc_type;
+       spin_lock_irq(&table->lock);
+       err = radix_tree_insert(&table->tree,
+                               qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+                               qp);
+       spin_unlock_irq(&table->lock);
+       if (err)
+               return err;
+
+       atomic_set(&qp->common.refcount, 1);
+       init_completion(&qp->common.free);
+       qp->pid = current->pid;
+
+       return 0;
+}
+
+static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *qp)
+{
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+       unsigned long flags;
+
+       spin_lock_irqsave(&table->lock, flags);
+       radix_tree_delete(&table->tree,
+                         qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+       spin_unlock_irqrestore(&table->lock, flags);
+       mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+       wait_for_completion(&qp->common.free);
+}
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
                        struct mlx5_create_qp_mbox_in *in,
                        int inlen)
 {
-       struct mlx5_qp_table *table = &dev->priv.qp_table;
        struct mlx5_create_qp_mbox_out out;
        struct mlx5_destroy_qp_mbox_in din;
        struct mlx5_destroy_qp_mbox_out dout;
        int err;
-       void *qpc;
 
        memset(&out, 0, sizeof(out));
        in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
 
-       if (dev->issi) {
-               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
-               /* 0xffffff means we ask to work with cqe version 0 */
-               MLX5_SET(qpc, qpc, user_index, 0xffffff);
-       }
-
        err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
        if (err) {
                mlx5_core_warn(dev, "ret %d\n", err);
@@ -213,24 +297,16 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
        qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
        mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
-       qp->common.res = MLX5_RES_QP;
-       spin_lock_irq(&table->lock);
-       err = radix_tree_insert(&table->tree, qp->qpn, qp);
-       spin_unlock_irq(&table->lock);
-       if (err) {
-               mlx5_core_warn(dev, "err %d\n", err);
+       err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
+       if (err)
                goto err_cmd;
-       }
 
        err = mlx5_debug_qp_add(dev, qp);
        if (err)
                mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
                              qp->qpn);
 
-       qp->pid = current->pid;
-       atomic_set(&qp->common.refcount, 1);
        atomic_inc(&dev->num_qps);
-       init_completion(&qp->common.free);
 
        return 0;
 
@@ -250,18 +326,11 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 {
        struct mlx5_destroy_qp_mbox_in in;
        struct mlx5_destroy_qp_mbox_out out;
-       struct mlx5_qp_table *table = &dev->priv.qp_table;
-       unsigned long flags;
        int err;
 
        mlx5_debug_qp_remove(dev, qp);
 
-       spin_lock_irqsave(&table->lock, flags);
-       radix_tree_delete(&table->tree, qp->qpn);
-       spin_unlock_irqrestore(&table->lock, flags);
-
-       mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
-       wait_for_completion(&qp->common.free);
+       destroy_qprqsq_common(dev, qp);
 
        memset(&in, 0, sizeof(in));
        memset(&out, 0, sizeof(out));
@@ -279,59 +348,15 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
 
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
-                       enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
                        struct mlx5_modify_qp_mbox_in *in, int sqd_event,
                        struct mlx5_core_qp *qp)
 {
-       static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
-               [MLX5_QP_STATE_RST] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
-               },
-               [MLX5_QP_STATE_INIT]  = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
-                       [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
-               },
-               [MLX5_QP_STATE_RTR]   = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
-               },
-               [MLX5_QP_STATE_RTS]   = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
-               },
-               [MLX5_QP_STATE_SQD] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-               },
-               [MLX5_QP_STATE_SQER] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
-               },
-               [MLX5_QP_STATE_ERR] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-               }
-       };
-
        struct mlx5_modify_qp_mbox_out out;
        int err = 0;
-       u16 op;
-
-       if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
-           !optab[cur_state][new_state])
-               return -EINVAL;
 
        memset(&out, 0, sizeof(out));
-       op = optab[cur_state][new_state];
-       in->hdr.opcode = cpu_to_be16(op);
+       in->hdr.opcode = cpu_to_be16(operation);
        in->qpn = cpu_to_be32(qp->qpn);
        err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
        if (err)
@@ -449,3 +474,67 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
 }
 EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
 #endif
+
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *rq)
+{
+       int err;
+       u32 rqn;
+
+       err = mlx5_core_create_rq(dev, in, inlen, &rqn);
+       if (err)
+               return err;
+
+       rq->qpn = rqn;
+       err = create_qprqsq_common(dev, rq, MLX5_RES_RQ);
+       if (err)
+               goto err_destroy_rq;
+
+       return 0;
+
+err_destroy_rq:
+       mlx5_core_destroy_rq(dev, rq->qpn);
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
+
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *rq)
+{
+       destroy_qprqsq_common(dev, rq);
+       mlx5_core_destroy_rq(dev, rq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *sq)
+{
+       int err;
+       u32 sqn;
+
+       err = mlx5_core_create_sq(dev, in, inlen, &sqn);
+       if (err)
+               return err;
+
+       sq->qpn = sqn;
+       err = create_qprqsq_common(dev, sq, MLX5_RES_SQ);
+       if (err)
+               goto err_destroy_sq;
+
+       return 0;
+
+err_destroy_sq:
+       mlx5_core_destroy_sq(dev, sq->qpn);
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *sq)
+{
+       destroy_qprqsq_common(dev, sq);
+       mlx5_core_destroy_sq(dev, sq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
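
The four *_tracked helpers above share one idiom: create the raw transport
object in firmware, then register it in the common resource table (via
create_qprqsq_common) so events can later be dispatched to it, unwinding the
firmware object if registration fails. A minimal sketch of the pattern; the
names my_create_obj/my_track_obj/my_destroy_obj are illustrative, not real
mlx5 symbols:

	int create_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *res)
	{
		int err;

		err = my_create_obj(dev, &res->qpn);	/* 1. create in firmware */
		if (err)
			return err;

		err = my_track_obj(dev, res);		/* 2. add to resource table */
		if (err)
			my_destroy_obj(dev, res->qpn);	/* 3. unwind on failure */

		return err;
	}
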
index ffada801976bff52ef05320d5605f08798af3129..04bc522605a03bb9cb7e6602f9c003ccfc28433b 100644 (file)
@@ -37,7 +37,7 @@
 #include <linux/mlx5/srq.h>
 #include <rdma/ib_verbs.h>
 #include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
 
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
 {
@@ -241,8 +241,6 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
 
        memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
        memcpy(pas, in->pas, pas_size);
-       /* 0xffffff means we ask to work with cqe version 0 */
-       MLX5_SET(xrc_srqc,          xrc_srqc,  user_index, 0xffffff);
        MLX5_SET(create_xrc_srq_in, create_in, opcode,
                 MLX5_CMD_OP_CREATE_XRC_SRQ);
 
index d7068f54e80066acdcaf6bf081082d50feeebc57..03a5093ffeb72ab6f5c490d1eff42967ceb7f0f4 100644 (file)
@@ -32,9 +32,9 @@
 
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
 
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 {
        u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
        u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
@@ -53,8 +53,9 @@ int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
 
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 {
        u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
        u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
@@ -68,6 +69,7 @@ void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
 
 int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
 {
@@ -94,6 +96,7 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
        memset(out, 0, sizeof(out));
        return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_modify_rq);
 
 void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
 {
@@ -108,6 +111,18 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
 
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+       u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
+       int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+
+       MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+       MLX5_SET(query_rq_in, in, rqn, rqn);
+
+       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
 int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
 {
        u32 out[MLX5_ST_SZ_DW(create_sq_out)];
@@ -133,6 +148,7 @@ int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
        memset(out, 0, sizeof(out));
        return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_modify_sq);
 
 void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
 {
@@ -147,6 +163,18 @@ void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
 
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+       u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
+       int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
+
+       MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+       MLX5_SET(query_sq_in, in, sqn, sqn);
+
+       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
 int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *tirn)
 {
@@ -162,6 +190,7 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_create_tir);
 
 int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
                         int inlen)
@@ -187,6 +216,7 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
 
 int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *tisn)
@@ -203,6 +233,19 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+                        int inlen)
+{
+       u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
+
+       MLX5_SET(modify_tis_in, in, tisn, tisn);
+       MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+
+       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
 
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 {
@@ -216,6 +259,7 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
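
These exports open the transport-object API to modules outside mlx5_core,
presumably the mlx5_ib driver. A hedged consumer sketch, assuming the usual
MLX5_ADDR_OF/MLX5_SET inbox construction; error handling elided for brevity:

	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	u32 tdn, tisn;

	mlx5_core_alloc_transport_domain(mdev, &tdn);
	MLX5_SET(tisc, tisc, transport_domain, tdn);
	mlx5_core_create_tis(mdev, in, sizeof(in), &tisn);
	/* ... use the TIS ... */
	mlx5_core_destroy_tis(mdev, tisn);
	mlx5_core_dealloc_transport_domain(mdev, tdn);
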
 
 int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rmpn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h
deleted file mode 100644 (file)
index 74cae51..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __TRANSOBJ_H__
-#define __TRANSOBJ_H__
-
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
-int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                       u32 *rqn);
-int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
-void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
-int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                       u32 *sqn);
-int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
-void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
-int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *tirn);
-int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
-                        int inlen);
-void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
-int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *tisn);
-void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rmpn);
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                         u32 *rmpn);
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-
-int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rqtn);
-int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
-                        int inlen);
-void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
-
-#endif /* __TRANSOBJ_H__ */
index 076197efea9b07916480a97a8258e84703a3db66..c7398b95aecdc0286480f85564e5a42660275bd6 100644 (file)
@@ -76,7 +76,7 @@ u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 
        return MLX5_GET(query_vport_state_out, out, admin_state);
 }
-EXPORT_SYMBOL(mlx5_query_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
 
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
                                  u16 vport, u8 state)
@@ -104,7 +104,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 
        return err;
 }
-EXPORT_SYMBOL(mlx5_modify_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
 
 static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
                                        u32 *out, int outlen)
@@ -151,12 +151,9 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                nic_vport_context.permanent_address);
 
        err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
-       if (err)
-               goto out;
-
-       ether_addr_copy(addr, &out_addr[2]);
+       if (!err)
+               ether_addr_copy(addr, &out_addr[2]);
 
-out:
        kvfree(out);
        return err;
 }
@@ -197,7 +194,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 
        return err;
 }
-EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address);
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
 
 int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
                                  u32 vport,
@@ -430,6 +427,68 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
 
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+                                          u64 *system_image_guid)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+       *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
+                                       nic_vport_context.system_image_guid);
+
+       kfree(out);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+       *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+                               nic_vport_context.node_guid);
+
+       kfree(out);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+                                       u16 *qkey_viol_cntr)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+       *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
+                                  nic_vport_context.qkey_violation_counter);
+
+       kfree(out);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
                             u8 port_num, u16  vf_num, u16 gid_index,
                             union ib_gid *gid)
@@ -750,3 +809,44 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
        return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+enum mlx5_vport_roce_state {
+       MLX5_VPORT_ROCE_DISABLED = 0,
+       MLX5_VPORT_ROCE_ENABLED  = 1,
+};
+
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+                                           enum mlx5_vport_roce_state state)
+{
+       void *in;
+       int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+       int err;
+
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               mlx5_core_warn(mdev, "failed to allocate inbox\n");
+               return -ENOMEM;
+       }
+
+       MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+       MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+                state);
+
+       err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+       kvfree(in);
+
+       return err;
+}
+
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+       return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+       return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
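
A hedged usage sketch: the expected consumer enables RoCE on the NIC vport when
it brings its device up and disables it again on teardown:

	err = mlx5_nic_vport_enable_roce(mdev);
	if (err)
		return err;
	/* ... RoCE traffic may now flow on the vport ... */
	mlx5_nic_vport_disable_roce(mdev);
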
index 726f5435b32f15ec3a39c7710a998c469fdd3837..ae65b9940aed4823319f1e4db5c2ac6fe6dc9b28 100644 (file)
@@ -49,7 +49,7 @@
 #define MLXSW_PORT_MID                 0xd000
 
 #define MLXSW_PORT_MAX_PHY_PORTS       0x40
-#define MLXSW_PORT_MAX_PORTS           MLXSW_PORT_MAX_PHY_PORTS
+#define MLXSW_PORT_MAX_PORTS           (MLXSW_PORT_MAX_PHY_PORTS + 1)
 
 #define MLXSW_PORT_DEVID_BITS_OFFSET   10
 #define MLXSW_PORT_PHY_BITS_OFFSET     4
index 0c5237264e3e29519933180f307a531c8b34c054..ffe4c030573332cf43bb8b3d66a0eb922031da5c 100644 (file)
@@ -873,6 +873,62 @@ static inline void mlxsw_reg_spvm_pack(char *payload, u8 local_port,
        }
 }
 
+/* SPAFT - Switch Port Acceptable Frame Types
+ * ------------------------------------------
+ * The Switch Port Acceptable Frame Types register configures the frame
+ * admittance of the port.
+ */
+#define MLXSW_REG_SPAFT_ID 0x2010
+#define MLXSW_REG_SPAFT_LEN 0x08
+
+static const struct mlxsw_reg_info mlxsw_reg_spaft = {
+       .id = MLXSW_REG_SPAFT_ID,
+       .len = MLXSW_REG_SPAFT_LEN,
+};
+
+/* reg_spaft_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is not supported (all tag types are allowed).
+ */
+MLXSW_ITEM32(reg, spaft, local_port, 0x00, 16, 8);
+
+/* reg_spaft_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, sub_port, 0x00, 8, 8);
+
+/* reg_spaft_allow_untagged
+ * When set, untagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_untagged, 0x04, 31, 1);
+
+/* reg_spaft_allow_prio_tagged
+ * When set, priority tagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_prio_tagged, 0x04, 30, 1);
+
+/* reg_spaft_allow_tagged
+ * When set, tagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_tagged, 0x04, 29, 1);
+
+static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port,
+                                       bool allow_untagged)
+{
+       MLXSW_REG_ZERO(spaft, payload);
+       mlxsw_reg_spaft_local_port_set(payload, local_port);
+       mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged);
+       mlxsw_reg_spaft_allow_prio_tagged_set(payload, true);
+       mlxsw_reg_spaft_allow_tagged_set(payload, true);
+}
+
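
Note that the pack helper parameterizes only untagged admittance; tagged and
priority-tagged frames are always admitted. A hedged write sketch using the
surrounding register conventions:

	char spaft_pl[MLXSW_REG_SPAFT_LEN];

	mlxsw_reg_spaft_pack(spaft_pl, local_port, false); /* drop untagged */
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl);
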
 /* SFGC - Switch Flooding Group Configuration
  * ------------------------------------------
  * The following register controls the association of flooding tables and MIDs
@@ -1044,6 +1100,92 @@ static inline void mlxsw_reg_sftr_pack(char *payload,
        mlxsw_reg_sftr_port_mask_set(payload, port, 1);
 }
 
+/* SFDF - Switch Filtering DB Flush
+ * --------------------------------
+ * The switch filtering DB flush register is used to flush the FDB.
+ * Note that FDB notifications are flushed as well.
+ */
+#define MLXSW_REG_SFDF_ID 0x2013
+#define MLXSW_REG_SFDF_LEN 0x14
+
+static const struct mlxsw_reg_info mlxsw_reg_sfdf = {
+       .id = MLXSW_REG_SFDF_ID,
+       .len = MLXSW_REG_SFDF_LEN,
+};
+
+/* reg_sfdf_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfdf, swid, 0x00, 24, 8);
+
+enum mlxsw_reg_sfdf_flush_type {
+       MLXSW_REG_SFDF_FLUSH_PER_SWID,
+       MLXSW_REG_SFDF_FLUSH_PER_FID,
+       MLXSW_REG_SFDF_FLUSH_PER_PORT,
+       MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID,
+       MLXSW_REG_SFDF_FLUSH_PER_LAG,
+       MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID,
+};
+
+/* reg_sfdf_flush_type
+ * Flush type.
+ * 0 - All SWID dynamic entries are flushed.
+ * 1 - All FID dynamic entries are flushed.
+ * 2 - All dynamic entries pointing to port are flushed.
+ * 3 - All FID dynamic entries pointing to port are flushed.
+ * 4 - All dynamic entries pointing to LAG are flushed.
+ * 5 - All FID dynamic entries pointing to LAG are flushed.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4);
+
+/* reg_sfdf_flush_static
+ * Static.
+ * 0 - Flush only dynamic entries.
+ * 1 - Flush both dynamic and static entries.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_static, 0x04, 24, 1);
+
+static inline void mlxsw_reg_sfdf_pack(char *payload,
+                                      enum mlxsw_reg_sfdf_flush_type type)
+{
+       MLXSW_REG_ZERO(sfdf, payload);
+       mlxsw_reg_sfdf_flush_type_set(payload, type);
+       mlxsw_reg_sfdf_flush_static_set(payload, true);
+}
+
+/* reg_sfdf_fid
+ * FID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, fid, 0x0C, 0, 16);
+
+/* reg_sfdf_system_port
+ * Port to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, system_port, 0x0C, 0, 16);
+
+/* reg_sfdf_port_fid_system_port
+ * Port to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, port_fid_system_port, 0x08, 0, 16);
+
+/* reg_sfdf_lag_id
+ * LAG ID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_id, 0x0C, 0, 10);
+
+/* reg_sfdf_lag_fid_lag_id
+ * LAG ID to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10);
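
The fid/system_port items deliberately alias offset 0x0C, and the two *_fid_*
items alias offset 0x08; which field is meaningful depends on the flush_type
selected in mlxsw_reg_sfdf_pack(). A hedged sketch of a per-FID flush (note
the pack above also sets flush_static, so static entries are flushed too):

	char sfdf_pl[MLXSW_REG_SFDF_LEN];

	mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_FID);
	mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
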
+
 /* SLDR - Switch LAG Descriptor Register
  * -----------------------------------------
  * The switch LAG descriptor register is populated by LAG descriptors.
@@ -1701,20 +1843,20 @@ MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8);
  * Module number.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false);
 
 /* reg_pmlp_tx_lane
  * Tx Lane. When rxtx field is cleared, this field is used for Rx as well.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 16, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false);
 
 /* reg_pmlp_rx_lane
  * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is
  * equal to Tx lane.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 24, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false);
 
 static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port)
 {
@@ -3117,10 +3259,14 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
                return "SPVID";
        case MLXSW_REG_SPVM_ID:
                return "SPVM";
+       case MLXSW_REG_SPAFT_ID:
+               return "SPAFT";
        case MLXSW_REG_SFGC_ID:
                return "SFGC";
        case MLXSW_REG_SFTR_ID:
                return "SFTR";
+       case MLXSW_REG_SFDF_ID:
+               return "SFDF";
        case MLXSW_REG_SLDR_ID:
                return "SLDR";
        case MLXSW_REG_SLCR_ID:
index ce6845d534a83f7acea04c0d0c4401103192f0ab..09ce451c283bb8dfb5dd9c4c69bdeb04c0e796b3 100644 (file)
@@ -1979,6 +1979,115 @@ static struct mlxsw_driver mlxsw_sp_driver = {
        .profile                = &mlxsw_sp_config_profile,
 };
 
+static int
+mlxsw_sp_port_fdb_flush_by_port(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT);
+       mlxsw_reg_sfdf_system_port_set(sfdf_pl, mlxsw_sp_port->local_port);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_port_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
+                                   u16 fid)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID);
+       mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+       mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl,
+                                               mlxsw_sp_port->local_port);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_lag_id(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG);
+       mlxsw_reg_sfdf_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_lag_id_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
+                                     u16 fid)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID);
+       mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+       mlxsw_reg_sfdf_lag_fid_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+__mlxsw_sp_port_fdb_flush(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       int err, last_err = 0;
+       u16 vid;
+
+       for (vid = 1; vid < VLAN_N_VID - 1; vid++) {
+               err = mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, vid);
+               if (err)
+                       last_err = err;
+       }
+
+       return last_err;
+}
+
+static int
+__mlxsw_sp_port_fdb_flush_lagged(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       int err, last_err = 0;
+       u16 vid;
+
+       for (vid = 1; vid < VLAN_N_VID - 1; vid++) {
+               err = mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, vid);
+               if (err)
+                       last_err = err;
+       }
+
+       return last_err;
+}
+
+static int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       if (!list_empty(&mlxsw_sp_port->vports_list))
+               if (mlxsw_sp_port->lagged)
+                       return __mlxsw_sp_port_fdb_flush_lagged(mlxsw_sp_port);
+               else
+                       return __mlxsw_sp_port_fdb_flush(mlxsw_sp_port);
+       else
+               if (mlxsw_sp_port->lagged)
+                       return mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port);
+               else
+                       return mlxsw_sp_port_fdb_flush_by_port(mlxsw_sp_port);
+}
+
+static int mlxsw_sp_vport_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_vport);
+       u16 fid = mlxsw_sp_vfid_to_fid(vfid);
+
+       if (mlxsw_sp_vport->lagged)
+               return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_vport,
+                                                            fid);
+       else
+               return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_vport, fid);
+}
+
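
The dispatch in mlxsw_sp_port_fdb_flush() above reduces to a two-by-two
matrix; a summary sketch for reference:

	/*
	 *                 no VLAN uppers     VLAN uppers (vports_list set)
	 * physical port   FLUSH_PER_PORT     FLUSH_PER_PORT_AND_FID, per VID
	 * LAG member      FLUSH_PER_LAG      FLUSH_PER_LAG_AND_FID, per VID
	 */
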
 static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
 {
        return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
@@ -2006,10 +2115,16 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port)
        return 0;
 }
 
-static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port)
+static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+                                     bool flush_fdb)
 {
        struct net_device *dev = mlxsw_sp_port->dev;
 
+       if (flush_fdb && mlxsw_sp_port_fdb_flush(mlxsw_sp_port))
+               netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
+
+       mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+
        mlxsw_sp_port->learning = 0;
        mlxsw_sp_port->learning_sync = 0;
        mlxsw_sp_port->uc_flood = 0;
@@ -2200,10 +2315,15 @@ err_col_port_enable:
        return err;
 }
 
+static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                      struct net_device *br_dev,
+                                      bool flush_fdb);
+
 static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
                                   struct net_device *lag_dev)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       struct mlxsw_sp_port *mlxsw_sp_vport;
        struct mlxsw_sp_upper *lag;
        u16 lag_id = mlxsw_sp_port->lag_id;
        int err;
@@ -2220,7 +2340,32 @@ static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
        if (err)
                return err;
 
+       /* If we leave a LAG device that has bridges built on top of it,
+        * their teardown sequence is never issued, so we need to invoke
+        * the necessary cleanup routines ourselves.
+        */
+       list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list,
+                           vport.list) {
+               struct net_device *br_dev;
+
+               if (!mlxsw_sp_vport->bridged)
+                       continue;
+
+               br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
+               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, false);
+       }
+
+       if (mlxsw_sp_port->bridged) {
+               mlxsw_sp_port_active_vlans_del(mlxsw_sp_port);
+               mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false);
+
+               if (lag->ref_count == 1)
+                       mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL);
+       }
+
        if (lag->ref_count == 1) {
+               if (mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port))
+                       netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
                err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
                if (err)
                        return err;
@@ -2272,9 +2417,6 @@ static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
        return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
 }
 
-static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                      struct net_device *br_dev);
-
 static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port,
                                   struct net_device *vlan_dev)
 {
@@ -2312,7 +2454,7 @@ static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port,
                struct net_device *br_dev;
 
                br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
-               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev);
+               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, true);
        }
 
        mlxsw_sp_vport->dev = mlxsw_sp_port->dev;
@@ -2374,7 +2516,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
                                }
                                mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev);
                        } else {
-                               err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port);
+                               err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
+                                                                true);
                                mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev);
                                if (err) {
                                        netdev_err(dev, "Failed to leave bridge\n");
@@ -2541,7 +2684,8 @@ static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                      struct net_device *br_dev)
+                                      struct net_device *br_dev,
+                                      bool flush_fdb)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
        u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
@@ -2604,6 +2748,16 @@ static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
                goto err_vport_flood_set;
        }
 
+       err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid,
+                                         MLXSW_REG_SPMS_STATE_FORWARDING);
+       if (err) {
+               netdev_err(dev, "Failed to set STP state\n");
+               goto err_port_stp_state_set;
+       }
+
+       if (flush_fdb && mlxsw_sp_vport_fdb_flush(mlxsw_sp_vport))
+               netdev_err(dev, "Failed to flush FDB\n");
+
        /* Switch between the vFIDs and destroy the old one if needed. */
        new_vfid->nr_vports++;
        mlxsw_sp_vport->vport.vfid = new_vfid;
@@ -2618,6 +2772,7 @@ static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
 
        return 0;
 
+err_port_stp_state_set:
 err_vport_flood_set:
 err_port_vid_learning_set:
 err_port_vid_to_fid_validate:
@@ -2777,7 +2932,7 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev,
                        if (!mlxsw_sp_vport)
                                return NOTIFY_DONE;
                        err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport,
-                                                         upper_dev);
+                                                         upper_dev, true);
                        if (err) {
                                netdev_err(dev, "Failed to leave bridge\n");
                                return NOTIFY_BAD;
index a23dc610d2593aedce974eddf94789d3aaf181a6..3b89ed2f3c76b8b6eebd5fa6f7d1f52a70d75585 100644 (file)
@@ -120,7 +120,6 @@ struct mlxsw_sp {
        } fdb_notify;
 #define MLXSW_SP_DEFAULT_AGEING_TIME 300
        u32 ageing_time;
-       struct mutex fdb_lock;  /* Make sure FDB sessions are atomic. */
        struct mlxsw_sp_upper master_bridge;
        struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
 };
@@ -254,5 +253,7 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
                           __be16 __always_unused proto, u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid,
                             bool set, bool only_uc);
+void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
 
 #endif
index 45479ef5bcf4f44848f9989f83f46ce66fe4c11b..7b56098acc58eb9e1d5cd4d586e75989ae83f01a 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/if_bridge.h>
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
+#include <linux/rtnetlink.h>
 #include <net/switchdev.h>
 
 #include "spectrum.h"
@@ -124,14 +125,14 @@ static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
        int err;
 
        switch (state) {
-       case BR_STATE_DISABLED: /* fall-through */
        case BR_STATE_FORWARDING:
                spms_state = MLXSW_REG_SPMS_STATE_FORWARDING;
                break;
-       case BR_STATE_LISTENING: /* fall-through */
        case BR_STATE_LEARNING:
                spms_state = MLXSW_REG_SPMS_STATE_LEARNING;
                break;
+       case BR_STATE_LISTENING: /* fall-through */
+       case BR_STATE_DISABLED: /* fall-through */
        case BR_STATE_BLOCKING:
                spms_state = MLXSW_REG_SPMS_STATE_DISCARDING;
                break;
@@ -369,7 +370,8 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
        return err;
 }
 
-static int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port,
+                                   u16 vid)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        char spvid_pl[MLXSW_REG_SPVID_LEN];
@@ -378,6 +380,53 @@ static int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl);
 }
 
+static int mlxsw_sp_port_allow_untagged_set(struct mlxsw_sp_port *mlxsw_sp_port,
+                                           bool allow)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char spaft_pl[MLXSW_REG_SPAFT_LEN];
+
+       mlxsw_reg_spaft_pack(spaft_pl, mlxsw_sp_port->local_port, allow);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl);
+}
+
+int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+       struct net_device *dev = mlxsw_sp_port->dev;
+       int err;
+
+       if (!vid) {
+               err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, false);
+               if (err) {
+                       netdev_err(dev, "Failed to disallow untagged traffic\n");
+                       return err;
+               }
+       } else {
+               err = __mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid);
+               if (err) {
+                       netdev_err(dev, "Failed to set PVID\n");
+                       return err;
+               }
+
+               /* Re-allow untagged traffic only if it was previously disallowed. */
+               if (!mlxsw_sp_port->pvid) {
+                       err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port,
+                                                              true);
+                       if (err) {
+                               netdev_err(dev, "Failed to allow untagged traffic\n");
+                               goto err_port_allow_untagged_set;
+                       }
+               }
+       }
+
+       mlxsw_sp_port->pvid = vid;
+       return 0;
+
+err_port_allow_untagged_set:
+       __mlxsw_sp_port_pvid_set(mlxsw_sp_port, mlxsw_sp_port->pvid);
+       return err;
+}
+
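
With this change, PVID 0 becomes a real state meaning "no PVID": untagged
admittance is cut off through SPAFT instead of mapping untagged traffic to a
VLAN. A hedged caller's-eye sketch of the new semantics:

	mlxsw_sp_port_pvid_set(mlxsw_sp_port, 100); /* untagged -> VLAN 100 */
	mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);   /* no PVID: drop untagged */
	mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);   /* back to default VLAN */
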
 static int mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid)
 {
        char sfmr_pl[MLXSW_REG_SFMR_LEN];
@@ -539,7 +588,12 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
                        netdev_err(dev, "Unable to add PVID %d\n", vid_begin);
                        goto err_port_pvid_set;
                }
-               mlxsw_sp_port->pvid = vid_begin;
+       } else if (!flag_pvid && old_pvid >= vid_begin && old_pvid <= vid_end) {
+               err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
+               if (err) {
+                       netdev_err(dev, "Unable to del PVID\n");
+                       goto err_port_pvid_set;
+               }
        }
 
        /* Changing activity bits only if HW operation succeeded */
@@ -891,20 +945,18 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                return err;
        }
 
+       if (init)
+               goto out;
+
        pvid = mlxsw_sp_port->pvid;
-       if (pvid >= vid_begin && pvid <= vid_end && pvid != 1) {
-               /* Default VLAN is always 1 */
-               err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+       if (pvid >= vid_begin && pvid <= vid_end) {
+               err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
                if (err) {
                        netdev_err(dev, "Unable to del PVID %d\n", pvid);
                        return err;
                }
-               mlxsw_sp_port->pvid = 1;
        }
 
-       if (init)
-               goto out;
-
        err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end,
                                        false, false);
        if (err) {
@@ -936,6 +988,14 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                                         vlan->vid_begin, vlan->vid_end, false);
 }
 
+void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       u16 vid;
+
+       for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
+               __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+}
+
 static int
 mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port,
                             const struct switchdev_obj_port_fdb *fdb)
@@ -1040,10 +1100,12 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
 
 static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                                  struct switchdev_obj_port_fdb *fdb,
-                                 switchdev_obj_dump_cb_t *cb)
+                                 switchdev_obj_dump_cb_t *cb,
+                                 struct net_device *orig_dev)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       u16 vport_vid = 0, vport_fid = 0;
+       struct mlxsw_sp_port *tmp;
+       u16 vport_fid = 0;
        char *sfd_pl;
        char mac[ETH_ALEN];
        u16 fid;
@@ -1058,13 +1120,11 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
        if (!sfd_pl)
                return -ENOMEM;
 
-       mutex_lock(&mlxsw_sp_port->mlxsw_sp->fdb_lock);
        if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
                u16 tmp;
 
                tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port);
                vport_fid = mlxsw_sp_vfid_to_fid(tmp);
-               vport_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
        }
 
        mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0);
@@ -1088,12 +1148,13 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                                mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid,
                                                        &local_port);
                                if (local_port == mlxsw_sp_port->local_port) {
-                                       if (vport_fid && vport_fid != fid)
-                                               continue;
-                                       else if (vport_fid)
-                                               fdb->vid = vport_vid;
-                                       else
+                                       if (vport_fid && vport_fid == fid)
+                                               fdb->vid = 0;
+                                       else if (!vport_fid &&
+                                                !mlxsw_sp_fid_is_vfid(fid))
                                                fdb->vid = fid;
+                                       else
+                                               continue;
                                        ether_addr_copy(fdb->addr, mac);
                                        fdb->ndm_state = NUD_REACHABLE;
                                        err = cb(&fdb->obj);
@@ -1104,14 +1165,22 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                        case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG:
                                mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i,
                                                            mac, &fid, &lag_id);
-                               if (mlxsw_sp_port ==
-                                   mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) {
-                                       if (vport_fid && vport_fid != fid)
+                               tmp = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id);
+                               if (tmp && tmp->local_port ==
+                                   mlxsw_sp_port->local_port) {
+                                       /* LAG records can only point to LAG
+                                        * devices or VLAN devices on top.
+                                        */
+                                       if (!netif_is_lag_master(orig_dev) &&
+                                           !is_vlan_dev(orig_dev))
                                                continue;
-                                       else if (vport_fid)
-                                               fdb->vid = vport_vid;
-                                       else
+                                       if (vport_fid && vport_fid == fid)
+                                               fdb->vid = 0;
+                                       else if (!vport_fid &&
+                                                !mlxsw_sp_fid_is_vfid(fid))
                                                fdb->vid = fid;
+                                       else
+                                               continue;
                                        ether_addr_copy(fdb->addr, mac);
                                        fdb->ndm_state = NUD_REACHABLE;
                                        err = cb(&fdb->obj);
@@ -1124,7 +1193,6 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
        } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT);
 
 out:
-       mutex_unlock(&mlxsw_sp_port->mlxsw_sp->fdb_lock);
        kfree(sfd_pl);
        return stored_err ? stored_err : err;
 }
@@ -1176,7 +1244,8 @@ static int mlxsw_sp_port_obj_dump(struct net_device *dev,
                break;
        case SWITCHDEV_OBJ_ID_PORT_FDB:
                err = mlxsw_sp_port_fdb_dump(mlxsw_sp_port,
-                                            SWITCHDEV_OBJ_PORT_FDB(obj), cb);
+                                            SWITCHDEV_OBJ_PORT_FDB(obj), cb,
+                                            obj->orig_dev);
                break;
        default:
                err = -EOPNOTSUPP;
@@ -1194,14 +1263,14 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
        .switchdev_port_obj_dump        = mlxsw_sp_port_obj_dump,
 };
 
-static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync,
-                                       bool adding, char *mac, u16 vid,
+static void mlxsw_sp_fdb_call_notifiers(bool learning_sync, bool adding,
+                                       char *mac, u16 vid,
                                        struct net_device *dev)
 {
        struct switchdev_notifier_fdb_info info;
        unsigned long notifier_type;
 
-       if (learning && learning_sync) {
+       if (learning_sync) {
                info.addr = mac;
                info.vid = vid;
                notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL;
@@ -1237,7 +1306,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
                        netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n");
                        goto just_remove;
                }
-               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+               vid = 0;
                /* Override the physical port with the vPort. */
                mlxsw_sp_port = mlxsw_sp_vport;
        } else {
@@ -1257,8 +1326,7 @@ do_fdb_op:
 
        if (!do_notification)
                return;
-       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
-                                   mlxsw_sp_port->learning_sync,
+       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync,
                                    adding, mac, vid, mlxsw_sp_port->dev);
        return;
 
@@ -1273,6 +1341,7 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
                                                bool adding)
 {
        struct mlxsw_sp_port *mlxsw_sp_port;
+       struct net_device *dev;
        char mac[ETH_ALEN];
        u16 lag_vid = 0;
        u16 lag_id;
@@ -1298,11 +1367,13 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
                        goto just_remove;
                }
 
-               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
-               lag_vid = vid;
+               lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+               dev = mlxsw_sp_vport->dev;
+               vid = 0;
                /* Override the physical port with the vPort. */
                mlxsw_sp_port = mlxsw_sp_vport;
        } else {
+               dev = mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev;
                vid = fid;
        }
 
@@ -1319,10 +1390,8 @@ do_fdb_op:
 
        if (!do_notification)
                return;
-       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
-                                   mlxsw_sp_port->learning_sync,
-                                   adding, mac, vid,
-                                   mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev);
+       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync, adding, mac,
+                                   vid, dev);
        return;
 
 just_remove:
@@ -1374,7 +1443,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work)
 
        mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work);
 
-       mutex_lock(&mlxsw_sp->fdb_lock);
+       rtnl_lock();
        do {
                mlxsw_reg_sfn_pack(sfn_pl);
                err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
@@ -1387,7 +1456,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work)
                        mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
 
        } while (num_rec);
-       mutex_unlock(&mlxsw_sp->fdb_lock);
+       rtnl_unlock();
 
        kfree(sfn_pl);
        mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
@@ -1402,7 +1471,6 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
                dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n");
                return err;
        }
-       mutex_init(&mlxsw_sp->fdb_lock);
        INIT_DELAYED_WORK(&mlxsw_sp->fdb_notify.dw, mlxsw_sp_fdb_notify_work);
        mlxsw_sp->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL;
        mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
index a10c928bbd6b9bf5d192dfc6453395312062fe3f..00cfd95ca59d53fe040ef5b002e7d30604d35e96 100644 (file)
 
 #include "moxart_ether.h"
 
+static inline void moxart_desc_write(u32 data, u32 *desc)
+{
+       *desc = cpu_to_le32(data);
+}
+
+static inline u32 moxart_desc_read(u32 *desc)
+{
+       return le32_to_cpu(*desc);
+}
+
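
The helpers replace readl()/writel() because these descriptors live in
coherent DMA memory, not MMIO space; they are plain little-endian memory
words, so cpu_to_le32()/le32_to_cpu() plus explicit barriers are the right
tools. A hedged sketch of the ownership handshake the rmb()/wmb() calls in
the hunks below implement:

	/* consume: check ownership first, then read the other fields */
	u32 d0 = moxart_desc_read(desc + RX_REG_OFFSET_DESC0);
	rmb();			/* flags before payload/other fields */
	if (d0 & RX_DESC0_DMA_OWN)
		return;		/* still owned by the controller */

	/* produce: publish all fields, then flip ownership last */
	wmb();
	moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
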
 static inline void moxart_emac_write(struct net_device *ndev,
                                     unsigned int reg, unsigned long value)
 {
@@ -112,7 +122,7 @@ static void moxart_mac_enable(struct net_device *ndev)
 static void moxart_mac_setup_desc_ring(struct net_device *ndev)
 {
        struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-       void __iomem *desc;
+       void *desc;
        int i;
 
        for (i = 0; i < TX_DESC_NUM; i++) {
@@ -121,7 +131,7 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
 
                priv->tx_buf[i] = priv->tx_buf_base + priv->tx_buf_size * i;
        }
-       writel(TX_DESC1_END, desc + TX_REG_OFFSET_DESC1);
+       moxart_desc_write(TX_DESC1_END, desc + TX_REG_OFFSET_DESC1);
 
        priv->tx_head = 0;
        priv->tx_tail = 0;
@@ -129,8 +139,8 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
        for (i = 0; i < RX_DESC_NUM; i++) {
                desc = priv->rx_desc_base + i * RX_REG_DESC_SIZE;
                memset(desc, 0, RX_REG_DESC_SIZE);
-               writel(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
-               writel(RX_BUF_SIZE & RX_DESC1_BUF_SIZE_MASK,
+               moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
+               moxart_desc_write(RX_BUF_SIZE & RX_DESC1_BUF_SIZE_MASK,
                       desc + RX_REG_OFFSET_DESC1);
 
                priv->rx_buf[i] = priv->rx_buf_base + priv->rx_buf_size * i;
@@ -141,12 +151,12 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
                if (dma_mapping_error(&ndev->dev, priv->rx_mapping[i]))
                        netdev_err(ndev, "DMA mapping error\n");
 
-               writel(priv->rx_mapping[i],
+               moxart_desc_write(priv->rx_mapping[i],
                       desc + RX_REG_OFFSET_DESC2 + RX_DESC2_ADDRESS_PHYS);
-               writel(priv->rx_buf[i],
+               moxart_desc_write((uintptr_t)priv->rx_buf[i],
                       desc + RX_REG_OFFSET_DESC2 + RX_DESC2_ADDRESS_VIRT);
        }
-       writel(RX_DESC1_END, desc + RX_REG_OFFSET_DESC1);
+       moxart_desc_write(RX_DESC1_END, desc + RX_REG_OFFSET_DESC1);
 
        priv->rx_head = 0;
 
@@ -201,14 +211,15 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget)
                                                      napi);
        struct net_device *ndev = priv->ndev;
        struct sk_buff *skb;
-       void __iomem *desc;
+       void *desc;
        unsigned int desc0, len;
        int rx_head = priv->rx_head;
        int rx = 0;
 
        while (rx < budget) {
                desc = priv->rx_desc_base + (RX_REG_DESC_SIZE * rx_head);
-               desc0 = readl(desc + RX_REG_OFFSET_DESC0);
+               desc0 = moxart_desc_read(desc + RX_REG_OFFSET_DESC0);
+               rmb(); /* ensure desc0 is up to date */
 
                if (desc0 & RX_DESC0_DMA_OWN)
                        break;
@@ -250,7 +261,8 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget)
                        priv->stats.multicast++;
 
 rx_next:
-               writel(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
+               wmb(); /* prevent setting ownership back too early */
+               moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
 
                rx_head = RX_NEXT(rx_head);
                priv->rx_head = rx_head;
@@ -310,7 +322,7 @@ static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
 static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-       void __iomem *desc;
+       void *desc;
        unsigned int len;
        unsigned int tx_head = priv->tx_head;
        u32 txdes1;
@@ -319,11 +331,12 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
 
        spin_lock_irq(&priv->txlock);
-       if (readl(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
+       if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
                net_dbg_ratelimited("no TX space for packet\n");
                priv->stats.tx_dropped++;
                goto out_unlock;
        }
+       rmb(); /* only read descriptor fields after seeing TX_DESC0_DMA_OWN clear */
 
        len = skb->len > TX_BUF_SIZE ? TX_BUF_SIZE : skb->len;
 
@@ -337,9 +350,9 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        priv->tx_len[tx_head] = len;
        priv->tx_skb[tx_head] = skb;
 
-       writel(priv->tx_mapping[tx_head],
+       moxart_desc_write(priv->tx_mapping[tx_head],
               desc + TX_REG_OFFSET_DESC2 + TX_DESC2_ADDRESS_PHYS);
-       writel(skb->data,
+       moxart_desc_write((uintptr_t)skb->data,
               desc + TX_REG_OFFSET_DESC2 + TX_DESC2_ADDRESS_VIRT);
 
        if (skb->len < ETH_ZLEN) {
@@ -354,8 +367,9 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        txdes1 = TX_DESC1_LTS | TX_DESC1_FTS | (len & TX_DESC1_BUF_SIZE_MASK);
        if (tx_head == TX_DESC_NUM_MASK)
                txdes1 |= TX_DESC1_END;
-       writel(txdes1, desc + TX_REG_OFFSET_DESC1);
-       writel(TX_DESC0_DMA_OWN, desc + TX_REG_OFFSET_DESC0);
+       moxart_desc_write(txdes1, desc + TX_REG_OFFSET_DESC1);
+       wmb(); /* flush descriptor before transferring ownership */
+       moxart_desc_write(TX_DESC0_DMA_OWN, desc + TX_REG_OFFSET_DESC0);
 
        /* start to send packet */
        writel(0xffffffff, priv->base + REG_TX_POLL_DEMAND);
index 2be9280d608ce9efe320ea7b20b01b1c350a023e..93a9563ac7c6730eec8240ac187a86b9831c9741 100644 (file)
@@ -300,7 +300,7 @@ struct moxart_mac_priv_t {
 
        dma_addr_t rx_base;
        dma_addr_t rx_mapping[RX_DESC_NUM];
-       void __iomem *rx_desc_base;
+       void *rx_desc_base;
        unsigned char *rx_buf_base;
        unsigned char *rx_buf[RX_DESC_NUM];
        unsigned int rx_head;
@@ -308,7 +308,7 @@ struct moxart_mac_priv_t {
 
        dma_addr_t tx_base;
        dma_addr_t tx_mapping[TX_DESC_NUM];
-       void __iomem *tx_desc_base;
+       void *tx_desc_base;
        unsigned char *tx_buf_base;
        unsigned char *tx_buf[RX_DESC_NUM];
        unsigned int tx_head;
index 50d5604833edc8e0ffde1f7f8974002852f8bb97..e0993eba5df3f8081bff6329a97b4753e876cbe3 100644 (file)
@@ -2223,8 +2223,6 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
        return IRQ_NONE;
 }
 
-#ifdef CONFIG_PCI_MSI
-
 static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id)
 {
        struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id;
@@ -2442,16 +2440,13 @@ static void vxge_rem_msix_isr(struct vxgedev *vdev)
        if (vdev->config.intr_type == MSI_X)
                pci_disable_msix(vdev->pdev);
 }
-#endif
 
 static void vxge_rem_isr(struct vxgedev *vdev)
 {
-#ifdef CONFIG_PCI_MSI
-       if (vdev->config.intr_type == MSI_X) {
+       if (IS_ENABLED(CONFIG_PCI_MSI) &&
+           vdev->config.intr_type == MSI_X) {
                vxge_rem_msix_isr(vdev);
-       } else
-#endif
-       if (vdev->config.intr_type == INTA) {
+       } else if (vdev->config.intr_type == INTA) {
                        synchronize_irq(vdev->pdev->irq);
                        free_irq(vdev->pdev->irq, vdev);
        }
@@ -2460,11 +2455,10 @@ static void vxge_rem_isr(struct vxgedev *vdev)
 static int vxge_add_isr(struct vxgedev *vdev)
 {
        int ret = 0;
-#ifdef CONFIG_PCI_MSI
        int vp_idx = 0, intr_idx = 0, intr_cnt = 0, msix_idx = 0, irq_req = 0;
        int pci_fun = PCI_FUNC(vdev->pdev->devfn);
 
-       if (vdev->config.intr_type == MSI_X)
+       if (IS_ENABLED(CONFIG_PCI_MSI) && vdev->config.intr_type == MSI_X)
                ret = vxge_enable_msix(vdev);
 
        if (ret) {
@@ -2475,7 +2469,7 @@ static int vxge_add_isr(struct vxgedev *vdev)
                vdev->config.intr_type = INTA;
        }
 
-       if (vdev->config.intr_type == MSI_X) {
+       if (IS_ENABLED(CONFIG_PCI_MSI) && vdev->config.intr_type == MSI_X) {
                for (intr_idx = 0;
                     intr_idx < (vdev->no_of_vpath *
                        VXGE_HW_VPATH_MSIX_ACTIVE); intr_idx++) {
@@ -2576,9 +2570,8 @@ static int vxge_add_isr(struct vxgedev *vdev)
                vdev->vxge_entries[intr_cnt].in_use = 1;
                vdev->vxge_entries[intr_cnt].arg = &vdev->vpaths[0];
        }
-INTA_MODE:
-#endif
 
+INTA_MODE:
        if (vdev->config.intr_type == INTA) {
                snprintf(vdev->desc[0], VXGE_INTR_STRLEN,
                        "%s:vxge:INTA", vdev->ndev->name);
@@ -3889,12 +3882,12 @@ static void vxge_device_config_init(struct vxge_hw_device_config *device_config,
        if (max_mac_vpath > VXGE_MAX_MAC_ADDR_COUNT)
                max_mac_vpath = VXGE_MAX_MAC_ADDR_COUNT;
 
-#ifndef CONFIG_PCI_MSI
-       vxge_debug_init(VXGE_ERR,
-               "%s: This Kernel does not support "
-               "MSI-X. Defaulting to INTA", VXGE_DRIVER_NAME);
-       *intr_type = INTA;
-#endif
+       if (!IS_ENABLED(CONFIG_PCI_MSI)) {
+               vxge_debug_init(VXGE_ERR,
+                       "%s: This Kernel does not support "
+                       "MSI-X. Defaulting to INTA", VXGE_DRIVER_NAME);
+               *intr_type = INTA;
+       }
 
        /* Configure whether MSI-X or IRQL. */
        switch (*intr_type) {
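
Note: IS_ENABLED(CONFIG_PCI_MSI) evaluates to a compile-time 0 or 1, so the conversion above lets the compiler drop the MSI-X branches on kernels built without PCI_MSI while still parsing and type-checking them, which code hidden behind #ifdef never gets. The general shape, as a sketch with placeholder names (want_msix, setup_msix, setup_inta):

    if (IS_ENABLED(CONFIG_PCI_MSI) && want_msix)
            setup_msix(vdev);       /* discarded as dead code when PCI_MSI=n */
    else
            setup_inta(vdev);
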
index 17d5571d04322f1fd88adde8592e3f6ee1d80e69..537974cfd427091442acc9098a0a40535bbda431 100644
@@ -6137,28 +6137,28 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
                sw_cnt_1ms_ini = 16000000/rg_saw_cnt;
                sw_cnt_1ms_ini &= 0x0fff;
                data = r8168_mac_ocp_read(tp, 0xd412);
-               data &= 0x0fff;
+               data &= ~0x0fff;
                data |= sw_cnt_1ms_ini;
                r8168_mac_ocp_write(tp, 0xd412, data);
        }
 
        data = r8168_mac_ocp_read(tp, 0xe056);
-       data &= 0xf0;
-       data |= 0x07;
+       data &= ~0xf0;
+       data |= 0x70;
        r8168_mac_ocp_write(tp, 0xe056, data);
 
        data = r8168_mac_ocp_read(tp, 0xe052);
-       data &= 0x8008;
-       data |= 0x6000;
+       data &= ~0x6000;
+       data |= 0x8008;
        r8168_mac_ocp_write(tp, 0xe052, data);
 
        data = r8168_mac_ocp_read(tp, 0xe0d6);
-       data &= 0x01ff;
+       data &= ~0x01ff;
        data |= 0x017f;
        r8168_mac_ocp_write(tp, 0xe0d6, data);
 
        data = r8168_mac_ocp_read(tp, 0xd420);
-       data &= 0x0fff;
+       data &= ~0x0fff;
        data |= 0x047f;
        r8168_mac_ocp_write(tp, 0xd420, data);
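
Note: these are read-modify-write fixes on the MAC OCP registers: to replace a bit-field, the field must be cleared by AND-ing with the complement of its mask before OR-ing in the new value. The intended pattern, sketched with FIELD_MASK and new_bits as placeholders:

    data = r8168_mac_ocp_read(tp, reg);
    data &= ~FIELD_MASK;    /* clear the target field */
    data |= new_bits;       /* then install the new value */
    r8168_mac_ocp_write(tp, reg, data);

Masking with the field itself (data &= FIELD_MASK), as the old code did, keeps the stale field and destroys every other bit in the register.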
 
index ac43ed914fcf270653a6101a2f6c736b7ac2ef00..744d7806a9eec63e1c4fb18cc7b57fa96d73fcbc 100644
@@ -1139,7 +1139,8 @@ static int ravb_set_ringparam(struct net_device *ndev,
        if (netif_running(ndev)) {
                netif_device_detach(ndev);
                /* Stop PTP Clock driver */
-               ravb_ptp_stop(ndev);
+               if (priv->chip_id == RCAR_GEN2)
+                       ravb_ptp_stop(ndev);
                /* Wait for DMA stopping */
                error = ravb_stop_dma(ndev);
                if (error) {
@@ -1170,7 +1171,8 @@ static int ravb_set_ringparam(struct net_device *ndev,
                ravb_emac_init(ndev);
 
                /* Initialise PTP Clock driver */
-               ravb_ptp_init(ndev, priv->pdev);
+               if (priv->chip_id == RCAR_GEN2)
+                       ravb_ptp_init(ndev, priv->pdev);
 
                netif_device_attach(ndev);
        }
@@ -1298,7 +1300,8 @@ static void ravb_tx_timeout_work(struct work_struct *work)
        netif_tx_stop_all_queues(ndev);
 
        /* Stop PTP Clock driver */
-       ravb_ptp_stop(ndev);
+       if (priv->chip_id == RCAR_GEN2)
+               ravb_ptp_stop(ndev);
 
        /* Wait for DMA stopping */
        ravb_stop_dma(ndev);
@@ -1311,7 +1314,8 @@ static void ravb_tx_timeout_work(struct work_struct *work)
        ravb_emac_init(ndev);
 
        /* Initialise PTP Clock driver */
-       ravb_ptp_init(ndev, priv->pdev);
+       if (priv->chip_id == RCAR_GEN2)
+               ravb_ptp_init(ndev, priv->pdev);
 
        netif_tx_start_all_queues(ndev);
 }
@@ -1814,10 +1818,6 @@ static int ravb_probe(struct platform_device *pdev)
                           CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC);
        }
 
-       /* Set CSEL value */
-       ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB,
-                  CCC);
-
        /* Set GTI value */
        error = ravb_set_gti(ndev);
        if (error)
index a4ab71d43e4e9d521e2a58f75aca4a4dff3f9c30..166a7fc87e2f41966ab2b54a641ea1c7bfc46f1d 100644
@@ -3531,12 +3531,14 @@ static void rocker_port_fdb_learn_work(struct work_struct *work)
        info.addr = lw->addr;
        info.vid = lw->vid;
 
+       rtnl_lock();
        if (learned && removing)
                call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
                                         lw->rocker_port->dev, &info.info);
        else if (learned && !removing)
                call_switchdev_notifiers(SWITCHDEV_FDB_ADD,
                                         lw->rocker_port->dev, &info.info);
+       rtnl_unlock();
 
        rocker_port_kfree(lw->trans, work);
 }
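
Note: switchdev notifier chains expect their callers to hold RTNL, and this deferred work item did not otherwise take it; hence the rtnl_lock()/rtnl_unlock() pair around the calls. Listeners that depend on the lock can make the contract explicit:

    ASSERT_RTNL();  /* warns if the caller does not hold rtnl_mutex */
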
index 0e2fc1a844ab2b2453681750368e1cf9d288e779..db7db8ac4ca308a2f5f52e11f052bbd246c824a4 100644
@@ -2342,8 +2342,8 @@ static int smc_drv_probe(struct platform_device *pdev)
        }
 
        ndev->irq = platform_get_irq(pdev, 0);
-       if (ndev->irq <= 0) {
-               ret = -ENODEV;
+       if (ndev->irq < 0) {
+               ret = ndev->irq;
                goto out_release_io;
        }
        /*
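
Note: platform_get_irq() returns a negative errno on failure, so propagating that value instead of flattening it to -ENODEV preserves codes such as -EPROBE_DEFER. The usual probe idiom, as a sketch:

    irq = platform_get_irq(pdev, 0);
    if (irq < 0)
            return irq;     /* may be -EPROBE_DEFER; don't mask it */
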
index cc106d892e2975c924eafc8e850a4da8188ef227..23fa29877f5be79d39449284ecf8041d095d3712 100644
@@ -389,17 +389,27 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
        if (vio_version_after_eq(&port->vio, 1, 8)) {
                struct vio_net_dext *dext = vio_net_ext(desc);
 
+               skb_reset_network_header(skb);
+
                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
                        if (skb->protocol == ETH_P_IP) {
-                               struct iphdr *iph = (struct iphdr *)skb->data;
+                               struct iphdr *iph = ip_hdr(skb);
 
                                iph->check = 0;
                                ip_send_check(iph);
                        }
                }
                if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
-                   skb->ip_summed == CHECKSUM_NONE)
-                       vnet_fullcsum(skb);
+                   skb->ip_summed == CHECKSUM_NONE) {
+                       if (skb->protocol == htons(ETH_P_IP)) {
+                               struct iphdr *iph = ip_hdr(skb);
+                               int ihl = iph->ihl * 4;
+
+                               skb_reset_transport_header(skb);
+                               skb_set_transport_header(skb, ihl);
+                               vnet_fullcsum(skb);
+                       }
+               }
                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
                        skb->ip_summed = CHECKSUM_PARTIAL;
                        skb->csum_level = 0;
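
Note: skb_reset_network_header() and skb_set_transport_header() record header offsets inside the skb; freshly received vnet buffers have neither set, so helpers such as ip_hdr() and the full-checksum path would otherwise resolve against stale offsets. A sketch of the bookkeeping these calls perform:

    skb_reset_network_header(skb);          /* network hdr = current data */
    skb_set_transport_header(skb, ihl);     /* transport hdr = data + ihl */
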
index 70814b7386b3119e55ac8be1a92cfdb8c0740c5c..fc8bbff2d7e37ec19d807008c1e9b70040551ea2 100644
@@ -1880,9 +1880,9 @@ static int dwceqos_open(struct net_device *ndev)
        }
        netdev_reset_queue(ndev);
 
+       dwceqos_init_hw(lp);
        napi_enable(&lp->napi);
        phy_start(lp->phy_dev);
-       dwceqos_init_hw(lp);
 
        netif_start_queue(ndev);
        tasklet_enable(&lp->tx_bdreclaim_tasklet);
index e9cc61e1ec742090157db6b792b1a6a9168e6635..c3e85acfdc70233a628f939477b208e3dd28121b 100644
@@ -63,8 +63,12 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv,
                mode = AM33XX_GMII_SEL_MODE_RGMII;
                break;
 
-       case PHY_INTERFACE_MODE_MII:
        default:
+               dev_warn(priv->dev,
+                        "Unsupported PHY mode: \"%s\". Defaulting to MII.\n",
+                       phy_modes(phy_mode));
+               /* fallthrough */
+       case PHY_INTERFACE_MODE_MII:
                mode = AM33XX_GMII_SEL_MODE_MII;
                break;
        };
@@ -106,8 +110,12 @@ static void cpsw_gmii_sel_dra7xx(struct cpsw_phy_sel_priv *priv,
                mode = AM33XX_GMII_SEL_MODE_RGMII;
                break;
 
-       case PHY_INTERFACE_MODE_MII:
        default:
+               dev_warn(priv->dev,
+                        "Unsupported PHY mode: \"%s\". Defaulting to MII.\n",
+                       phy_modes(phy_mode));
+               /* fallthrough */
+       case PHY_INTERFACE_MODE_MII:
                mode = AM33XX_GMII_SEL_MODE_MII;
                break;
        };
index 657b65bf5cac64254b4db7c845038d2e40549a8c..18bf3a8fdc505ad2ed8a0fa3b81dfeb7d833b94d 100644
@@ -82,7 +82,7 @@ struct cpdma_desc {
 
 struct cpdma_desc_pool {
        phys_addr_t             phys;
-       u32                     hw_addr;
+       dma_addr_t              hw_addr;
        void __iomem            *iomap;         /* ioremap map */
        void                    *cpumap;        /* dma_alloc map */
        int                     desc_size, mem_size;
@@ -152,7 +152,7 @@ struct cpdma_chan {
  * abstract out these details
  */
 static struct cpdma_desc_pool *
-cpdma_desc_pool_create(struct device *dev, u32 phys, u32 hw_addr,
+cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr,
                                int size, int align)
 {
        int bitmap_size;
@@ -176,13 +176,13 @@ cpdma_desc_pool_create(struct device *dev, u32 phys, u32 hw_addr,
 
        if (phys) {
                pool->phys  = phys;
-               pool->iomap = ioremap(phys, size);
+               pool->iomap = ioremap(phys, size); /* should be memremap? */
                pool->hw_addr = hw_addr;
        } else {
-               pool->cpumap = dma_alloc_coherent(dev, size, &pool->phys,
+               pool->cpumap = dma_alloc_coherent(dev, size, &pool->hw_addr,
                                                  GFP_KERNEL);
-               pool->iomap = pool->cpumap;
-               pool->hw_addr = pool->phys;
+               pool->iomap = (void __iomem __force *)pool->cpumap;
+               pool->phys = pool->hw_addr; /* assumes no IOMMU, don't use this value */
        }
 
        if (pool->iomap)
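
Note: dma_alloc_coherent() hands back a CPU virtual address and fills in the device-visible (bus) address separately; behind an IOMMU the two are unrelated, which is why the pool now stores the returned handle in hw_addr instead of deriving it from phys. The API shape, as a sketch:

    void *cpu;          /* CPU virtual address, for the driver */
    dma_addr_t bus;     /* bus address, programmed into the hardware */

    cpu = dma_alloc_coherent(dev, size, &bus, GFP_KERNEL);
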
index c61d66d386346a2eda12b8fab2efaa8760b3fff3..029841f98c3216e4d9b30b1ab82448aba3ae004f 100644
@@ -117,21 +117,17 @@ static void get_pkt_info(dma_addr_t *buff, u32 *buff_len, dma_addr_t *ndesc,
        *ndesc = le32_to_cpu(desc->next_desc);
 }
 
-static void get_pad_info(u32 *pad0, u32 *pad1, u32 *pad2, struct knav_dma_desc *desc)
+static u32 get_sw_data(int index, struct knav_dma_desc *desc)
 {
-       *pad0 = le32_to_cpu(desc->pad[0]);
-       *pad1 = le32_to_cpu(desc->pad[1]);
-       *pad2 = le32_to_cpu(desc->pad[2]);
+       /* No Endian conversion needed as this data is untouched by hw */
+       return desc->sw_data[index];
 }
 
-static void get_pad_ptr(void **padptr, struct knav_dma_desc *desc)
-{
-       u64 pad64;
-
-       pad64 = le32_to_cpu(desc->pad[0]) +
-               ((u64)le32_to_cpu(desc->pad[1]) << 32);
-       *padptr = (void *)(uintptr_t)pad64;
-}
+/* use these macros to get sw data */
+#define GET_SW_DATA0(desc) get_sw_data(0, desc)
+#define GET_SW_DATA1(desc) get_sw_data(1, desc)
+#define GET_SW_DATA2(desc) get_sw_data(2, desc)
+#define GET_SW_DATA3(desc) get_sw_data(3, desc)
 
 static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len,
                             struct knav_dma_desc *desc)
@@ -163,13 +159,18 @@ static void set_desc_info(u32 desc_info, u32 pkt_info,
        desc->packet_info = cpu_to_le32(pkt_info);
 }
 
-static void set_pad_info(u32 pad0, u32 pad1, u32 pad2, struct knav_dma_desc *desc)
+static void set_sw_data(int index, u32 data, struct knav_dma_desc *desc)
 {
-       desc->pad[0] = cpu_to_le32(pad0);
-       desc->pad[1] = cpu_to_le32(pad1);
-       desc->pad[2] = cpu_to_le32(pad1);
+       /* No Endian conversion needed as this data is untouched by hw */
+       desc->sw_data[index] = data;
 }
 
+/* use these macros to set sw data */
+#define SET_SW_DATA0(data, desc) set_sw_data(0, data, desc)
+#define SET_SW_DATA1(data, desc) set_sw_data(1, data, desc)
+#define SET_SW_DATA2(data, desc) set_sw_data(2, data, desc)
+#define SET_SW_DATA3(data, desc) set_sw_data(3, data, desc)
+
 static void set_org_pkt_info(dma_addr_t buff, u32 buff_len,
                             struct knav_dma_desc *desc)
 {
@@ -581,7 +582,6 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp,
        dma_addr_t dma_desc, dma_buf;
        unsigned int buf_len, dma_sz = sizeof(*ndesc);
        void *buf_ptr;
-       u32 pad[2];
        u32 tmp;
 
        get_words(&dma_desc, 1, &desc->next_desc);
@@ -593,14 +593,20 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp,
                        break;
                }
                get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc);
-               get_pad_ptr(&buf_ptr, ndesc);
+               /* warning!!!! We are retrieving the virtual ptr in the sw_data
+                * field as a 32bit value. Will not work on 64bit machines
+                */
+               buf_ptr = (void *)GET_SW_DATA0(ndesc);
+               buf_len = (int)GET_SW_DATA1(desc);
                dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE);
                __free_page(buf_ptr);
                knav_pool_desc_put(netcp->rx_pool, desc);
        }
-
-       get_pad_info(&pad[0], &pad[1], &buf_len, desc);
-       buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
+       /* warning!!!! We are retrieving the virtual ptr in the sw_data
+        * field as a 32bit value. Will not work on 64bit machines
+        */
+       buf_ptr = (void *)GET_SW_DATA0(desc);
+       buf_len = (int)GET_SW_DATA1(desc);
 
        if (buf_ptr)
                netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr);
@@ -639,7 +645,6 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
        dma_addr_t dma_desc, dma_buff;
        struct netcp_packet p_info;
        struct sk_buff *skb;
-       u32 pad[2];
        void *org_buf_ptr;
 
        dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz);
@@ -653,8 +658,11 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
        }
 
        get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc);
-       get_pad_info(&pad[0], &pad[1], &org_buf_len, desc);
-       org_buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
+       /* warning!!!! We are retrieving the virtual ptr in the sw_data
+        * field as a 32bit value. Will not work on 64bit machines
+        */
+       org_buf_ptr = (void *)GET_SW_DATA0(desc);
+       org_buf_len = (int)GET_SW_DATA1(desc);
 
        if (unlikely(!org_buf_ptr)) {
                dev_err(netcp->ndev_dev, "NULL bufptr in desc\n");
@@ -679,7 +687,6 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
        /* Fill in the page fragment list */
        while (dma_desc) {
                struct page *page;
-               void *ptr;
 
                ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz);
                if (unlikely(!ndesc)) {
@@ -688,8 +695,10 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
                }
 
                get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc);
-               get_pad_ptr(&ptr, ndesc);
-               page = ptr;
+               /* warning!!!! We are retrieving the virtual ptr in the sw_data
+                * field as a 32bit value. Will not work on 64bit machines
+                */
+               page = (struct page *)GET_SW_DATA0(desc);
 
                if (likely(dma_buff && buf_len && page)) {
                        dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
@@ -777,7 +786,10 @@ static void netcp_free_rx_buf(struct netcp_intf *netcp, int fdq)
                }
 
                get_org_pkt_info(&dma, &buf_len, desc);
-               get_pad_ptr(&buf_ptr, desc);
+               /* warning!!!! We are retrieving the virtual ptr in the sw_data
+                * field as a 32bit value. Will not work on 64bit machines
+                */
+               buf_ptr = (void *)GET_SW_DATA0(desc);
 
                if (unlikely(!dma)) {
                        dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n");
@@ -829,7 +841,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
        struct page *page;
        dma_addr_t dma;
        void *bufptr;
-       u32 pad[3];
+       u32 sw_data[2];
 
        /* Allocate descriptor */
        hwdesc = knav_pool_desc_get(netcp->rx_pool);
@@ -846,7 +858,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
                bufptr = netdev_alloc_frag(primary_buf_len);
-               pad[2] = primary_buf_len;
+               sw_data[1] = primary_buf_len;
 
                if (unlikely(!bufptr)) {
                        dev_warn_ratelimited(netcp->ndev_dev,
@@ -858,9 +870,10 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
                if (unlikely(dma_mapping_error(netcp->dev, dma)))
                        goto fail;
 
-               pad[0] = lower_32_bits((uintptr_t)bufptr);
-               pad[1] = upper_32_bits((uintptr_t)bufptr);
-
+               /* warning!!!! We are saving the virtual ptr in the sw_data
+                * field as a 32bit value. Will not work on 64bit machines
+                */
+               sw_data[0] = (u32)bufptr;
        } else {
                /* Allocate a secondary receive queue entry */
                page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
@@ -870,9 +883,11 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
                }
                buf_len = PAGE_SIZE;
                dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE);
-               pad[0] = lower_32_bits(dma);
-               pad[1] = upper_32_bits(dma);
-               pad[2] = 0;
+               /* warning!!!! We are saving the virtual ptr in the sw_data
+                * field as a 32bit value. Will not work on 64bit machines
+                */
+               sw_data[0] = (u32)page;
+               sw_data[1] = 0;
        }
 
        desc_info =  KNAV_DMA_DESC_PS_INFO_IN_DESC;
@@ -882,7 +897,8 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
        pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) <<
                    KNAV_DMA_DESC_RETQ_SHIFT;
        set_org_pkt_info(dma, buf_len, hwdesc);
-       set_pad_info(pad[0], pad[1], pad[2], hwdesc);
+       SET_SW_DATA0(sw_data[0], hwdesc);
+       SET_SW_DATA1(sw_data[1], hwdesc);
        set_desc_info(desc_info, pkt_info, hwdesc);
 
        /* Push to FDQs */
@@ -971,7 +987,6 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
                                          unsigned int budget)
 {
        struct knav_dma_desc *desc;
-       void *ptr;
        struct sk_buff *skb;
        unsigned int dma_sz;
        dma_addr_t dma;
@@ -988,8 +1003,10 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
                        continue;
                }
 
-               get_pad_ptr(&ptr, desc);
-               skb = ptr;
+               /* warning!!!! We are retrieving the virtual ptr in the sw_data
+                * field as a 32bit value. Will not work on 64bit machines
+                */
+               skb = (struct sk_buff *)GET_SW_DATA0(desc);
                netcp_free_tx_desc_chain(netcp, desc, dma_sz);
                if (!skb) {
                        dev_err(netcp->ndev_dev, "No skb in Tx desc\n");
@@ -1194,10 +1211,10 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
        }
 
        set_words(&tmp, 1, &desc->packet_info);
-       tmp = lower_32_bits((uintptr_t)&skb);
-       set_words(&tmp, 1, &desc->pad[0]);
-       tmp = upper_32_bits((uintptr_t)&skb);
-       set_words(&tmp, 1, &desc->pad[1]);
+       /* warning!!!! We are saving the virtual ptr in the sw_data
+        * field as a 32bit value. Will not work on 64bit machines
+        */
+       SET_SW_DATA0((u32)skb, desc);
 
        if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) {
                tmp = tx_pipe->switch_to_port;
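
Note: the repeated warning comments are deliberate: struct page and sk_buff pointers are stored in a single 32-bit sw_data word, which truncates kernel pointers on 64-bit machines. A 64-bit-safe variant would have to split the pointer across two words, roughly (sketch only, reusing the macros introduced above):

    SET_SW_DATA0(lower_32_bits((uintptr_t)skb), desc);
    SET_SW_DATA1(upper_32_bits((uintptr_t)skb), desc);

though sw_data word 1 already carries buffer lengths on the receive side, so a real fix needs a different layout.
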
index 7f975a2c8990fcff4491b4171cae626490d45807..b0de8ecd7fe8ed8ab030e22da50aef82e283e4b2 100644
@@ -533,8 +533,8 @@ static int dfx_register(struct device *bdev)
        const char *print_name = dev_name(bdev);
        struct net_device *dev;
        DFX_board_t       *bp;                  /* board pointer */
-       resource_size_t bar_start[3];           /* pointers to ports */
-       resource_size_t bar_len[3];             /* resource length */
+       resource_size_t bar_start[3] = {0};     /* pointers to ports */
+       resource_size_t bar_len[3] = {0};       /* resource length */
        int alloc_size;                         /* total buffer size used */
        struct resource *region;
        int err = 0;
@@ -3697,8 +3697,8 @@ static void dfx_unregister(struct device *bdev)
        int dfx_bus_pci = dev_is_pci(bdev);
        int dfx_bus_tc = DFX_BUS_TC(bdev);
        int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
-       resource_size_t bar_start[3];           /* pointers to ports */
-       resource_size_t bar_len[3];             /* resource lengths */
+       resource_size_t bar_start[3] = {0};     /* pointers to ports */
+       resource_size_t bar_len[3] = {0};       /* resource lengths */
        int             alloc_size;             /* total buffer size used */
 
        unregister_netdev(dev);
index 7456569f53c1a312d5590dc88072505413126889..0bf7edd99573d336f179031d358f0684cf454cc7 100644
@@ -980,9 +980,9 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
                        opts = ip_tunnel_info_opts(info);
 
                if (key->tun_flags & TUNNEL_CSUM)
-                       flags |= GENEVE_F_UDP_CSUM;
+                       flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX;
                else
-                       flags &= ~GENEVE_F_UDP_CSUM;
+                       flags |= GENEVE_F_UDP_ZERO_CSUM6_TX;
 
                err = geneve6_build_skb(dst, skb, key->tun_flags, vni,
                                        info->options_len, opts,
@@ -1039,6 +1039,34 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
        return geneve_xmit_skb(skb, dev, info);
 }
 
+static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
+{
+       /* The max_mtu calculation does not take account of GENEVE
+        * options, to avoid excluding potentially valid
+        * configurations.
+        */
+       int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
+               - dev->hard_header_len;
+
+       if (new_mtu < 68)
+               return -EINVAL;
+
+       if (new_mtu > max_mtu) {
+               if (strict)
+                       return -EINVAL;
+
+               new_mtu = max_mtu;
+       }
+
+       dev->mtu = new_mtu;
+       return 0;
+}
+
+static int geneve_change_mtu(struct net_device *dev, int new_mtu)
+{
+       return __geneve_change_mtu(dev, new_mtu, true);
+}
+
 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
        struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -1083,7 +1111,7 @@ static const struct net_device_ops geneve_netdev_ops = {
        .ndo_stop               = geneve_stop,
        .ndo_start_xmit         = geneve_xmit,
        .ndo_get_stats64        = ip_tunnel_get_stats64,
-       .ndo_change_mtu         = eth_change_mtu,
+       .ndo_change_mtu         = geneve_change_mtu,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_fill_metadata_dst  = geneve_fill_metadata_dst,
@@ -1150,6 +1178,7 @@ static void geneve_setup(struct net_device *dev)
        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 
        netif_keep_dst(dev);
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
        eth_hw_addr_random(dev);
 }
@@ -1441,12 +1470,23 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
                return dev;
 
        err = geneve_configure(net, dev, &geneve_remote_unspec,
-                              0, 0, 0, htons(dst_port), true, 0);
-       if (err) {
-               free_netdev(dev);
-               return ERR_PTR(err);
-       }
+                              0, 0, 0, htons(dst_port), true,
+                              GENEVE_F_UDP_ZERO_CSUM6_RX);
+       if (err)
+               goto err;
+
+       /* openvswitch users expect packet sizes to be unrestricted,
+        * so set the largest MTU we can.
+        */
+       err = __geneve_change_mtu(dev, IP_MAX_MTU, false);
+       if (err)
+               goto err;
+
        return dev;
+
+ err:
+       free_netdev(dev);
+       return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
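
Note: for a rough sense of the cap computed in __geneve_change_mtu() above, assuming GENEVE_BASE_HLEN is 16 (an 8-byte UDP header plus the 8-byte base GENEVE header) and a standard Ethernet device:

    /* illustrative only; header sizes assumed for IPv4 over Ethernet */
    max_mtu = IP_MAX_MTU
            - 16                    /* GENEVE_BASE_HLEN: UDP + GENEVE */
            - sizeof(struct iphdr)  /* 20, without options */
            - 14;                   /* Ethernet hard_header_len */

The 68-byte floor is the minimum IPv4 MTU required by RFC 791, and the fabric device created for openvswitch deliberately relaxes the upper check (strict = false) so an oversized request is clamped rather than rejected.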
 
index f4130af09244fb1c6b24f230c7d79cbdaeb43a22..fcb92c0d0eb967e0deba99282152a25c804b73ea 100644
@@ -624,6 +624,7 @@ struct nvsp_message {
 #define RNDIS_PKT_ALIGN_DEFAULT 8
 
 struct multi_send_data {
+       struct sk_buff *skb; /* skb containing the pkt */
        struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
        u32 count; /* counter of batched packets */
 };
index 059fc523160107239d72080cadb1f2e7c0d246df..ec313fc08d82a3b6a3d8d79fd7a6527caa30733a 100644
@@ -841,6 +841,18 @@ static inline int netvsc_send_pkt(
        return ret;
 }
 
+/* Move packet out of multi send data (msd), and clear msd */
+static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
+                               struct sk_buff **msd_skb,
+                               struct multi_send_data *msdp)
+{
+       *msd_skb = msdp->skb;
+       *msd_send = msdp->pkt;
+       msdp->skb = NULL;
+       msdp->pkt = NULL;
+       msdp->count = 0;
+}
+
 int netvsc_send(struct hv_device *device,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
@@ -855,6 +867,7 @@ int netvsc_send(struct hv_device *device,
        unsigned int section_index = NETVSC_INVALID_INDEX;
        struct multi_send_data *msdp;
        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+       struct sk_buff *msd_skb = NULL;
        bool try_batch;
        bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 
@@ -897,10 +910,8 @@ int netvsc_send(struct hv_device *device,
                   net_device->send_section_size) {
                section_index = netvsc_get_next_send_section(net_device);
                if (section_index != NETVSC_INVALID_INDEX) {
-                               msd_send = msdp->pkt;
-                               msdp->pkt = NULL;
-                               msdp->count = 0;
-                               msd_len = 0;
+                       move_pkt_msd(&msd_send, &msd_skb, msdp);
+                       msd_len = 0;
                }
        }
 
@@ -919,31 +930,31 @@ int netvsc_send(struct hv_device *device,
                        packet->total_data_buflen += msd_len;
                }
 
-               if (msdp->pkt)
-                       dev_kfree_skb_any(skb);
+               if (msdp->skb)
+                       dev_kfree_skb_any(msdp->skb);
 
                if (xmit_more && !packet->cp_partial) {
+                       msdp->skb = skb;
                        msdp->pkt = packet;
                        msdp->count++;
                } else {
                        cur_send = packet;
+                       msdp->skb = NULL;
                        msdp->pkt = NULL;
                        msdp->count = 0;
                }
        } else {
-               msd_send = msdp->pkt;
-               msdp->pkt = NULL;
-               msdp->count = 0;
+               move_pkt_msd(&msd_send, &msd_skb, msdp);
                cur_send = packet;
        }
 
        if (msd_send) {
-               m_ret = netvsc_send_pkt(msd_send, net_device, pb, skb);
+               m_ret = netvsc_send_pkt(msd_send, net_device, NULL, msd_skb);
 
                if (m_ret != 0) {
                        netvsc_free_send_slot(net_device,
                                              msd_send->send_buf_index);
-                       dev_kfree_skb_any(skb);
+                       dev_kfree_skb_any(msd_skb);
                }
        }
 
index 1c8db9afdcda8c0356db46db9fcfa4d971050b7b..98e34fee45c76d1df80c9c52770e9902f47720ed 100644
@@ -196,65 +196,6 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
        return ppi;
 }
 
-union sub_key {
-       u64 k;
-       struct {
-               u8 pad[3];
-               u8 kb;
-               u32 ka;
-       };
-};
-
-/* Toeplitz hash function
- * data: network byte order
- * return: host byte order
- */
-static u32 comp_hash(u8 *key, int klen, void *data, int dlen)
-{
-       union sub_key subk;
-       int k_next = 4;
-       u8 dt;
-       int i, j;
-       u32 ret = 0;
-
-       subk.k = 0;
-       subk.ka = ntohl(*(u32 *)key);
-
-       for (i = 0; i < dlen; i++) {
-               subk.kb = key[k_next];
-               k_next = (k_next + 1) % klen;
-               dt = ((u8 *)data)[i];
-               for (j = 0; j < 8; j++) {
-                       if (dt & 0x80)
-                               ret ^= subk.ka;
-                       dt <<= 1;
-                       subk.k <<= 1;
-               }
-       }
-
-       return ret;
-}
-
-static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
-{
-       struct flow_keys flow;
-       int data_len;
-
-       if (!skb_flow_dissect_flow_keys(skb, &flow, 0) ||
-           !(flow.basic.n_proto == htons(ETH_P_IP) ||
-             flow.basic.n_proto == htons(ETH_P_IPV6)))
-               return false;
-
-       if (flow.basic.ip_proto == IPPROTO_TCP)
-               data_len = 12;
-       else
-               data_len = 8;
-
-       *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, &flow, data_len);
-
-       return true;
-}
-
 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                        void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -267,11 +208,9 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
        if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
                return 0;
 
-       if (netvsc_set_hash(&hash, skb)) {
-               q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
-                       ndev->real_num_tx_queues;
-               skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
-       }
+       hash = skb_get_hash(skb);
+       q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+               ndev->real_num_tx_queues;
 
        if (!nvsc_dev->chn_table[q_idx])
                q_idx = 0;
@@ -1150,6 +1089,9 @@ static int netvsc_probe(struct hv_device *dev,
        net->ethtool_ops = &ethtool_ops;
        SET_NETDEV_DEV(net, &dev->device);
 
+       /* We always need headroom for rndis header */
+       net->needed_headroom = RNDIS_AND_PPI_SIZE;
+
        /* Notify the netvsc driver of the new device */
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
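
Note: the removed comp_hash() was a driver-private Toeplitz implementation; skb_get_hash() instead returns the flow hash that the core computes via the generic flow dissector and caches in the skb. The resulting queue selection, as a sketch:

    u32 hash = skb_get_hash(skb);
    u16 q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
                ndev->real_num_tx_queues;
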
index 29cbde8501ed8e09750f771204dfe9fc38503d34..d47cf14bb4a5b43bf5c94e0bdc533e1033f79b2a 100644
@@ -82,9 +82,6 @@ struct bfin_sir_self {
 
 #define DRIVER_NAME "bfin_sir"
 
-#define port_membase(port)     (((struct bfin_sir_port *)(port))->membase)
-#define get_lsr_cache(port)    (((struct bfin_sir_port *)(port))->lsr)
-#define put_lsr_cache(port, v) (((struct bfin_sir_port *)(port))->lsr = (v))
 #include <asm/bfin_serial.h>
 
 static const unsigned short per[][4] = {
index 6a57a005e0ca8162d2a0b9334440d1b9cb7d75db..94e688805dd262317327a446d8e2e460a668d75a 100644
@@ -1323,6 +1323,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 
        list_add_tail_rcu(&vlan->list, &port->vlans);
        netif_stacked_transfer_operstate(lowerdev, dev);
+       linkwatch_fire_event(dev);
 
        return 0;
 
@@ -1522,6 +1523,7 @@ static int macvlan_device_event(struct notifier_block *unused,
        port = macvlan_port_get_rtnl(dev);
 
        switch (event) {
+       case NETDEV_UP:
        case NETDEV_CHANGE:
                list_for_each_entry(vlan, &port->vlans, list)
                        netif_stacked_transfer_operstate(vlan->lowerdev,
index 60994a83a0d68ca2e4c229fe16140b6017a9dd55..f0a77020037af0790f37b4098e4f6937c6a1ebee 100644
@@ -186,6 +186,7 @@ config MDIO_GPIO
 config MDIO_OCTEON
        tristate "Support for MDIO buses on Octeon and ThunderX SOCs"
        depends on 64BIT
+       depends on HAS_IOMEM
        help
 
          This module provides a driver for the Octeon and ThunderX MDIO
index bf241a3ec5e56ceb9d9f85aed73bab90f1559781..db507e3bcab92d65143c170c1338af5ff60b11c5 100644
@@ -250,10 +250,6 @@ static int bcm7xxx_config_init(struct phy_device *phydev)
        phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XX_64CLK_MDIO);
        phy_read(phydev, MII_BCM7XXX_AUX_MODE);
 
-       /* Workaround only required for 100Mbits/sec capable PHYs */
-       if (phydev->supported & PHY_GBIT_FEATURES)
-               return 0;
-
        /* set shadow mode 2 */
        ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
                        MII_BCM7XXX_SHD_MODE_2, MII_BCM7XXX_SHD_MODE_2);
@@ -270,7 +266,7 @@ static int bcm7xxx_config_init(struct phy_device *phydev)
        phy_write(phydev, MII_BCM7XXX_100TX_FALSE_CAR, 0x7555);
 
        /* reset shadow mode 2 */
-       ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, MII_BCM7XXX_SHD_MODE_2, 0);
+       ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, MII_BCM7XXX_SHD_MODE_2);
        if (ret < 0)
                return ret;
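
Note: this assumes the helper's signature is phy_set_clr_bits(phydev, reg, set_mask, clr_mask); under a comment reading "reset shadow mode 2", the old call passed the bit in the set position and so re-enabled shadow mode instead of clearing it. The corrected call, annotated:

    /* set mask = 0, clear mask = MII_BCM7XXX_SHD_MODE_2 */
    phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, MII_BCM7XXX_SHD_MODE_2);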
 
@@ -307,11 +303,6 @@ static int bcm7xxx_suspend(struct phy_device *phydev)
        return 0;
 }
 
-static int bcm7xxx_dummy_config_init(struct phy_device *phydev)
-{
-       return 0;
-}
-
 #define BCM7XXX_28NM_GPHY(_oui, _name)                                 \
 {                                                                      \
        .phy_id         = (_oui),                                       \
@@ -337,7 +328,7 @@ static struct phy_driver bcm7xxx_driver[] = {
        .phy_id         = PHY_ID_BCM7425,
        .phy_id_mask    = 0xfffffff0,
        .name           = "Broadcom BCM7425",
-       .features       = PHY_GBIT_FEATURES |
+       .features       = PHY_BASIC_FEATURES |
                          SUPPORTED_Pause | SUPPORTED_Asym_Pause,
        .flags          = PHY_IS_INTERNAL,
        .config_init    = bcm7xxx_config_init,
@@ -349,7 +340,7 @@ static struct phy_driver bcm7xxx_driver[] = {
        .phy_id         = PHY_ID_BCM7429,
        .phy_id_mask    = 0xfffffff0,
        .name           = "Broadcom BCM7429",
-       .features       = PHY_GBIT_FEATURES |
+       .features       = PHY_BASIC_FEATURES |
                          SUPPORTED_Pause | SUPPORTED_Asym_Pause,
        .flags          = PHY_IS_INTERNAL,
        .config_init    = bcm7xxx_config_init,
@@ -361,7 +352,7 @@ static struct phy_driver bcm7xxx_driver[] = {
        .phy_id         = PHY_ID_BCM7435,
        .phy_id_mask    = 0xfffffff0,
        .name           = "Broadcom BCM7435",
-       .features       = PHY_GBIT_FEATURES |
+       .features       = PHY_BASIC_FEATURES |
                          SUPPORTED_Pause | SUPPORTED_Asym_Pause,
        .flags          = PHY_IS_INTERNAL,
        .config_init    = bcm7xxx_config_init,
@@ -369,30 +360,6 @@ static struct phy_driver bcm7xxx_driver[] = {
        .read_status    = genphy_read_status,
        .suspend        = bcm7xxx_suspend,
        .resume         = bcm7xxx_config_init,
-}, {
-       .phy_id         = PHY_BCM_OUI_4,
-       .phy_id_mask    = 0xffff0000,
-       .name           = "Broadcom BCM7XXX 40nm",
-       .features       = PHY_GBIT_FEATURES |
-                         SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-       .flags          = PHY_IS_INTERNAL,
-       .config_init    = bcm7xxx_config_init,
-       .config_aneg    = genphy_config_aneg,
-       .read_status    = genphy_read_status,
-       .suspend        = bcm7xxx_suspend,
-       .resume         = bcm7xxx_config_init,
-}, {
-       .phy_id         = PHY_BCM_OUI_5,
-       .phy_id_mask    = 0xffffff00,
-       .name           = "Broadcom BCM7XXX 65nm",
-       .features       = PHY_BASIC_FEATURES |
-                         SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-       .flags          = PHY_IS_INTERNAL,
-       .config_init    = bcm7xxx_dummy_config_init,
-       .config_aneg    = genphy_config_aneg,
-       .read_status    = genphy_read_status,
-       .suspend        = bcm7xxx_suspend,
-       .resume         = bcm7xxx_config_init,
 } };
 
 static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
@@ -404,8 +371,6 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
        { PHY_ID_BCM7439, 0xfffffff0, },
        { PHY_ID_BCM7435, 0xfffffff0, },
        { PHY_ID_BCM7445, 0xfffffff0, },
-       { PHY_BCM_OUI_4, 0xffff0000 },
-       { PHY_BCM_OUI_5, 0xffffff00 },
        { }
 };
 
index 180f6995277944923b2aa57bacdf0bc17b9637ad..7a240fce3a7ea09edc9f10c6b93fadd94b2de3f5 100644
@@ -846,6 +846,11 @@ static void decode_rxts(struct dp83640_private *dp83640,
        struct skb_shared_hwtstamps *shhwtstamps = NULL;
        struct sk_buff *skb;
        unsigned long flags;
+       u8 overflow;
+
+       overflow = (phy_rxts->ns_hi >> 14) & 0x3;
+       if (overflow)
+               pr_debug("rx timestamp queue overflow, count %d\n", overflow);
 
        spin_lock_irqsave(&dp83640->rx_lock, flags);
 
@@ -888,6 +893,7 @@ static void decode_txts(struct dp83640_private *dp83640,
        struct skb_shared_hwtstamps shhwtstamps;
        struct sk_buff *skb;
        u64 ns;
+       u8 overflow;
 
        /* We must already have the skb that triggered this. */
 
@@ -897,6 +903,17 @@ static void decode_txts(struct dp83640_private *dp83640,
                pr_debug("have timestamp but tx_queue empty\n");
                return;
        }
+
+       overflow = (phy_txts->ns_hi >> 14) & 0x3;
+       if (overflow) {
+               pr_debug("tx timestamp queue overflow, count %d\n", overflow);
+               while (skb) {
+                       skb_complete_tx_timestamp(skb, NULL);
+                       skb = skb_dequeue(&dp83640->tx_queue);
+               }
+               return;
+       }
+
        ns = phy2txts(phy_txts);
        memset(&shhwtstamps, 0, sizeof(shhwtstamps));
        shhwtstamps.hwtstamp = ns_to_ktime(ns);
index e3eb96443c97ad0bc6e03ad0784a119d35b89ee7..ab1d0fcaf1d9cb704882bb4c4bce65fd579252d1 100644
@@ -446,6 +446,12 @@ static int m88e1510_config_aneg(struct phy_device *phydev)
        if (err < 0)
                return err;
 
+       return 0;
+}
+
+static int marvell_config_init(struct phy_device *phydev)
+{
+       /* Set registers from marvell,reg-init DT property */
        return marvell_of_reg_init(phydev);
 }
 
@@ -495,7 +501,7 @@ static int m88e1116r_config_init(struct phy_device *phydev)
 
        mdelay(500);
 
-       return 0;
+       return marvell_config_init(phydev);
 }
 
 static int m88e3016_config_init(struct phy_device *phydev)
@@ -514,7 +520,7 @@ static int m88e3016_config_init(struct phy_device *phydev)
        if (reg < 0)
                return reg;
 
-       return 0;
+       return marvell_config_init(phydev);
 }
 
 static int m88e1111_config_init(struct phy_device *phydev)
@@ -1078,6 +1084,7 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES,
                .probe = marvell_probe,
                .flags = PHY_HAS_INTERRUPT,
+               .config_init = &marvell_config_init,
                .config_aneg = &marvell_config_aneg,
                .read_status = &genphy_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
@@ -1149,6 +1156,7 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES,
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
+               .config_init = &marvell_config_init,
                .config_aneg = &m88e1121_config_aneg,
                .read_status = &marvell_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
@@ -1167,6 +1175,7 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES,
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
+               .config_init = &marvell_config_init,
                .config_aneg = &m88e1318_config_aneg,
                .read_status = &marvell_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
@@ -1259,6 +1268,7 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES,
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
+               .config_init = &marvell_config_init,
                .config_aneg = &m88e1510_config_aneg,
                .read_status = &marvell_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
@@ -1277,6 +1287,7 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES,
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
+               .config_init = &marvell_config_init,
                .config_aneg = &m88e1510_config_aneg,
                .read_status = &marvell_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
index 8763bb20988a0edf80091347987fb38086e85496..5590b9c182c967d80a186256162f30690c707506 100644
@@ -692,25 +692,29 @@ void phy_change(struct work_struct *work)
        struct phy_device *phydev =
                container_of(work, struct phy_device, phy_queue);
 
-       if (phydev->drv->did_interrupt &&
-           !phydev->drv->did_interrupt(phydev))
-               goto ignore;
+       if (phy_interrupt_is_valid(phydev)) {
+               if (phydev->drv->did_interrupt &&
+                   !phydev->drv->did_interrupt(phydev))
+                       goto ignore;
 
-       if (phy_disable_interrupts(phydev))
-               goto phy_err;
+               if (phy_disable_interrupts(phydev))
+                       goto phy_err;
+       }
 
        mutex_lock(&phydev->lock);
        if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
                phydev->state = PHY_CHANGELINK;
        mutex_unlock(&phydev->lock);
 
-       atomic_dec(&phydev->irq_disable);
-       enable_irq(phydev->irq);
+       if (phy_interrupt_is_valid(phydev)) {
+               atomic_dec(&phydev->irq_disable);
+               enable_irq(phydev->irq);
 
-       /* Reenable interrupts */
-       if (PHY_HALTED != phydev->state &&
-           phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
-               goto irq_enable_err;
+               /* Reenable interrupts */
+               if (PHY_HALTED != phydev->state &&
+                   phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
+                       goto irq_enable_err;
+       }
 
        /* reschedule state queue work to run as soon as possible */
        cancel_delayed_work_sync(&phydev->state_queue);
@@ -905,10 +909,10 @@ void phy_state_machine(struct work_struct *work)
                phydev->adjust_link(phydev->attached_dev);
                break;
        case PHY_RUNNING:
-               /* Only register a CHANGE if we are polling or ignoring
-                * interrupts and link changed since latest checking.
+               /* Only register a CHANGE if we are polling and link changed
+                * since latest checking.
                 */
-               if (!phy_interrupt_is_valid(phydev)) {
+               if (phydev->irq == PHY_POLL) {
                        old_link = phydev->link;
                        err = phy_read_status(phydev);
                        if (err)
@@ -1000,15 +1004,21 @@ void phy_state_machine(struct work_struct *work)
                   phy_state_to_str(old_state),
                   phy_state_to_str(phydev->state));
 
-       queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
-                          PHY_STATE_TIME * HZ);
+       /* Only re-schedule a PHY state machine change if we are polling the
+        * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
+        * between states from phy_mac_interrupt()
+        */
+       if (phydev->irq == PHY_POLL)
+               queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+                                  PHY_STATE_TIME * HZ);
 }
 
 void phy_mac_interrupt(struct phy_device *phydev, int new_link)
 {
-       cancel_work_sync(&phydev->phy_queue);
        phydev->link = new_link;
-       schedule_work(&phydev->phy_queue);
+
+       /* Trigger a state machine change */
+       queue_work(system_power_efficient_wq, &phydev->phy_queue);
 }
 EXPORT_SYMBOL(phy_mac_interrupt);
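
Note: three cases of phydev->irq are being distinguished here (constant values assumed from include/linux/phy.h: PHY_POLL is -1, PHY_IGNORE_INTERRUPT is -2, and phy_interrupt_is_valid() is true only for a real IRQ number). Sketched:

    if (phy_interrupt_is_valid(phydev)) {
            /* a wired PHY interrupt drives state changes */
    } else if (phydev->irq == PHY_POLL) {
            /* no IRQ: poll, and re-queue the state machine work */
    } else {
            /* PHY_IGNORE_INTERRUPT: the MAC reports link events
             * by calling phy_mac_interrupt()
             */
    }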
 
index bad3f005faee433608f1e059cf6f07f8cbae6093..e551f3a89cfd0212e381ec274c59f558eb07d2b0 100644
@@ -1410,7 +1410,7 @@ int genphy_config_init(struct phy_device *phydev)
 
        features = (SUPPORTED_TP | SUPPORTED_MII
                        | SUPPORTED_AUI | SUPPORTED_FIBRE |
-                       SUPPORTED_BNC);
+                       SUPPORTED_BNC | SUPPORTED_Pause | SUPPORTED_Asym_Pause);
 
        /* Do we support autonegotiation? */
        val = phy_read(phydev, MII_BMSR);
index e485f2653c8208578d7994284dd12faabcc6c096..2e21e9366f76a184aa2c8d770557e6d9ff0db235 100644
 #include <linux/netdevice.h>
 #include <linux/smscphy.h>
 
+struct smsc_phy_priv {
+       bool energy_enable;
+};
+
 static int smsc_phy_config_intr(struct phy_device *phydev)
 {
        int rc = phy_write (phydev, MII_LAN83C185_IM,
@@ -43,19 +47,14 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev)
 
 static int smsc_phy_config_init(struct phy_device *phydev)
 {
-       int __maybe_unused len;
-       struct device *dev __maybe_unused = &phydev->mdio.dev;
-       struct device_node *of_node __maybe_unused = dev->of_node;
+       struct smsc_phy_priv *priv = phydev->priv;
+
        int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
-       int enable_energy = 1;
 
        if (rc < 0)
                return rc;
 
-       if (of_find_property(of_node, "smsc,disable-energy-detect", &len))
-               enable_energy = 0;
-
-       if (enable_energy) {
+       if (priv->energy_enable) {
                /* Enable energy detect mode for this SMSC Transceivers */
                rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS,
                               rc | MII_LAN83C185_EDPWRDOWN);
@@ -110,10 +109,13 @@ static int lan911x_config_init(struct phy_device *phydev)
  */
 static int lan87xx_read_status(struct phy_device *phydev)
 {
+       struct smsc_phy_priv *priv = phydev->priv;
+
        int err = genphy_read_status(phydev);
-       int i;
 
-       if (!phydev->link) {
+       if (!phydev->link && priv->energy_enable) {
+               int i;
+
                /* Disable EDPD to wake up PHY */
                int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
                if (rc < 0)
@@ -149,6 +151,26 @@ static int lan87xx_read_status(struct phy_device *phydev)
        return err;
 }
 
+static int smsc_phy_probe(struct phy_device *phydev)
+{
+       struct device *dev = &phydev->mdio.dev;
+       struct device_node *of_node = dev->of_node;
+       struct smsc_phy_priv *priv;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->energy_enable = true;
+
+       if (of_property_read_bool(of_node, "smsc,disable-energy-detect"))
+               priv->energy_enable = false;
+
+       phydev->priv = priv;
+
+       return 0;
+}
+
 static struct phy_driver smsc_phy_driver[] = {
 {
        .phy_id         = 0x0007c0a0, /* OUI=0x00800f, Model#=0x0a */
@@ -159,6 +181,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -180,6 +204,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -201,6 +227,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
@@ -222,6 +250,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -242,6 +272,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
@@ -263,6 +295,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
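
Note: moving the DT lookup into a probe hook means the "smsc,disable-energy-detect" property is read once and cached in driver-private data allocated with devm_kzalloc(), whose lifetime is tied to the device (no matching free needed). The same boolean-property idiom can be condensed to a single line, as a sketch:

    priv->energy_enable =
            !of_property_read_bool(of_node, "smsc,disable-energy-detect");
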
index f3c63022eb3c582b46841e381e67af478c03f629..4ddae8118c8566e4de07a16b136af13597fe4f3e 100644
@@ -395,6 +395,8 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 
                if (!__pppoe_xmit(sk_pppox(relay_po), skb))
                        goto abort_put;
+
+               sock_put(sk_pppox(relay_po));
        } else {
                if (sock_queue_rcv_skb(sk, skb))
                        goto abort_kfree;
index 90868ca5e3412ffebd28630f571bdddfe389cc35..ae0905ed4a32b55183568e95512c6727458a9f96 100644
@@ -129,24 +129,27 @@ static int lookup_chan_dst(u16 call_id, __be32 d_addr)
        return i < MAX_CALLID;
 }
 
-static int add_chan(struct pppox_sock *sock)
+static int add_chan(struct pppox_sock *sock,
+                   struct pptp_addr *sa)
 {
        static int call_id;
 
        spin_lock(&chan_lock);
-       if (!sock->proto.pptp.src_addr.call_id) {
+       if (!sa->call_id)       {
                call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1);
                if (call_id == MAX_CALLID) {
                        call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1);
                        if (call_id == MAX_CALLID)
                                goto out_err;
                }
-               sock->proto.pptp.src_addr.call_id = call_id;
-       } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap))
+               sa->call_id = call_id;
+       } else if (test_bit(sa->call_id, callid_bitmap)) {
                goto out_err;
+       }
 
-       set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap);
-       rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock);
+       sock->proto.pptp.src_addr = *sa;
+       set_bit(sa->call_id, callid_bitmap);
+       rcu_assign_pointer(callid_sock[sa->call_id], sock);
        spin_unlock(&chan_lock);
 
        return 0;
@@ -416,7 +419,6 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
        struct sock *sk = sock->sk;
        struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr;
        struct pppox_sock *po = pppox_sk(sk);
-       struct pptp_opt *opt = &po->proto.pptp;
        int error = 0;
 
        if (sockaddr_len < sizeof(struct sockaddr_pppox))
@@ -424,10 +426,22 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
 
        lock_sock(sk);
 
-       opt->src_addr = sp->sa_addr.pptp;
-       if (add_chan(po))
+       if (sk->sk_state & PPPOX_DEAD) {
+               error = -EALREADY;
+               goto out;
+       }
+
+       if (sk->sk_state & PPPOX_BOUND) {
                error = -EBUSY;
+               goto out;
+       }
+
+       if (add_chan(po, &sp->sa_addr.pptp))
+               error = -EBUSY;
+       else
+               sk->sk_state |= PPPOX_BOUND;
 
+out:
        release_sock(sk);
        return error;
 }
@@ -498,7 +512,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
        }
 
        opt->dst_addr = sp->sa_addr.pptp;
-       sk->sk_state = PPPOX_CONNECTED;
+       sk->sk_state |= PPPOX_CONNECTED;
 
  end:
        release_sock(sk);
index 7f83504dfa69bba2c8db612d1d25196fa5b72d91..cdde59089f729fc3578e4aea56796df237c0cee2 100644
@@ -395,6 +395,10 @@ config USB_NET_RNDIS_HOST
          The protocol specification is incomplete, and is controlled by
          (and for) Microsoft; it isn't an "Open" ecosystem or market.
 
+config USB_NET_CDC_SUBSET_ENABLE
+       tristate
+       depends on USB_NET_CDC_SUBSET
+
 config USB_NET_CDC_SUBSET
        tristate "Simple USB Network Links (CDC Ethernet subset)"
        depends on USB_USBNET
@@ -413,6 +417,7 @@ config USB_NET_CDC_SUBSET
 config USB_ALI_M5632
        bool "ALi M5632 based 'USB 2.0 Data Link' cables"
        depends on USB_NET_CDC_SUBSET
+       select USB_NET_CDC_SUBSET_ENABLE
        help
          Choose this option if you're using a host-to-host cable
          based on this design, which supports USB 2.0 high speed.
@@ -420,6 +425,7 @@ config USB_ALI_M5632
 config USB_AN2720
        bool "AnchorChips 2720 based cables (Xircom PGUNET, ...)"
        depends on USB_NET_CDC_SUBSET
+       select USB_NET_CDC_SUBSET_ENABLE
        help
          Choose this option if you're using a host-to-host cable
          based on this design.  Note that AnchorChips is now a
@@ -428,6 +434,7 @@ config USB_AN2720
 config USB_BELKIN
        bool "eTEK based host-to-host cables (Advance, Belkin, ...)"
        depends on USB_NET_CDC_SUBSET
+       select USB_NET_CDC_SUBSET_ENABLE
        default y
        help
          Choose this option if you're using a host-to-host cable
@@ -437,6 +444,7 @@ config USB_BELKIN
 config USB_ARMLINUX
        bool "Embedded ARM Linux links (iPaq, ...)"
        depends on USB_NET_CDC_SUBSET
+       select USB_NET_CDC_SUBSET_ENABLE
        default y
        help
          Choose this option to support the "usb-eth" networking driver
@@ -454,6 +462,7 @@ config USB_ARMLINUX
 config USB_EPSON2888
        bool "Epson 2888 based firmware (DEVELOPMENT)"
        depends on USB_NET_CDC_SUBSET
+       select USB_NET_CDC_SUBSET_ENABLE
        help
          Choose this option to support the usb networking links used
          by some sample firmware from Epson.
@@ -461,6 +470,7 @@ config USB_EPSON2888
 config USB_KC2190
        bool "KT Technology KC2190 based cables (InstaNet)"
        depends on USB_NET_CDC_SUBSET
+       select USB_NET_CDC_SUBSET_ENABLE
        help
          Choose this option if you're using a host-to-host cable
          with one of these chips.
index b5f04068dbe4859fb9581304c8bd893addaaeb1d..37fb46aee341a77bf734ea55999fd70c9b8eac3a 100644
@@ -23,7 +23,7 @@ obj-$(CONFIG_USB_NET_GL620A)  += gl620a.o
 obj-$(CONFIG_USB_NET_NET1080)  += net1080.o
 obj-$(CONFIG_USB_NET_PLUSB)    += plusb.o
 obj-$(CONFIG_USB_NET_RNDIS_HOST)       += rndis_host.o
-obj-$(CONFIG_USB_NET_CDC_SUBSET)       += cdc_subset.o
+obj-$(CONFIG_USB_NET_CDC_SUBSET_ENABLE)        += cdc_subset.o
 obj-$(CONFIG_USB_NET_ZAURUS)   += zaurus.o
 obj-$(CONFIG_USB_NET_MCS7830)  += mcs7830.o
 obj-$(CONFIG_USB_USBNET)       += usbnet.o
index 2ed53331bfb28de22019754cb46a8a27c35c43ad..1c299b8a162d7243c5ae94b96618c3763083e91e 100644
@@ -36,7 +36,7 @@
 #define DRIVER_AUTHOR  "WOOJUNG HUH <woojung.huh@microchip.com>"
 #define DRIVER_DESC    "LAN78XX USB 3.0 Gigabit Ethernet Devices"
 #define DRIVER_NAME    "lan78xx"
-#define DRIVER_VERSION "1.0.1"
+#define DRIVER_VERSION "1.0.2"
 
 #define TX_TIMEOUT_JIFFIES             (5 * HZ)
 #define THROTTLE_JIFFIES               (HZ / 8)
@@ -462,32 +462,53 @@ static int lan78xx_read_raw_eeprom(struct lan78xx_net *dev, u32 offset,
                                   u32 length, u8 *data)
 {
        u32 val;
+       u32 saved;
        int i, ret;
+       int retval;
 
-       ret = lan78xx_eeprom_confirm_not_busy(dev);
-       if (ret)
-               return ret;
+       /* Depending on the chip, some EEPROM pins are muxed with the LED
+        * function; disable and restore it around the EEPROM access.
+        */
+       ret = lan78xx_read_reg(dev, HW_CFG, &val);
+       saved = val;
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000) {
+               val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
+               ret = lan78xx_write_reg(dev, HW_CFG, val);
+       }
+
+       retval = lan78xx_eeprom_confirm_not_busy(dev);
+       if (retval)
+               return retval;
 
        for (i = 0; i < length; i++) {
                val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_READ_;
                val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
                ret = lan78xx_write_reg(dev, E2P_CMD, val);
-               if (unlikely(ret < 0))
-                       return -EIO;
+               if (unlikely(ret < 0)) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
-               ret = lan78xx_wait_eeprom(dev);
-               if (ret < 0)
-                       return ret;
+               retval = lan78xx_wait_eeprom(dev);
+               if (retval < 0)
+                       goto exit;
 
                ret = lan78xx_read_reg(dev, E2P_DATA, &val);
-               if (unlikely(ret < 0))
-                       return -EIO;
+               if (unlikely(ret < 0)) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
                data[i] = val & 0xFF;
                offset++;
        }
 
-       return 0;
+       retval = 0;
+exit:
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000)
+               ret = lan78xx_write_reg(dev, HW_CFG, saved);
+
+       return retval;
 }
 
 static int lan78xx_read_eeprom(struct lan78xx_net *dev, u32 offset,
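
This read path (and the write path in the next hunk) brackets the bus access with a save/mask/restore of the HW_CFG LED-enable bits, because on the LAN7800 (chip ID 0x78000000) the EEPROM pins are muxed with the LEDs; note that every error path funnels through the exit label so the restore always runs. A minimal plain-C sketch of the pattern, with hypothetical register helpers and illustrative offsets and bit values:

    #include <stdint.h>

    #define HW_CFG      0x010u              /* illustrative offset */
    #define LED_EN_BITS (3u << 20)          /* illustrative LED0/LED1 bits */

    /* Hypothetical stand-ins for lan78xx_read_reg()/lan78xx_write_reg(). */
    extern int reg_read(uint32_t reg, uint32_t *val);
    extern int reg_write(uint32_t reg, uint32_t val);
    extern int eeprom_io(void);

    int eeprom_access_with_leds_masked(void)
    {
        uint32_t saved, val;
        int retval;

        reg_read(HW_CFG, &val);
        saved = val;                         /* remember LED configuration */
        reg_write(HW_CFG, val & ~LED_EN_BITS);

        retval = eeprom_io();                /* all exits restore below */

        reg_write(HW_CFG, saved);
        return retval;
    }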
@@ -509,44 +530,67 @@ static int lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset,
                                    u32 length, u8 *data)
 {
        u32 val;
+       u32 saved;
        int i, ret;
+       int retval;
 
-       ret = lan78xx_eeprom_confirm_not_busy(dev);
-       if (ret)
-               return ret;
+       /* Depending on the chip, some EEPROM pins are muxed with the LED
+        * function; disable and restore it around the EEPROM access.
+        */
+       ret = lan78xx_read_reg(dev, HW_CFG, &val);
+       saved = val;
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000) {
+               val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
+               ret = lan78xx_write_reg(dev, HW_CFG, val);
+       }
+
+       retval = lan78xx_eeprom_confirm_not_busy(dev);
+       if (retval)
+               goto exit;
 
        /* Issue write/erase enable command */
        val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_EWEN_;
        ret = lan78xx_write_reg(dev, E2P_CMD, val);
-       if (unlikely(ret < 0))
-               return -EIO;
+       if (unlikely(ret < 0)) {
+               retval = -EIO;
+               goto exit;
+       }
 
-       ret = lan78xx_wait_eeprom(dev);
-       if (ret < 0)
-               return ret;
+       retval = lan78xx_wait_eeprom(dev);
+       if (retval < 0)
+               goto exit;
 
        for (i = 0; i < length; i++) {
                /* Fill data register */
                val = data[i];
                ret = lan78xx_write_reg(dev, E2P_DATA, val);
-               if (ret < 0)
-                       return ret;
+               if (ret < 0) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
                /* Send "write" command */
                val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_WRITE_;
                val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
                ret = lan78xx_write_reg(dev, E2P_CMD, val);
-               if (ret < 0)
-                       return ret;
+               if (ret < 0) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
-               ret = lan78xx_wait_eeprom(dev);
-               if (ret < 0)
-                       return ret;
+               retval = lan78xx_wait_eeprom(dev);
+               if (retval < 0)
+                       goto exit;
 
                offset++;
        }
 
-       return 0;
+       retval = 0;
+exit:
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000)
+               ret = lan78xx_write_reg(dev, HW_CFG, saved);
+
+       return retval;
 }
 
 static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
@@ -904,7 +948,6 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 
        if (!phydev->link && dev->link_on) {
                dev->link_on = false;
-               netif_carrier_off(dev->net);
 
                /* reset MAC */
                ret = lan78xx_read_reg(dev, MAC_CR, &buf);
@@ -914,6 +957,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                ret = lan78xx_write_reg(dev, MAC_CR, buf);
                if (unlikely(ret < 0))
                        return -EIO;
+
+               phy_mac_interrupt(phydev, 0);
        } else if (phydev->link && !dev->link_on) {
                dev->link_on = true;
 
@@ -953,7 +998,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                          ethtool_cmd_speed(&ecmd), ecmd.duplex, ladv, radv);
 
                ret = lan78xx_update_flowcontrol(dev, ecmd.duplex, ladv, radv);
-               netif_carrier_on(dev->net);
+               phy_mac_interrupt(phydev, 1);
        }
 
        return ret;
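
In the link_reset hunks the driver stops flipping the carrier directly and instead reports the new link state through phy_mac_interrupt(), letting phylib's state machine own netif_carrier_on()/netif_carrier_off(). A kernel-style sketch, assuming the two-argument phy_mac_interrupt() of this kernel generation:

    #include <linux/phy.h>

    /* Sketch: funnel MAC-observed link changes through phylib rather
     * than calling netif_carrier_{on,off}() behind its back.
     */
    static void report_link(struct phy_device *phydev, bool up)
    {
            phy_mac_interrupt(phydev, up ? 1 : 0);
    }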
@@ -1495,7 +1540,6 @@ done:
 static int lan78xx_mdio_init(struct lan78xx_net *dev)
 {
        int ret;
-       int i;
 
        dev->mdiobus = mdiobus_alloc();
        if (!dev->mdiobus) {
@@ -1511,10 +1555,6 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev)
        snprintf(dev->mdiobus->id, MII_BUS_ID_SIZE, "usb-%03d:%03d",
                 dev->udev->bus->busnum, dev->udev->devnum);
 
-       /* handle our own interrupt */
-       for (i = 0; i < PHY_MAX_ADDR; i++)
-               dev->mdiobus->irq[i] = PHY_IGNORE_INTERRUPT;
-
        switch (dev->devid & ID_REV_CHIP_ID_MASK_) {
        case 0x78000000:
        case 0x78500000:
@@ -1558,6 +1598,16 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
                return -EIO;
        }
 
+       /* Enable PHY interrupts.
+        * We handle our own interrupt.
+        */
+       ret = phy_read(phydev, LAN88XX_INT_STS);
+       ret = phy_write(phydev, LAN88XX_INT_MASK,
+                       LAN88XX_INT_MASK_MDINTPIN_EN_ |
+                       LAN88XX_INT_MASK_LINK_CHANGE_);
+
+       phydev->irq = PHY_IGNORE_INTERRUPT;
+
        ret = phy_connect_direct(dev->net, phydev,
                                 lan78xx_link_status_change,
                                 PHY_INTERFACE_MODE_GMII);
@@ -1580,14 +1630,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
                              SUPPORTED_Pause | SUPPORTED_Asym_Pause);
        genphy_config_aneg(phydev);
 
-       /* Workaround to enable PHY interrupt.
-        * phy_start_interrupts() is API for requesting and enabling
-        * PHY interrupt. However, USB-to-Ethernet device can't use
-        * request_irq() called in phy_start_interrupts().
-        * Set PHY to PHY_HALTED and call phy_start()
-        * to make a call to phy_enable_interrupts()
-        */
-       phy_stop(phydev);
        phy_start(phydev);
 
        netif_dbg(dev, ifup, dev->net, "phy initialised successfully");
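
The phy_init hunk replaces the earlier phy_stop()/phy_start() workaround: it acks any stale PHY interrupt status, unmasks the link-change sources, and sets phydev->irq to PHY_IGNORE_INTERRUPT so phylib neither polls nor tries to request_irq(), which a USB device cannot provide. A kernel-style sketch of that sequence, using the LAN88xx register names from the hunk (assumed to come from the Microchip PHY header):

    #include <linux/phy.h>

    static void phy_int_setup(struct phy_device *phydev)
    {
            phy_read(phydev, LAN88XX_INT_STS);       /* ack stale events */
            phy_write(phydev, LAN88XX_INT_MASK,
                      LAN88XX_INT_MASK_MDINTPIN_EN_ |
                      LAN88XX_INT_MASK_LINK_CHANGE_);
            phydev->irq = PHY_IGNORE_INTERRUPT;      /* we deliver events */
    }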
@@ -2221,7 +2263,9 @@ netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
        if (skb2) {
                skb_queue_tail(&dev->txq_pend, skb2);
 
-               if (skb_queue_len(&dev->txq_pend) > 10)
+               /* throttle the TX path at slower than SuperSpeed USB */
+               if ((dev->udev->speed < USB_SPEED_SUPER) &&
+                   (skb_queue_len(&dev->txq_pend) > 10))
                        netif_stop_queue(net);
        } else {
                netif_dbg(dev, tx_err, dev->net,
index 23e9880791fcd3c05997e0af13d6806a75b0dffb..570deef53f74301896cfbb8e2ca5c622a5b67185 100644 (file)
@@ -637,6 +637,7 @@ static const struct usb_device_id products[] = {
 
        /* 3. Combined interface devices matching on interface number */
        {QMI_FIXED_INTF(0x0408, 0xea42, 4)},    /* Yota / Megafon M100-1 */
+       {QMI_FIXED_INTF(0x05c6, 0x6001, 3)},    /* 4G LTE usb-modem U901 */
        {QMI_FIXED_INTF(0x05c6, 0x7000, 0)},
        {QMI_FIXED_INTF(0x05c6, 0x7001, 1)},
        {QMI_FIXED_INTF(0x05c6, 0x7002, 1)},
index 221a53025fd023077adbc64f0e53a6246373032e..72ba8ae7f09a6a5a7a8b14303731997f966c9fea 100644 (file)
@@ -377,7 +377,7 @@ union Vmxnet3_GenericDesc {
 #define VMXNET3_TX_RING_MAX_SIZE   4096
 #define VMXNET3_TC_RING_MAX_SIZE   4096
 #define VMXNET3_RX_RING_MAX_SIZE   4096
-#define VMXNET3_RX_RING2_MAX_SIZE  2048
+#define VMXNET3_RX_RING2_MAX_SIZE  4096
 #define VMXNET3_RC_RING_MAX_SIZE   8192
 
 /* a list of reasons for queue stop */
index bdb8a6c0f8aa2c62e22edc64d3900530477b6fa8..729c344e677499b6f737aa622d6925c07d647a17 100644 (file)
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.5.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.6.0-k"
 
 /* a 32-bit int, each byte encodes a version number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040500
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040600
 
 #if defined(CONFIG_PCI_MSI)
        /* RSS only makes sense if MSI-X is supported. */
index 2d88c799d2ac32a44c2bc304e19280f2e471ad7b..e6944b29588e1bd329c5b331988373d5297c0bd6 100644 (file)
@@ -73,7 +73,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 static int vxlan_net_id;
 static struct rtnl_link_ops vxlan_link_ops;
 
-static const u8 all_zeros_mac[ETH_ALEN];
+static const u8 all_zeros_mac[ETH_ALEN + 2];
 
 static int vxlan_sock_add(struct vxlan_dev *vxlan);
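
The two padding bytes added to all_zeros_mac are there for the 64-bit MAC comparison helpers: ether_addr_equal_64bits() reads eight bytes per operand and documents that each 6-byte address must be followed by two readable bytes. A minimal sketch:

    #include <linux/etherdevice.h>

    /* The "+ 2" gives ether_addr_equal_64bits() its required two bytes
     * of padding after the 6-byte address.
     */
    static const u8 all_zeros_mac[ETH_ALEN + 2];

    static bool mac_is_zero(const u8 addr[ETH_ALEN + 2])
    {
            return ether_addr_equal_64bits(addr, all_zeros_mac);
    }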
 
@@ -1985,11 +1985,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                     vxlan->cfg.port_max, true);
 
        if (info) {
-               if (info->key.tun_flags & TUNNEL_CSUM)
-                       flags |= VXLAN_F_UDP_CSUM;
-               else
-                       flags &= ~VXLAN_F_UDP_CSUM;
-
                ttl = info->key.ttl;
                tos = info->key.tos;
 
@@ -2004,8 +1999,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        goto drop;
                sk = vxlan->vn4_sock->sock->sk;
 
-               if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
-                       df = htons(IP_DF);
+               if (info) {
+                       if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+                               df = htons(IP_DF);
+
+                       if (info->key.tun_flags & TUNNEL_CSUM)
+                               flags |= VXLAN_F_UDP_CSUM;
+                       else
+                               flags &= ~VXLAN_F_UDP_CSUM;
+               }
 
                memset(&fl4, 0, sizeof(fl4));
                fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
@@ -2101,6 +2103,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        return;
                }
 
+               if (info) {
+                       if (info->key.tun_flags & TUNNEL_CSUM)
+                               flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
+                       else
+                               flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+               }
+
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
                                      0, ttl, src_port, dst_port, htonl(vni << 8), md,
@@ -2162,9 +2171,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 #endif
        }
 
-       if (vxlan->flags & VXLAN_F_COLLECT_METADATA &&
-           info && info->mode & IP_TUNNEL_INFO_TX) {
-               vxlan_xmit_one(skb, dev, NULL, false);
+       if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
+               if (info && info->mode & IP_TUNNEL_INFO_TX)
+                       vxlan_xmit_one(skb, dev, NULL, false);
+               else
+                       kfree_skb(skb);
                return NETDEV_TX_OK;
        }
 
@@ -2358,29 +2369,43 @@ static void vxlan_set_multicast_list(struct net_device *dev)
 {
 }
 
-static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
+static int __vxlan_change_mtu(struct net_device *dev,
+                             struct net_device *lowerdev,
+                             struct vxlan_rdst *dst, int new_mtu, bool strict)
 {
-       struct vxlan_dev *vxlan = netdev_priv(dev);
-       struct vxlan_rdst *dst = &vxlan->default_dst;
-       struct net_device *lowerdev;
-       int max_mtu;
+       int max_mtu = IP_MAX_MTU;
 
-       lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
-       if (lowerdev == NULL)
-               return eth_change_mtu(dev, new_mtu);
+       if (lowerdev)
+               max_mtu = lowerdev->mtu;
 
        if (dst->remote_ip.sa.sa_family == AF_INET6)
-               max_mtu = lowerdev->mtu - VXLAN6_HEADROOM;
+               max_mtu -= VXLAN6_HEADROOM;
        else
-               max_mtu = lowerdev->mtu - VXLAN_HEADROOM;
+               max_mtu -= VXLAN_HEADROOM;
 
-       if (new_mtu < 68 || new_mtu > max_mtu)
+       if (new_mtu < 68)
                return -EINVAL;
 
+       if (new_mtu > max_mtu) {
+               if (strict)
+                       return -EINVAL;
+
+               new_mtu = max_mtu;
+       }
+
        dev->mtu = new_mtu;
        return 0;
 }
 
+static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
+{
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+       struct vxlan_rdst *dst = &vxlan->default_dst;
+       struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
+                                                        dst->remote_ifindex);
+       return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
+}
+
 static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
                                struct ip_tunnel_info *info,
                                __be16 sport, __be16 dport)
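
Splitting the MTU logic into __vxlan_change_mtu() yields two behaviours from one validator: the ndo_change_mtu path passes strict=true and still rejects oversized requests, while device configuration (the conf->mtu call added further down) passes strict=false and silently clamps to what the lower device minus the VXLAN headroom permits. A plain-C sketch of the strict-versus-clamp semantics, with illustrative values:

    #include <stdbool.h>

    /* headroom stands in for VXLAN_HEADROOM / VXLAN6_HEADROOM; 68 is
     * the minimum MTU the hunk also enforces.
     */
    static int check_mtu(int new_mtu, int lower_mtu, int headroom,
                         bool strict, int *dev_mtu)
    {
        int max_mtu = lower_mtu - headroom;

        if (new_mtu < 68)
            return -1;
        if (new_mtu > max_mtu) {
            if (strict)
                return -1;          /* user request: refuse */
            new_mtu = max_mtu;      /* link configure: clamp */
        }
        *dev_mtu = new_mtu;
        return 0;
    }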
@@ -2514,6 +2539,7 @@ static void vxlan_setup(struct net_device *dev)
        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
        dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
        netif_keep_dst(dev);
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
 
        INIT_LIST_HEAD(&vxlan->next);
@@ -2756,6 +2782,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
        int err;
        bool use_ipv6 = false;
        __be16 default_port = vxlan->cfg.dst_port;
+       struct net_device *lowerdev = NULL;
 
        vxlan->net = src_net;
 
@@ -2776,9 +2803,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
        }
 
        if (conf->remote_ifindex) {
-               struct net_device *lowerdev
-                        = __dev_get_by_index(src_net, conf->remote_ifindex);
-
+               lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
                dst->remote_ifindex = conf->remote_ifindex;
 
                if (!lowerdev) {
@@ -2802,6 +2827,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
                needed_headroom = lowerdev->hard_header_len;
        }
 
+       if (conf->mtu) {
+               err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
+               if (err)
+                       return err;
+       }
+
        if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
                needed_headroom += VXLAN6_HEADROOM;
        else
index 7a72407208b161772ba66dc284c7701ea5a85f59..6292259804634e0c6251e23dd8eb7213f16211fb 100644 (file)
@@ -1626,7 +1626,7 @@ try:
                if (state & Xpr) {
                        void __iomem *scc_addr;
                        unsigned long ring;
-                       int i;
+                       unsigned int i;
 
                        /*
                         * - the busy condition happens (sometimes);
index a7afdeee698c690b9decf1e5da83c884071d9690..73fb4232f9f28c09e6a3f44090dc401684be8747 100644 (file)
@@ -150,18 +150,18 @@ int ath9k_hw_nvram_swap_data(struct ath_hw *ah, bool *swap_needed, int size)
                return -EIO;
        }
 
-       if (magic == AR5416_EEPROM_MAGIC) {
-               *swap_needed = false;
-       } else if (swab16(magic) == AR5416_EEPROM_MAGIC) {
+       *swap_needed = false;
+       if (swab16(magic) == AR5416_EEPROM_MAGIC) {
                if (ah->ah_flags & AH_NO_EEP_SWAP) {
                        ath_info(common,
                                 "Ignoring endianness difference in EEPROM magic bytes.\n");
-
-                       *swap_needed = false;
                } else {
                        *swap_needed = true;
                }
-       } else {
+       } else if (magic != AR5416_EEPROM_MAGIC) {
+               if (ath9k_hw_use_flash(ah))
+                       return 0;
+
                ath_err(common,
                        "Invalid EEPROM Magic (0x%04x).\n", magic);
                return -EINVAL;
index 53637399bb99d0743663c50cd674e2a1711cfc3b..b98db8a0a069bdf71e3c09dcf873aa6905700823 100644 (file)
@@ -879,11 +879,24 @@ int brcmf_sdiod_abort(struct brcmf_sdio_dev *sdiodev, uint fn)
        return 0;
 }
 
-static void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev)
+void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev)
 {
+       struct sdio_func *func;
+       struct mmc_host *host;
+       uint max_blocks;
        uint nents;
        int err;
 
+       func = sdiodev->func[2];
+       host = func->card->host;
+       sdiodev->sg_support = host->max_segs > 1;
+       max_blocks = min_t(uint, host->max_blk_count, 511u);
+       sdiodev->max_request_size = min_t(uint, host->max_req_size,
+                                         max_blocks * func->cur_blksize);
+       sdiodev->max_segment_count = min_t(uint, host->max_segs,
+                                          SG_MAX_SINGLE_ALLOC);
+       sdiodev->max_segment_size = host->max_seg_size;
+
        if (!sdiodev->sg_support)
                return;
 
@@ -1021,9 +1034,6 @@ static void brcmf_sdiod_host_fixup(struct mmc_host *host)
 
 static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
 {
-       struct sdio_func *func;
-       struct mmc_host *host;
-       uint max_blocks;
        int ret = 0;
 
        sdiodev->num_funcs = 2;
@@ -1054,26 +1064,6 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
                goto out;
        }
 
-       /*
-        * determine host related variables after brcmf_sdiod_probe()
-        * as func->cur_blksize is properly set and F2 init has been
-        * completed successfully.
-        */
-       func = sdiodev->func[2];
-       host = func->card->host;
-       sdiodev->sg_support = host->max_segs > 1;
-       max_blocks = min_t(uint, host->max_blk_count, 511u);
-       sdiodev->max_request_size = min_t(uint, host->max_req_size,
-                                         max_blocks * func->cur_blksize);
-       sdiodev->max_segment_count = min_t(uint, host->max_segs,
-                                          SG_MAX_SINGLE_ALLOC);
-       sdiodev->max_segment_size = host->max_seg_size;
-
-       /* allocate scatter-gather table. sg support
-        * will be disabled upon allocation failure.
-        */
-       brcmf_sdiod_sgtable_alloc(sdiodev);
-
        ret = brcmf_sdiod_freezer_attach(sdiodev);
        if (ret)
                goto out;
@@ -1084,7 +1074,7 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
                ret = -ENODEV;
                goto out;
        }
-       brcmf_sdiod_host_fixup(host);
+       brcmf_sdiod_host_fixup(sdiodev->func[2]->card->host);
 out:
        if (ret)
                brcmf_sdiod_remove(sdiodev);
index 4265b50faa98126400434f1c28995ded21e9e7f5..cfee477a6eb1f4b3f4ce5af01d3d19d37d768fb6 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/netdevice.h>
+#include <linux/module.h>
 #include <brcmu_wifi.h>
 #include <brcmu_utils.h>
 #include "core.h"
index dd6614332836a86443e505d9ee6394ec26ccfb6b..a14d9d9da094224b0c8c6d326547a5001c29d925 100644 (file)
@@ -4114,6 +4114,11 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
                goto fail;
        }
 
+       /* allocate scatter-gather table. sg support
+        * will be disabled upon allocation failure.
+        */
+       brcmf_sdiod_sgtable_alloc(bus->sdiodev);
+
        /* Query the F2 block size, set roundup accordingly */
        bus->blocksize = bus->sdiodev->func[2]->cur_blksize;
        bus->roundup = min(max_roundup, bus->blocksize);
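
Moving the host-capability probing into brcmf_sdiod_sgtable_alloc() and invoking it here keeps the sizing after F2 setup, when func->cur_blksize holds its final value, while making the helper callable from brcmf_sdio_probe(). A plain-C sketch of the request-size clamping, with min_t() open-coded and illustrative host numbers; the 511-block cap comes from the hunk:

    #include <stdint.h>

    static uint32_t max_request_size(uint32_t host_max_req,
                                     uint32_t host_max_blk,
                                     uint32_t cur_blksize)
    {
        uint32_t blocks = host_max_blk < 511u ? host_max_blk : 511u;
        uint32_t by_blocks = blocks * cur_blksize;

        return host_max_req < by_blocks ? host_max_req : by_blocks;
    }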
index 5ec7a6d876723029f3a98a1047e0da9bcbf53d53..23f223150cef2cf4ec768b196d906603b92f36b2 100644 (file)
@@ -342,6 +342,7 @@ int brcmf_sdiod_ramrw(struct brcmf_sdio_dev *sdiodev, bool write, u32 address,
 
 /* Issue an abort to the specified function */
 int brcmf_sdiod_abort(struct brcmf_sdio_dev *sdiodev, uint fn);
+void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev);
 void brcmf_sdiod_change_state(struct brcmf_sdio_dev *sdiodev,
                              enum brcmf_sdiod_state state);
 #ifdef CONFIG_PM_SLEEP
index 866067789330ad4f4eaf0cebd7f807aa9403d1ec..7438fbeef744d8dd6d237650b81ac48b5bc3e969 100644 (file)
@@ -53,7 +53,6 @@ config IWLWIFI_LEDS
 
 config IWLDVM
        tristate "Intel Wireless WiFi DVM Firmware support"
-       depends on m
        help
          This is the driver that supports the DVM firmware. The list
          of the devices that use this firmware is available here:
index e60cf141ed796f0bbcaf1c8b68cf7a08676ce86e..fa41a5e1c8902002d3bf8a1d3991a416d6600499 100644 (file)
 #define IWL7260_UCODE_API_MAX  17
 #define IWL7265_UCODE_API_MAX  17
 #define IWL7265D_UCODE_API_MAX 20
+#define IWL3168_UCODE_API_MAX  20
 
 /* Oldest version we won't warn about */
 #define IWL7260_UCODE_API_OK   13
 #define IWL7265_UCODE_API_OK   13
 #define IWL7265D_UCODE_API_OK  13
+#define IWL3168_UCODE_API_OK   20
 
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN  13
 #define IWL7265_UCODE_API_MIN  13
 #define IWL7265D_UCODE_API_MIN 13
+#define IWL3168_UCODE_API_MIN  20
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION            0x0a1d
@@ -92,6 +95,8 @@
 #define IWL3160_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL3165_NVM_VERSION            0x709
 #define IWL3165_TX_POWER_VERSION       0xffff /* meaningless */
+#define IWL3168_NVM_VERSION            0xd01
+#define IWL3168_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL7265_NVM_VERSION            0x0a1d
 #define IWL7265_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL7265D_NVM_VERSION           0x0c11
 #define IWL3160_FW_PRE "iwlwifi-3160-"
 #define IWL3160_MODULE_FIRMWARE(api) IWL3160_FW_PRE __stringify(api) ".ucode"
 
+#define IWL3168_FW_PRE "iwlwifi-3168-"
+#define IWL3168_MODULE_FIRMWARE(api) IWL3168_FW_PRE __stringify(api) ".ucode"
+
 #define IWL7265_FW_PRE "iwlwifi-7265-"
 #define IWL7265_MODULE_FIRMWARE(api) IWL7265_FW_PRE __stringify(api) ".ucode"
 
@@ -180,6 +188,12 @@ static const struct iwl_ht_params iwl7000_ht_params = {
        .ucode_api_ok = IWL7265_UCODE_API_OK,                   \
        .ucode_api_min = IWL7265_UCODE_API_MIN
 
+#define IWL_DEVICE_3008                                                \
+       IWL_DEVICE_7000_COMMON,                                 \
+       .ucode_api_max = IWL3168_UCODE_API_MAX,                 \
+       .ucode_api_ok = IWL3168_UCODE_API_OK,                   \
+       .ucode_api_min = IWL3168_UCODE_API_MIN
+
 #define IWL_DEVICE_7005D                                       \
        IWL_DEVICE_7000_COMMON,                                 \
        .ucode_api_max = IWL7265D_UCODE_API_MAX,                \
@@ -299,11 +313,11 @@ const struct iwl_cfg iwl3165_2ac_cfg = {
 
 const struct iwl_cfg iwl3168_2ac_cfg = {
        .name = "Intel(R) Dual Band Wireless AC 3168",
-       .fw_name_pre = IWL7265D_FW_PRE,
-       IWL_DEVICE_7000,
+       .fw_name_pre = IWL3168_FW_PRE,
+       IWL_DEVICE_3008,
        .ht_params = &iwl7000_ht_params,
-       .nvm_ver = IWL3165_NVM_VERSION,
-       .nvm_calib_ver = IWL3165_TX_POWER_VERSION,
+       .nvm_ver = IWL3168_NVM_VERSION,
+       .nvm_calib_ver = IWL3168_TX_POWER_VERSION,
        .pwr_tx_backoffs = iwl7265_pwr_tx_backoffs,
        .dccm_len = IWL7265_DCCM_LEN,
 };
@@ -376,5 +390,6 @@ const struct iwl_cfg iwl7265d_n_cfg = {
 
 MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
 MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
+MODULE_FIRMWARE(IWL3168_MODULE_FIRMWARE(IWL3168_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7265_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7265D_UCODE_API_OK));
index c84a0299d43e0f12b3ad80128eea8838cb2bb2ff..bce9b3420a13529926d3919fb7f14fddca559ef6 100644 (file)
@@ -7,6 +7,7 @@
  *
  * Copyright(c) 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
 
 /* Highest firmware API version supported */
 #define IWL8000_UCODE_API_MAX  20
+#define IWL8265_UCODE_API_MAX  20
 
 /* Oldest version we won't warn about */
 #define IWL8000_UCODE_API_OK   13
+#define IWL8265_UCODE_API_OK   20
 
 /* Lowest firmware API version supported */
 #define IWL8000_UCODE_API_MIN  13
+#define IWL8265_UCODE_API_MIN  20
 
 /* NVM versions */
 #define IWL8000_NVM_VERSION            0x0a1d
 #define IWL8000_MODULE_FIRMWARE(api) \
        IWL8000_FW_PRE "-" __stringify(api) ".ucode"
 
+#define IWL8265_FW_PRE "iwlwifi-8265-"
+#define IWL8265_MODULE_FIRMWARE(api) \
+       IWL8265_FW_PRE __stringify(api) ".ucode"
+
 #define NVM_HW_SECTION_NUM_FAMILY_8000         10
 #define DEFAULT_NVM_FILE_FAMILY_8000B          "nvmData-8000B"
 #define DEFAULT_NVM_FILE_FAMILY_8000C          "nvmData-8000C"
@@ -144,10 +152,7 @@ static const struct iwl_tt_params iwl8000_tt_params = {
        .support_tx_backoff = true,
 };
 
-#define IWL_DEVICE_8000                                                        \
-       .ucode_api_max = IWL8000_UCODE_API_MAX,                         \
-       .ucode_api_ok = IWL8000_UCODE_API_OK,                           \
-       .ucode_api_min = IWL8000_UCODE_API_MIN,                         \
+#define IWL_DEVICE_8000_COMMON                                         \
        .device_family = IWL_DEVICE_FAMILY_8000,                        \
        .max_inst_size = IWL60_RTC_INST_SIZE,                           \
        .max_data_size = IWL60_RTC_DATA_SIZE,                           \
@@ -167,10 +172,28 @@ static const struct iwl_tt_params iwl8000_tt_params = {
        .thermal_params = &iwl8000_tt_params,                           \
        .apmg_not_supported = true
 
+#define IWL_DEVICE_8000                                                        \
+       IWL_DEVICE_8000_COMMON,                                         \
+       .ucode_api_max = IWL8000_UCODE_API_MAX,                         \
+       .ucode_api_ok = IWL8000_UCODE_API_OK,                           \
+       .ucode_api_min = IWL8000_UCODE_API_MIN                          \
+
+#define IWL_DEVICE_8260                                                        \
+       IWL_DEVICE_8000_COMMON,                                         \
+       .ucode_api_max = IWL8000_UCODE_API_MAX,                         \
+       .ucode_api_ok = IWL8000_UCODE_API_OK,                           \
+       .ucode_api_min = IWL8000_UCODE_API_MIN                          \
+
+#define IWL_DEVICE_8265                                                        \
+       IWL_DEVICE_8000_COMMON,                                         \
+       .ucode_api_max = IWL8265_UCODE_API_MAX,                         \
+       .ucode_api_ok = IWL8265_UCODE_API_OK,                           \
+       .ucode_api_min = IWL8265_UCODE_API_MIN                          \
+
 const struct iwl_cfg iwl8260_2n_cfg = {
        .name = "Intel(R) Dual Band Wireless N 8260",
        .fw_name_pre = IWL8000_FW_PRE,
-       IWL_DEVICE_8000,
+       IWL_DEVICE_8260,
        .ht_params = &iwl8000_ht_params,
        .nvm_ver = IWL8000_NVM_VERSION,
        .nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -179,7 +202,7 @@ const struct iwl_cfg iwl8260_2n_cfg = {
 const struct iwl_cfg iwl8260_2ac_cfg = {
        .name = "Intel(R) Dual Band Wireless AC 8260",
        .fw_name_pre = IWL8000_FW_PRE,
-       IWL_DEVICE_8000,
+       IWL_DEVICE_8260,
        .ht_params = &iwl8000_ht_params,
        .nvm_ver = IWL8000_NVM_VERSION,
        .nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -188,8 +211,8 @@ const struct iwl_cfg iwl8260_2ac_cfg = {
 
 const struct iwl_cfg iwl8265_2ac_cfg = {
        .name = "Intel(R) Dual Band Wireless AC 8265",
-       .fw_name_pre = IWL8000_FW_PRE,
-       IWL_DEVICE_8000,
+       .fw_name_pre = IWL8265_FW_PRE,
+       IWL_DEVICE_8265,
        .ht_params = &iwl8000_ht_params,
        .nvm_ver = IWL8000_NVM_VERSION,
        .nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -209,7 +232,7 @@ const struct iwl_cfg iwl4165_2ac_cfg = {
 const struct iwl_cfg iwl8260_2ac_sdio_cfg = {
        .name = "Intel(R) Dual Band Wireless-AC 8260",
        .fw_name_pre = IWL8000_FW_PRE,
-       IWL_DEVICE_8000,
+       IWL_DEVICE_8260,
        .ht_params = &iwl8000_ht_params,
        .nvm_ver = IWL8000_NVM_VERSION,
        .nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -236,3 +259,4 @@ const struct iwl_cfg iwl4165_2ac_sdio_cfg = {
 };
 
 MODULE_FIRMWARE(IWL8000_MODULE_FIRMWARE(IWL8000_UCODE_API_OK));
+MODULE_FIRMWARE(IWL8265_MODULE_FIRMWARE(IWL8265_UCODE_API_OK));
index 7acb49075683c115d602b20619eb03700718123f..ab4c2a0470b264cc9da8623c0f7bb198563c6e28 100644 (file)
@@ -243,8 +243,10 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first)
        if (drv->trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) {
                char rev_step = 'A' + CSR_HW_REV_STEP(drv->trans->hw_rev);
 
-               snprintf(drv->firmware_name, sizeof(drv->firmware_name),
-                        "%s%c-%s.ucode", name_pre, rev_step, tag);
+               if (rev_step != 'A')
+                       snprintf(drv->firmware_name,
+                                sizeof(drv->firmware_name), "%s%c-%s.ucode",
+                                name_pre, rev_step, tag);
        }
 
        IWL_DEBUG_INFO(drv, "attempting to load firmware %s'%s'\n",
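
With the added check, only non-A hardware steps get the revision letter spliced into the firmware name; A-step 8000-family parts fall through and keep the name assembled earlier without the step suffix. A plain-C sketch of the naming, where the else-branch format is an assumption about that earlier default:

    #include <stdio.h>

    static void fw_name(char *buf, size_t len, const char *name_pre,
                        char rev_step, const char *tag)
    {
        if (rev_step != 'A')
            snprintf(buf, len, "%s%c-%s.ucode", name_pre, rev_step, tag);
        else
            snprintf(buf, len, "%s-%s.ucode", name_pre, tag);  /* assumed */
    }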
index 0036d18334af4ab893b0b45214e972a3836d7885..ba3f0bbddde8874db03a74cd9a0ca01df862bc38 100644 (file)
@@ -510,6 +510,9 @@ struct iwl_mvm_tx_resp {
  * @scd_ssn: the index of the last contiguously sent packet
  * @txed: number of Txed frames in this batch
  * @txed_2_done: number of Acked frames in this batch
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ *     not a copy from the LQ command. Thus, if a rate other than the first
+ *     was used for Tx-ing, this value will be set to 0 by the FW.
  */
 struct iwl_mvm_ba_notif {
        __le32 sta_addr_lo32;
@@ -524,7 +527,8 @@ struct iwl_mvm_ba_notif {
        __le16 scd_ssn;
        u8 txed;
        u8 txed_2_done;
-       __le16 reserved1;
+       u8 reduced_txp;
+       u8 reserved1;
 } __packed;
 
 /*
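
Replacing the __le16 reserved1 with two u8 fields keeps struct iwl_mvm_ba_notif at the same size and field offsets, which matters for a __packed layout shared with firmware; reduced_txp simply claims half of the formerly reserved space. A compile-time sketch of the idea (types and names illustrative, not the driver's):

    #include <stdint.h>

    struct tail_old { uint16_t reserved1; } __attribute__((packed));
    struct tail_new { uint8_t reduced_txp; uint8_t reserved1; }
            __attribute__((packed));

    _Static_assert(sizeof(struct tail_old) == sizeof(struct tail_new),
                   "repurposing reserved space must not change the layout");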
index 7bb6fd0e4391a92f0200aad6e62362fc20c48802..94caa88df4422764573dbd6099af5315c962bdc5 100644 (file)
@@ -2,6 +2,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -724,14 +725,28 @@ static int _rs_collect_tx_data(struct iwl_mvm *mvm,
        return 0;
 }
 
-static int rs_collect_tx_data(struct iwl_mvm *mvm,
-                             struct iwl_lq_sta *lq_sta,
-                             struct iwl_scale_tbl_info *tbl,
-                             int scale_index, int attempts, int successes,
-                             u8 reduced_txp)
+static int rs_collect_tpc_data(struct iwl_mvm *mvm,
+                              struct iwl_lq_sta *lq_sta,
+                              struct iwl_scale_tbl_info *tbl,
+                              int scale_index, int attempts, int successes,
+                              u8 reduced_txp)
+{
+       struct iwl_rate_scale_data *window = NULL;
+
+       if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
+               return -EINVAL;
+
+       window = &tbl->tpc_win[reduced_txp];
+       return  _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
+                                   window);
+}
+
+static int rs_collect_tlc_data(struct iwl_mvm *mvm,
+                              struct iwl_lq_sta *lq_sta,
+                              struct iwl_scale_tbl_info *tbl,
+                              int scale_index, int attempts, int successes)
 {
        struct iwl_rate_scale_data *window = NULL;
-       int ret;
 
        if (scale_index < 0 || scale_index >= IWL_RATE_COUNT)
                return -EINVAL;
@@ -745,16 +760,6 @@ static int rs_collect_tx_data(struct iwl_mvm *mvm,
 
        /* Select window for current tx bit rate */
        window = &(tbl->win[scale_index]);
-
-       ret = _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
-                                 window);
-       if (ret)
-               return ret;
-
-       if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
-               return -EINVAL;
-
-       window = &tbl->tpc_win[reduced_txp];
        return _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
                                   window);
 }
@@ -1301,17 +1306,30 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
         * first index into rate scale table.
         */
        if (info->flags & IEEE80211_TX_STAT_AMPDU) {
-               /* ampdu_ack_len = 0 marks no BA was received. In this case
-                * treat it as a single frame loss as we don't want the success
-                * ratio to dip too quickly because a BA wasn't received
+               rs_collect_tpc_data(mvm, lq_sta, curr_tbl, lq_rate.index,
+                                   info->status.ampdu_len,
+                                   info->status.ampdu_ack_len,
+                                   reduced_txp);
+
+               /* ampdu_ack_len = 0 marks no BA was received. For TLC, treat
+                * it as a single frame loss as we don't want the success ratio
+                * to dip too quickly because a BA wasn't received.
+                * For TPC, there's no need for this optimisation since we want
+                * to recover very quickly from a bad power reduction and
+                * therefore we'd like the success ratio to take an immediate hit
+                * when failing to get a BA, so we'd switch back to a lower or
+                * zero power reduction. When FW transmits agg with a rate
+                * different from the initial rate, it will not use reduced txp
+                * and will send BA notification twice (one empty with reduced
+                * txp equal to the value from LQ and one with reduced txp 0).
+                * We need to update counters for each txp level accordingly.
                 */
                if (info->status.ampdu_ack_len == 0)
                        info->status.ampdu_len = 1;
 
-               rs_collect_tx_data(mvm, lq_sta, curr_tbl, lq_rate.index,
-                                  info->status.ampdu_len,
-                                  info->status.ampdu_ack_len,
-                                  reduced_txp);
+               rs_collect_tlc_data(mvm, lq_sta, curr_tbl, lq_rate.index,
+                                   info->status.ampdu_len,
+                                   info->status.ampdu_ack_len);
 
                /* Update success/fail counts if not searching for new mode */
                if (lq_sta->rs_state == RS_STATE_STAY_IN_COLUMN) {
@@ -1344,9 +1362,13 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
                        else
                                continue;
 
-                       rs_collect_tx_data(mvm, lq_sta, tmp_tbl, lq_rate.index,
-                                          1, i < retries ? 0 : legacy_success,
-                                          reduced_txp);
+                       rs_collect_tpc_data(mvm, lq_sta, tmp_tbl,
+                                           lq_rate.index, 1,
+                                           i < retries ? 0 : legacy_success,
+                                           reduced_txp);
+                       rs_collect_tlc_data(mvm, lq_sta, tmp_tbl,
+                                           lq_rate.index, 1,
+                                           i < retries ? 0 : legacy_success);
                }
 
                /* Update success/fail counts if not searching for new mode */
index 9a15642f80dd28b88dc4ae8326f465a7ec583e35..ea1e177c2ea101463a43a1c73b7dfb995dc9b135 100644 (file)
@@ -1298,6 +1298,10 @@ int iwl_mvm_sched_scan_start(struct iwl_mvm *mvm,
                return -EBUSY;
        }
 
+       /* we don't support "match all" in the firmware */
+       if (!req->n_match_sets)
+               return -EOPNOTSUPP;
+
        ret = iwl_mvm_check_running_scans(mvm, type);
        if (ret)
                return ret;
index 8bf48a7d0f4e99e8297feace5368375b6a16a151..0914ec2fd57467023b0f5336b2e630856c06cf81 100644 (file)
@@ -1029,7 +1029,6 @@ static void iwl_mvm_rx_tx_cmd_agg(struct iwl_mvm *mvm,
                struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
                mvmsta->tid_data[tid].rate_n_flags =
                        le32_to_cpu(tx_resp->initial_rate);
-               mvmsta->tid_data[tid].reduced_tpc = tx_resp->reduced_tpc;
                mvmsta->tid_data[tid].tx_time =
                        le16_to_cpu(tx_resp->wireless_media_time);
        }
@@ -1060,7 +1059,7 @@ static void iwl_mvm_tx_info_from_ba_notif(struct ieee80211_tx_info *info,
        /* TODO: not accounted if the whole A-MPDU failed */
        info->status.tx_time = tid_data->tx_time;
        info->status.status_driver_data[0] =
-               (void *)(uintptr_t)tid_data->reduced_tpc;
+               (void *)(uintptr_t)ba_notif->reduced_txp;
        info->status.status_driver_data[1] =
                (void *)(uintptr_t)tid_data->rate_n_flags;
 }
@@ -1133,6 +1132,8 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
                           scd_flow, ba_resp_scd_ssn, ba_notif->txed,
                           ba_notif->txed_2_done);
 
+       IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
+                          ba_notif->reduced_txp);
        tid_data->next_reclaimed = ba_resp_scd_ssn;
 
        iwl_mvm_check_ratid_empty(mvm, sta, tid);
index 6261a68cae907d793cdbf62ceb9dda97e5031fee..00335ea6b3eb5fa6035a2ce06e9fcfb0ea612ac7 100644 (file)
@@ -378,7 +378,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x3165, 0x8110, iwl3165_2ac_cfg)},
 
 /* 3168 Series */
+       {IWL_PCI_DEVICE(0x24FB, 0x2010, iwl3168_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FB, 0x2110, iwl3168_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FB, 0x2050, iwl3168_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FB, 0x2150, iwl3168_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FB, 0x0000, iwl3168_2ac_cfg)},
 
 /* 7265 Series */
@@ -475,6 +478,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x24F3, 0x0000, iwl8265_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FD, 0x0010, iwl8265_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FD, 0x8010, iwl8265_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FD, 0x0810, iwl8265_2ac_cfg)},
 
 /* 9000 Series */
        {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)},
index cc3888e2700daf1cfe07ebb4e003ca53bcf6d934..73c95594eabe2c530d503fa51aedce05f0ffb8a9 100644 (file)
@@ -490,6 +490,15 @@ static inline void iwl_enable_interrupts(struct iwl_trans *trans)
        iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask);
 }
 
+static inline void iwl_enable_fw_load_int(struct iwl_trans *trans)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+       IWL_DEBUG_ISR(trans, "Enabling FW load interrupt\n");
+       trans_pcie->inta_mask = CSR_INT_BIT_FH_TX;
+       iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask);
+}
+
 static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
index ccafbd8cf4b3b224649b3fbc87da69207884743c..152cf9ad9566587320a4a8d5dd34140b3d6b24b9 100644 (file)
@@ -1438,9 +1438,11 @@ irqreturn_t iwl_pcie_irq_handler(int irq, void *dev_id)
                         inta & ~trans_pcie->inta_mask);
        }
 
-       /* Re-enable all interrupts */
-       /* only Re-enable if disabled by irq */
-       if (test_bit(STATUS_INT_ENABLED, &trans->status))
+       /* we are loading the firmware, enable FH_TX interrupt only */
+       if (handled & CSR_INT_BIT_FH_TX)
+               iwl_enable_fw_load_int(trans);
+       /* only re-enable all interrupts if disabled by irq */
+       else if (test_bit(STATUS_INT_ENABLED, &trans->status))
                iwl_enable_interrupts(trans);
        /* Re-enable RF_KILL if it occurred */
        else if (handled & CSR_INT_BIT_RF_KILL)
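
This pairs with iwl_enable_fw_load_int() above: while the firmware is loading only CSR_INT_BIT_FH_TX is unmasked, so when the handler services an FH_TX interrupt it restores that narrow mask rather than re-enabling the full set. A plain-C sketch of the narrow-mask idea, with a hypothetical MMIO helper and an illustrative bit position:

    #include <stdint.h>

    #define INT_BIT_FH_TX (1u << 27)              /* illustrative */

    extern void write_int_mask(uint32_t mask);    /* hypothetical */

    static void enable_fw_load_int(void)
    {
        write_int_mask(INT_BIT_FH_TX);            /* firmware-load window */
    }

    static void enable_all_ints(uint32_t full_mask)
    {
        write_int_mask(full_mask);                /* normal operation */
    }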
index d60a467a983c6f220384b701953eb42d2cc4c1ae..5a854c60947749cc6dd9dfd90ec90ed9b20667c0 100644 (file)
@@ -1021,82 +1021,6 @@ static int iwl_pcie_load_given_ucode_8000(struct iwl_trans *trans,
                                               &first_ucode_section);
 }
 
-static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
-                                  const struct fw_img *fw, bool run_in_rfkill)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       bool hw_rfkill;
-       int ret;
-
-       mutex_lock(&trans_pcie->mutex);
-
-       /* Someone called stop_device, don't try to start_fw */
-       if (trans_pcie->is_down) {
-               IWL_WARN(trans,
-                        "Can't start_fw since the HW hasn't been started\n");
-               ret = EIO;
-               goto out;
-       }
-
-       /* This may fail if AMT took ownership of the device */
-       if (iwl_pcie_prepare_card_hw(trans)) {
-               IWL_WARN(trans, "Exit HW not ready\n");
-               ret = -EIO;
-               goto out;
-       }
-
-       iwl_enable_rfkill_int(trans);
-
-       /* If platform's RF_KILL switch is NOT set to KILL */
-       hw_rfkill = iwl_is_rfkill_set(trans);
-       if (hw_rfkill)
-               set_bit(STATUS_RFKILL, &trans->status);
-       else
-               clear_bit(STATUS_RFKILL, &trans->status);
-       iwl_trans_pcie_rf_kill(trans, hw_rfkill);
-       if (hw_rfkill && !run_in_rfkill) {
-               ret = -ERFKILL;
-               goto out;
-       }
-
-       iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
-
-       ret = iwl_pcie_nic_init(trans);
-       if (ret) {
-               IWL_ERR(trans, "Unable to init nic\n");
-               goto out;
-       }
-
-       /* make sure rfkill handshake bits are cleared */
-       iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-       iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR,
-                   CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED);
-
-       /* clear (again), then enable host interrupts */
-       iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
-       iwl_enable_interrupts(trans);
-
-       /* really make sure rfkill handshake bits are cleared */
-       iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-       iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-
-       /* Load the given image to the HW */
-       if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
-               ret = iwl_pcie_load_given_ucode_8000(trans, fw);
-       else
-               ret = iwl_pcie_load_given_ucode(trans, fw);
-
-out:
-       mutex_unlock(&trans_pcie->mutex);
-       return ret;
-}
-
-static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
-{
-       iwl_pcie_reset_ict(trans);
-       iwl_pcie_tx_start(trans, scd_addr);
-}
-
 static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -1127,7 +1051,8 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
         * already dead.
         */
        if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
-               IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n");
+               IWL_DEBUG_INFO(trans,
+                              "DEVICE_ENABLED bit was set and is now cleared\n");
                iwl_pcie_tx_stop(trans);
                iwl_pcie_rx_stop(trans);
 
@@ -1161,7 +1086,6 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
        iwl_disable_interrupts(trans);
        spin_unlock(&trans_pcie->irq_lock);
 
-
        /* clear all status bits */
        clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
        clear_bit(STATUS_INT_ENABLED, &trans->status);
@@ -1194,10 +1118,116 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
        if (hw_rfkill != was_hw_rfkill)
                iwl_trans_pcie_rf_kill(trans, hw_rfkill);
 
-       /* re-take ownership to prevent other users from stealing the deivce */
+       /* re-take ownership to prevent other users from stealing the device */
        iwl_pcie_prepare_card_hw(trans);
 }
 
+static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
+                                  const struct fw_img *fw, bool run_in_rfkill)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       bool hw_rfkill;
+       int ret;
+
+       /* This may fail if AMT took ownership of the device */
+       if (iwl_pcie_prepare_card_hw(trans)) {
+               IWL_WARN(trans, "Exit HW not ready\n");
+               ret = -EIO;
+               goto out;
+       }
+
+       iwl_enable_rfkill_int(trans);
+
+       iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
+
+       /*
+        * We enabled the RF-Kill interrupt and the handler may very
+        * well be running. Disable the interrupts to make sure no other
+        * interrupt can be fired.
+        */
+       iwl_disable_interrupts(trans);
+
+       /* Make sure it finished running */
+       synchronize_irq(trans_pcie->pci_dev->irq);
+
+       mutex_lock(&trans_pcie->mutex);
+
+       /* If platform's RF_KILL switch is NOT set to KILL */
+       hw_rfkill = iwl_is_rfkill_set(trans);
+       if (hw_rfkill)
+               set_bit(STATUS_RFKILL, &trans->status);
+       else
+               clear_bit(STATUS_RFKILL, &trans->status);
+       iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+       if (hw_rfkill && !run_in_rfkill) {
+               ret = -ERFKILL;
+               goto out;
+       }
+
+       /* Someone called stop_device, don't try to start_fw */
+       if (trans_pcie->is_down) {
+               IWL_WARN(trans,
+                        "Can't start_fw since the HW hasn't been started\n");
+               ret = -EIO;
+               goto out;
+       }
+
+       /* make sure rfkill handshake bits are cleared */
+       iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+       iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR,
+                   CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED);
+
+       /* clear (again), then enable host interrupts */
+       iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
+
+       ret = iwl_pcie_nic_init(trans);
+       if (ret) {
+               IWL_ERR(trans, "Unable to init nic\n");
+               goto out;
+       }
+
+       /*
+        * Now, we load the firmware and don't want to be interrupted, even
+        * by the RF-Kill interrupt (hence mask all the interrupts besides the
+        * FH_TX interrupt which is needed to load the firmware). If the
+        * RF-Kill switch is toggled, we will find out after having loaded
+        * the firmware and return the proper value to the caller.
+        */
+       iwl_enable_fw_load_int(trans);
+
+       /* really make sure rfkill handshake bits are cleared */
+       iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+       iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+
+       /* Load the given image to the HW */
+       if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
+               ret = iwl_pcie_load_given_ucode_8000(trans, fw);
+       else
+               ret = iwl_pcie_load_given_ucode(trans, fw);
+       iwl_enable_interrupts(trans);
+
+       /* re-check RF-Kill state since we may have missed the interrupt */
+       hw_rfkill = iwl_is_rfkill_set(trans);
+       if (hw_rfkill)
+               set_bit(STATUS_RFKILL, &trans->status);
+       else
+               clear_bit(STATUS_RFKILL, &trans->status);
+
+       iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+       if (hw_rfkill && !run_in_rfkill)
+               ret = -ERFKILL;
+
+out:
+       mutex_unlock(&trans_pcie->mutex);
+       return ret;
+}
+
+static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
+{
+       iwl_pcie_reset_ict(trans);
+       iwl_pcie_tx_start(trans, scd_addr);
+}
+
 static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
index c32889a1e39cf53d4e2d1ab3a4bfc2b8b492af1e..a28414c50edf11131e41aa65f02957a5a07c796f 100644 (file)
@@ -991,7 +991,8 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
                goto nla_put_failure;
        }
 
-       if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER, ETH_ALEN, hdr->addr2))
+       if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER,
+                   ETH_ALEN, data->addresses[1].addr))
                goto nla_put_failure;
 
        /* We get the skb->data */
@@ -2736,7 +2737,7 @@ static struct mac80211_hwsim_data *get_hwsim_data_ref_from_addr(const u8 *addr)
 
        spin_lock_bh(&hwsim_radio_lock);
        list_for_each_entry(data, &hwsim_radios, list) {
-               if (mac80211_hwsim_addr_match(data, addr)) {
+               if (memcmp(data->addresses[1].addr, addr, ETH_ALEN) == 0) {
                        _found = true;
                        break;
                }
index 9a3966cd6fbedb0e41e1f2c741405b10aee92fdc..155f343981fe29679d278b0621d3cf7ce64b2a77 100644 (file)
@@ -273,8 +273,10 @@ static void rt2400pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RXCSR0_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1);
        rt2x00mmio_register_write(rt2x00dev, RXCSR0, reg);
index 1a6740b4d396211cd3c00290cd9abb06128c55bf..2553cdd7406623cde30a99531e4d86a74ae4836e 100644 (file)
@@ -274,8 +274,10 @@ static void rt2500pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RXCSR0_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, RXCSR0_DROP_MCAST,
index d26018f30b7dfa9e8381ceb609d5eddd10ff47a8..2d64611de300146d98bcc254171f961e3af26831 100644 (file)
@@ -437,8 +437,10 @@ static void rt2500usb_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_VERSION_ERROR, 1);
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_MULTICAST,
index 9733b31a780d380fd2bfd550efe155943b0fdb34..a26afcab03ed02242fdc7fa2f3b94307fc52024e 100644 (file)
@@ -1490,7 +1490,8 @@ void rt2800_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_FCSFAIL));
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_PHY_ERROR,
                           !(filter_flags & FIF_PLCPFAIL));
-       rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_MY_BSSD, 0);
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_VER_ERROR, 1);
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_MULTICAST,
index 3282ddb766f4224a8e10e037fa7ea4332dfe0e27..26427140a963b592a52d6c2443e45dfa24f73cf6 100644 (file)
@@ -669,6 +669,7 @@ enum rt2x00_state_flags {
        CONFIG_POWERSAVING,
        CONFIG_HT_DISABLED,
        CONFIG_QOS_DISABLED,
+       CONFIG_MONITORING,
 
        /*
         * Mark we currently are sequentially reading TX_STA_FIFO register
index 7e8bb1198ae9f749dfc3e419f1ae652ffa7a0b8d..6a1f508d472f0e6ac3be534d780d209e90a0278d 100644 (file)
@@ -277,6 +277,11 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev,
        else
                clear_bit(CONFIG_POWERSAVING, &rt2x00dev->flags);
 
+       if (conf->flags & IEEE80211_CONF_MONITOR)
+               set_bit(CONFIG_MONITORING, &rt2x00dev->flags);
+       else
+               clear_bit(CONFIG_MONITORING, &rt2x00dev->flags);
+
        rt2x00dev->curr_band = conf->chandef.chan->band;
        rt2x00dev->curr_freq = conf->chandef.chan->center_freq;
        rt2x00dev->tx_power = conf->power_level;
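
The new CONFIG_MONITORING flag is the single point where mac80211's IEEE80211_CONF_MONITOR state is latched, and the chip-specific config_filter() hunks before and after this one all consult it: DROP_NOT_TO_ME (and, where present, DROP_TODS) is asserted only when no monitor interface is active. The early return dropped from rt2x00mac_configure_filter() below fits the same change, since the monitor flag can now change without the filter flags changing. A plain-C sketch of the flag-gated filter computation, with illustrative bit positions:

    #include <stdbool.h>
    #include <stdint.h>

    #define DROP_NOT_TO_ME (1u << 0)    /* illustrative */
    #define DROP_TODS      (1u << 1)    /* illustrative */

    static uint32_t rx_filter(bool monitoring, unsigned int ap_count)
    {
        uint32_t reg = 0;

        if (!monitoring)
            reg |= DROP_NOT_TO_ME;
        if (!monitoring && !ap_count)
            reg |= DROP_TODS;
        return reg;
    }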
index 3c26ee65a41513127cbc999416ee90fabc11d890..13da95a24cf77bb366481252226f3c4566352d67 100644 (file)
@@ -385,11 +385,6 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw,
                        *total_flags |= FIF_PSPOLL;
        }
 
-       /*
-        * Check if there is any work left for us.
-        */
-       if (rt2x00dev->packet_filter == *total_flags)
-               return;
        rt2x00dev->packet_filter = *total_flags;
 
        rt2x00dev->ops->lib->config_filter(rt2x00dev, *total_flags);
index c0e730ea1b69058a663f953e73165fa23a850c72..24a3436ef952870a118e0adddf5f06fa3c5ca6f5 100644 (file)
@@ -530,8 +530,10 @@ static void rt61pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL,
                           !(filter_flags & (FIF_CONTROL | FIF_PSPOLL)));
-       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST,
index 7081e13b4fd67ebbaa7fa8af3f15a1d81fbc219e..7bbc869311681988dbbcd50907fd33d84f4f1a85 100644 (file)
@@ -480,8 +480,10 @@ static void rt73usb_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL,
                           !(filter_flags & (FIF_CONTROL | FIF_PSPOLL)));
-       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST,
index 74c14ce28238eed70f8ad798f0724642147411d3..28f7010e7108bf3c3f8d3a89296b2e0b16f2c555 100644 (file)
@@ -138,6 +138,11 @@ static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
                    ((wireless_mode == WIRELESS_MODE_N_5G) ||
                     (wireless_mode == WIRELESS_MODE_N_24G)))
                        rate->flags |= IEEE80211_TX_RC_MCS;
+               if (sta && sta->vht_cap.vht_supported &&
+                   (wireless_mode == WIRELESS_MODE_AC_5G ||
+                    wireless_mode == WIRELESS_MODE_AC_24G ||
+                    wireless_mode == WIRELESS_MODE_AC_ONLY))
+                       rate->flags |= IEEE80211_TX_RC_VHT_MCS;
        }
 }
 
index a62bf0a65c321bb73553c403c5233ccd30d1f8c7..5be34118e0af22b69392f5caf07f9eef5672c7fe 100644 (file)
@@ -351,7 +351,6 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select(
        case COUNTRY_CODE_SPAIN:
        case COUNTRY_CODE_FRANCE:
        case COUNTRY_CODE_ISRAEL:
-       case COUNTRY_CODE_WORLD_WIDE_13:
                return &rtl_regdom_12_13;
        case COUNTRY_CODE_MKK:
        case COUNTRY_CODE_MKK1:
@@ -360,6 +359,7 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select(
                return &rtl_regdom_14_60_64;
        case COUNTRY_CODE_GLOBAL_DOMAIN:
                return &rtl_regdom_14;
+       case COUNTRY_CODE_WORLD_WIDE_13:
        case COUNTRY_CODE_WORLD_WIDE_13_5G_ALL:
                return &rtl_regdom_12_13_5g_all;
        default:
index 9ac118e727e9402032bdfe972ee40c9d34d12a00..564ca750c5ee8895a702e9a9ea145e75391e02bf 100644 (file)
@@ -175,14 +175,14 @@ int wlcore_set_partition(struct wl1271 *wl,
        if (ret < 0)
                goto out;
 
+       /* We don't need the size of the last partition, as it is
+        * automatically calculated based on the total memory size and
+        * the sizes of the previous partitions.
+        */
        ret = wlcore_raw_write32(wl, HW_PART3_START_ADDR, p->mem3.start);
        if (ret < 0)
                goto out;
 
-       ret = wlcore_raw_write32(wl, HW_PART3_SIZE_ADDR, p->mem3.size);
-       if (ret < 0)
-               goto out;
-
 out:
        return ret;
 }
index 6c257b54f4150323811c46d2a49251251c113170..10cf3747694d5bf46b865fc68a990b5b4c7c5a59 100644 (file)
@@ -36,8 +36,8 @@
 #define HW_PART1_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 12)
 #define HW_PART2_SIZE_ADDR              (HW_PARTITION_REGISTERS_ADDR + 16)
 #define HW_PART2_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 20)
-#define HW_PART3_SIZE_ADDR              (HW_PARTITION_REGISTERS_ADDR + 24)
-#define HW_PART3_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 28)
+#define HW_PART3_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 24)
+
 #define HW_ACCESS_REGISTER_SIZE         4
 
 #define HW_ACCESS_PRAM_MAX_RANGE       0x3c000
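
The wl12xx change above pairs with this register-map update: the last memory partition no longer has a size register, so HW_PART3_START_ADDR moves down to offset 24. A sketch of the resulting layout; the +0/+4/+8 entries are inferred from the visible pattern rather than shown in this hunk:

/* Offsets from HW_PARTITION_REGISTERS_ADDR after this patch:
 *   +0   part0 size     +4   part0 start    (inferred)
 *   +8   part1 size     +12  part1 start    (HW_PART1_START_ADDR)
 *   +16  part2 size     +20  part2 start
 *   +24  part3 start -- no size register; the firmware derives the last
 *                       partition's size from the total memory size and
 *                       the sizes of the preceding partitions.
 */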
index d6abf191122a3260cbd97f5c1a2dbd1c2cb5c5e2..96ccd4e943db49908b0821f1d515aef36072db15 100644 (file)
@@ -364,6 +364,7 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
        RING_IDX cons, prod;
        unsigned short id;
        struct sk_buff *skb;
+       bool more_to_do;
 
        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 
@@ -398,18 +399,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
 
                queue->tx.rsp_cons = prod;
 
-               /*
-                * Set a new event, then check for race with update of tx_cons.
-                * Note that it is essential to schedule a callback, no matter
-                * how few buffers are pending. Even if there is space in the
-                * transmit ring, higher layers may be blocked because too much
-                * data is outstanding: in such cases notification from Xen is
-                * likely to be the only kick that we'll get.
-                */
-               queue->tx.sring->rsp_event =
-                       prod + ((queue->tx.sring->req_prod - prod) >> 1) + 1;
-               mb();           /* update shared area */
-       } while ((cons == prod) && (prod != queue->tx.sring->rsp_prod));
+               RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
+       } while (more_to_do);
 
        xennet_maybe_wake_tx(queue);
 }
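
For reference, RING_FINAL_CHECK_FOR_RESPONSES() encapsulates exactly the re-arm-and-re-check sequence that the removed comment described. It is defined in include/xen/interface/io/ring.h roughly as:

/* Re-arm the response event, then re-check under a memory barrier to
 * close the race with a response that arrives in between. */
#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {	\
	(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);	\
	if (_work_to_do)					\
		break;						\
	(_r)->sring->rsp_event = (_r)->rsp_cons + 1;		\
	mb();							\
	(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);	\
} while (0)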
index 4d5535c4cddf3877109c9a0970f88606ec7f42de..7116472b462577935eb0bad50b986ba510a038b5 100644 (file)
@@ -1 +1,2 @@
+source "drivers/ntb/hw/amd/Kconfig"
 source "drivers/ntb/hw/intel/Kconfig"
index 175d7c92a569a5af90dc026acbd177444d8b9d90..532e0859b4a11bb6c90d357ad8bc00cbea86e006 100644 (file)
@@ -1 +1,2 @@
+obj-$(CONFIG_NTB_AMD)  += amd/
 obj-$(CONFIG_NTB_INTEL)        += intel/
diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
new file mode 100644 (file)
index 0000000..cfe903c
--- /dev/null
@@ -0,0 +1,7 @@
+config NTB_AMD
+       tristate "AMD Non-Transparent Bridge support"
+       depends on X86_64
+       help
+        This driver supports AMD NTB on capable Zeppelin hardware.
+
+        If unsure, say N.
diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
new file mode 100644 (file)
index 0000000..ad54da9
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
new file mode 100644 (file)
index 0000000..588803a
--- /dev/null
@@ -0,0 +1,1143 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+#define NTB_NAME       "ntb_hw_amd"
+#define NTB_DESC       "AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER                "1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+static const struct file_operations amd_ntb_debugfs_info;
+static struct dentry *debugfs_dir;
+
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+       if (idx < 0 || idx >= ndev->mw_count)
+               return -EINVAL;
+
+       return 1 << idx;
+}
+
+static int amd_ntb_mw_count(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+                               phys_addr_t *base,
+                               resource_size_t *size,
+                               resource_size_t *align,
+                               resource_size_t *align_size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       int bar;
+
+       bar = ndev_mw_to_bar(ndev, idx);
+       if (bar < 0)
+               return bar;
+
+       if (base)
+               *base = pci_resource_start(ndev->ntb.pdev, bar);
+
+       if (size)
+               *size = pci_resource_len(ndev->ntb.pdev, bar);
+
+       if (align)
+               *align = SZ_4K;
+
+       if (align_size)
+               *align_size = 1;
+
+       return 0;
+}
+
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+                               dma_addr_t addr, resource_size_t size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       unsigned long xlat_reg, limit_reg = 0;
+       resource_size_t mw_size;
+       void __iomem *mmio, *peer_mmio;
+       u64 base_addr, limit, reg_val;
+       int bar;
+
+       bar = ndev_mw_to_bar(ndev, idx);
+       if (bar < 0)
+               return bar;
+
+       mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+       /* make sure the range fits in the usable mw size */
+       if (size > mw_size)
+               return -EINVAL;
+
+       mmio = ndev->self_mmio;
+       peer_mmio = ndev->peer_mmio;
+
+       base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+
+       if (bar != 1) {
+               xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
+               limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
+
+               /* Set the limit if supported */
+               limit = base_addr + size;
+
+               /* set and verify setting the translation address */
+               write64(addr, peer_mmio + xlat_reg);
+               reg_val = read64(peer_mmio + xlat_reg);
+               if (reg_val != addr) {
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+
+               /* set and verify setting the limit */
+               write64(limit, mmio + limit_reg);
+               reg_val = read64(mmio + limit_reg);
+               if (reg_val != limit) {
+                       write64(base_addr, mmio + limit_reg);
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+       } else {
+               xlat_reg = AMD_BAR1XLAT_OFFSET;
+               limit_reg = AMD_BAR1LMT_OFFSET;
+
+               /* split bar addr range must all be 32 bit */
+               if (addr & (~0ull << 32))
+                       return -EINVAL;
+               if ((addr + size) & (~0ull << 32))
+                       return -EINVAL;
+
+               /* Set the limit if supported */
+               limit = base_addr + size;
+
+               /* set and verify setting the translation address */
+               write64(addr, peer_mmio + xlat_reg);
+               reg_val = read64(peer_mmio + xlat_reg);
+               if (reg_val != addr) {
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+
+               /* set and verify setting the limit */
+               writel(limit, mmio + limit_reg);
+               reg_val = readl(mmio + limit_reg);
+               if (reg_val != limit) {
+                       writel(base_addr, mmio + limit_reg);
+                       writel(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+       }
+
+       return 0;
+}
+
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+       if (!ndev->peer_sta)
+               return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+
+       /* If peer_sta indicates a reset or D0 event, the ISR has
+        * already started a timer to poll the hardware link status,
+        * so just clear the status bit here. If peer_sta indicates
+        * D3 or PME_TO, a D0/reset event will arrive when the
+        * system wakes up or powers on, so do nothing here.
+        */
+       if (ndev->peer_sta & AMD_PEER_RESET_EVENT)
+               ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+       else if (ndev->peer_sta & AMD_PEER_D0_EVENT)
+               ndev->peer_sta = 0;
+
+       return 0;
+}
+
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+                             enum ntb_speed *speed,
+                             enum ntb_width *width)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       int ret = 0;
+
+       if (amd_link_is_up(ndev)) {
+               if (speed)
+                       *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+               if (width)
+                       *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+               dev_dbg(ndev_dev(ndev), "link is up.\n");
+
+               ret = 1;
+       } else {
+               if (speed)
+                       *speed = NTB_SPEED_NONE;
+               if (width)
+                       *width = NTB_WIDTH_NONE;
+
+               dev_dbg(ndev_dev(ndev), "link is down.\n");
+       }
+
+       return ret;
+}
+
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+                              enum ntb_speed max_speed,
+                              enum ntb_width max_width)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 ntb_ctl;
+
+       /* Enable event interrupt */
+       ndev->int_mask &= ~AMD_EVENT_INTMASK;
+       writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+       if (ndev->ntb.topo == NTB_TOPO_SEC)
+               return -EINVAL;
+       dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+       ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+       ntb_ctl |= (PMM_REG_CTL | SMM_REG_CTL);
+       writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+       return 0;
+}
+
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 ntb_ctl;
+
+       /* Disable event interrupt */
+       ndev->int_mask |= AMD_EVENT_INTMASK;
+       writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+       if (ndev->ntb.topo == NTB_TOPO_SEC)
+               return -EINVAL;
+       dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+       ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+       ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
+       writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+       return 0;
+}
+
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->db_valid_mask;
+}
+
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->db_count;
+}
+
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (db_vector < 0 || db_vector >= ndev->db_count)
+               return 0;
+
+       return ndev->db_valid_mask & (1 << db_vector);
+}
+
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+
+       return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
+}
+
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+
+       writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
+
+       return 0;
+}
+
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       unsigned long flags;
+
+       if (db_bits & ~ndev->db_valid_mask)
+               return -EINVAL;
+
+       spin_lock_irqsave(&ndev->db_mask_lock, flags);
+       ndev->db_mask |= db_bits;
+       writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+       spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+       return 0;
+}
+
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       unsigned long flags;
+
+       if (db_bits & ~ndev->db_valid_mask)
+               return -EINVAL;
+
+       spin_lock_irqsave(&ndev->db_mask_lock, flags);
+       ndev->db_mask &= ~db_bits;
+       writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+       spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+       return 0;
+}
+
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+                               phys_addr_t *db_addr,
+                               resource_size_t *db_size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (db_addr)
+               *db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
+       if (db_size)
+               *db_size = sizeof(u32);
+
+       return 0;
+}
+
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+
+       writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
+
+       return 0;
+}
+
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->spad_count;
+}
+
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return 0;
+
+       offset = ndev->self_spad + (idx << 2);
+       return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+                             int idx, u32 val)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       offset = ndev->self_spad + (idx << 2);
+       writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+       return 0;
+}
+
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+                                 phys_addr_t *spad_addr)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       if (spad_addr)
+               *spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
+                                          ndev->peer_spad + (idx << 2));
+       return 0;
+}
+
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       offset = ndev->peer_spad + (idx << 2);
+       return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
+                                  int idx, u32 val)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       offset = ndev->peer_spad + (idx << 2);
+       writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+       return 0;
+}
+
+static const struct ntb_dev_ops amd_ntb_ops = {
+       .mw_count               = amd_ntb_mw_count,
+       .mw_get_range           = amd_ntb_mw_get_range,
+       .mw_set_trans           = amd_ntb_mw_set_trans,
+       .link_is_up             = amd_ntb_link_is_up,
+       .link_enable            = amd_ntb_link_enable,
+       .link_disable           = amd_ntb_link_disable,
+       .db_valid_mask          = amd_ntb_db_valid_mask,
+       .db_vector_count        = amd_ntb_db_vector_count,
+       .db_vector_mask         = amd_ntb_db_vector_mask,
+       .db_read                = amd_ntb_db_read,
+       .db_clear               = amd_ntb_db_clear,
+       .db_set_mask            = amd_ntb_db_set_mask,
+       .db_clear_mask          = amd_ntb_db_clear_mask,
+       .peer_db_addr           = amd_ntb_peer_db_addr,
+       .peer_db_set            = amd_ntb_peer_db_set,
+       .spad_count             = amd_ntb_spad_count,
+       .spad_read              = amd_ntb_spad_read,
+       .spad_write             = amd_ntb_spad_write,
+       .peer_spad_addr         = amd_ntb_peer_spad_addr,
+       .peer_spad_read         = amd_ntb_peer_spad_read,
+       .peer_spad_write        = amd_ntb_peer_spad_write,
+};
+
+static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       int reg;
+
+       reg = readl(mmio + AMD_SMUACK_OFFSET);
+       reg |= bit;
+       writel(reg, mmio + AMD_SMUACK_OFFSET);
+
+       ndev->peer_sta |= bit;
+}
+
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       u32 status;
+
+       status = readl(mmio + AMD_INTSTAT_OFFSET);
+       if (!(status & AMD_EVENT_INTMASK))
+               return;
+
+       dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+
+       status &= AMD_EVENT_INTMASK;
+       switch (status) {
+       case AMD_PEER_FLUSH_EVENT:
+               dev_info(ndev_dev(ndev), "Flush is done.\n");
+               break;
+       case AMD_PEER_RESET_EVENT:
+               amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
+
+               /* link down first */
+               ntb_link_event(&ndev->ntb);
+               /* polling peer status */
+               schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+               break;
+       case AMD_PEER_D3_EVENT:
+       case AMD_PEER_PMETO_EVENT:
+               amd_ack_smu(ndev, status);
+
+               /* link down */
+               ntb_link_event(&ndev->ntb);
+
+               break;
+       case AMD_PEER_D0_EVENT:
+               mmio = ndev->peer_mmio;
+               status = readl(mmio + AMD_PMESTAT_OFFSET);
+               /* check if this is WAKEUP event */
+               if (status & 0x1)
+                       dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+
+               amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
+
+               /* start a timer to poll link status */
+               schedule_delayed_work(&ndev->hb_timer,
+                                     AMD_LINK_HB_TIMEOUT);
+               break;
+       default:
+               dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+               break;
+       }
+}
+
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+       dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+
+       if (vec > (AMD_DB_CNT - 1) || (ndev->msix_vec_count == 1))
+               amd_handle_event(ndev, vec);
+
+       if (vec < AMD_DB_CNT)
+               ntb_db_event(&ndev->ntb, vec);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+       struct amd_ntb_vec *nvec = dev;
+
+       return ndev_interrupt(nvec->ndev, nvec->num);
+}
+
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+       struct amd_ntb_dev *ndev = dev;
+
+       return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+}
+
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+                        int msix_min, int msix_max)
+{
+       struct pci_dev *pdev;
+       int rc, i, msix_count, node;
+
+       pdev = ndev_pdev(ndev);
+
+       node = dev_to_node(&pdev->dev);
+
+       ndev->db_mask = ndev->db_valid_mask;
+
+       /* Try to set up msix irq */
+       ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
+                                GFP_KERNEL, node);
+       if (!ndev->vec)
+               goto err_msix_vec_alloc;
+
+       ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
+                                 GFP_KERNEL, node);
+       if (!ndev->msix)
+               goto err_msix_alloc;
+
+       for (i = 0; i < msix_max; ++i)
+               ndev->msix[i].entry = i;
+
+       msix_count = pci_enable_msix_range(pdev, ndev->msix,
+                                          msix_min, msix_max);
+       if (msix_count < 0)
+               goto err_msix_enable;
+
+       /* NOTE: Disable MSI-X if the granted vector count is less than
+        * 16 (msix_min), because of a hardware limitation.
+        */
+       if (msix_count < msix_min) {
+               pci_disable_msix(pdev);
+               goto err_msix_enable;
+       }
+
+       for (i = 0; i < msix_count; ++i) {
+               ndev->vec[i].ndev = ndev;
+               ndev->vec[i].num = i;
+               rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+                                "ndev_vec_isr", &ndev->vec[i]);
+               if (rc)
+                       goto err_msix_request;
+       }
+
+       dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+       ndev->db_count = msix_min;
+       ndev->msix_vec_count = msix_max;
+       return 0;
+
+err_msix_request:
+       while (i-- > 0)
+               free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+       pci_disable_msix(pdev);
+err_msix_enable:
+       kfree(ndev->msix);
+err_msix_alloc:
+       kfree(ndev->vec);
+err_msix_vec_alloc:
+       ndev->msix = NULL;
+       ndev->vec = NULL;
+
+       /* Try to set up msi irq */
+       rc = pci_enable_msi(pdev);
+       if (rc)
+               goto err_msi_enable;
+
+       rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+                        "ndev_irq_isr", ndev);
+       if (rc)
+               goto err_msi_request;
+
+       dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+       ndev->db_count = 1;
+       ndev->msix_vec_count = 1;
+       return 0;
+
+err_msi_request:
+       pci_disable_msi(pdev);
+err_msi_enable:
+
+       /* Try to set up intx irq */
+       pci_intx(pdev, 1);
+
+       rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+                        "ndev_irq_isr", ndev);
+       if (rc)
+               goto err_intx_request;
+
+       dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+       ndev->db_count = 1;
+       ndev->msix_vec_count = 1;
+       return 0;
+
+err_intx_request:
+       return rc;
+}
+
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev;
+       void __iomem *mmio = ndev->self_mmio;
+       int i;
+
+       pdev = ndev_pdev(ndev);
+
+       /* Mask all doorbell interrupts */
+       ndev->db_mask = ndev->db_valid_mask;
+       writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+
+       if (ndev->msix) {
+               i = ndev->msix_vec_count;
+               while (i--)
+                       free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+               pci_disable_msix(pdev);
+               kfree(ndev->msix);
+               kfree(ndev->vec);
+       } else {
+               free_irq(pdev->irq, ndev);
+               if (pci_dev_msi_enabled(pdev))
+                       pci_disable_msi(pdev);
+               else
+                       pci_intx(pdev, 0);
+       }
+}
+
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+                                size_t count, loff_t *offp)
+{
+       struct amd_ntb_dev *ndev;
+       void __iomem *mmio;
+       char *buf;
+       size_t buf_size;
+       ssize_t ret, off;
+       union { u64 v64; u32 v32; u16 v16; } u;
+
+       ndev = filp->private_data;
+       mmio = ndev->self_mmio;
+
+       buf_size = min(count, 0x800ul);
+
+       buf = kmalloc(buf_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       off = 0;
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "NTB Device Information:\n");
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Connection Topology -\t%s\n",
+                        ntb_topo_string(ndev->ntb.topo));
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+       if (!amd_link_is_up(ndev)) {
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Status -\t\tDown\n");
+       } else {
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Status -\t\tUp\n");
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Speed -\t\tPCI-E Gen %u\n",
+                                NTB_LNK_STA_SPEED(ndev->lnk_sta));
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Width -\t\tx%u\n",
+                                NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+       }
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Memory Window Count -\t%u\n", ndev->mw_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Scratchpad Count -\t%u\n", ndev->spad_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Count -\t%u\n", ndev->db_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+       u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+       u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "\nNTB Incoming XLAT:\n");
+
+       u.v64 = read64(mmio + AMD_BAR1XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT1 -\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT23 -\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT45 -\t\t%#018llx\n", u.v64);
+
+       u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT1 -\t\t\t%#06x\n", u.v32);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+       ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+       kfree(buf);
+       return ret;
+}
+
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+       if (!debugfs_dir) {
+               ndev->debugfs_dir = NULL;
+               ndev->debugfs_info = NULL;
+       } else {
+               ndev->debugfs_dir =
+                       debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+               if (!ndev->debugfs_dir)
+                       ndev->debugfs_info = NULL;
+               else
+                       ndev->debugfs_info =
+                               debugfs_create_file("info", S_IRUSR,
+                                                   ndev->debugfs_dir, ndev,
+                                                   &amd_ntb_debugfs_info);
+       }
+}
+
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+       debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+                                   struct pci_dev *pdev)
+{
+       ndev->ntb.pdev = pdev;
+       ndev->ntb.topo = NTB_TOPO_NONE;
+       ndev->ntb.ops = &amd_ntb_ops;
+       ndev->int_mask = AMD_EVENT_INTMASK;
+       spin_lock_init(&ndev->db_mask_lock);
+}
+
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->peer_mmio;
+       u32 reg, stat;
+       int rc;
+
+       reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+       reg &= NTB_LIN_STA_ACTIVE_BIT;
+
+       dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
+
+       if (reg == ndev->cntl_sta)
+               return 0;
+
+       ndev->cntl_sta = reg;
+
+       rc = pci_read_config_dword(ndev->ntb.pdev,
+                                  AMD_LINK_STATUS_OFFSET, &stat);
+       if (rc)
+               return 0;
+       ndev->lnk_sta = stat;
+
+       return 1;
+}
+
+static void amd_link_hb(struct work_struct *work)
+{
+       struct amd_ntb_dev *ndev = hb_ndev(work);
+
+       if (amd_poll_link(ndev))
+               ntb_link_event(&ndev->ntb);
+
+       if (!amd_link_is_up(ndev))
+               schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+       return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+}
+
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       unsigned int reg;
+
+       reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+       if (!(reg & AMD_SIDE_READY)) {
+               reg |= AMD_SIDE_READY;
+               writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+       }
+}
+
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       unsigned int reg;
+
+       reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+       if (reg & AMD_SIDE_READY) {
+               reg &= ~AMD_SIDE_READY;
+               writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+               readl(mmio + AMD_SIDEINFO_OFFSET);
+       }
+}
+
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->self_mmio;
+
+       ndev->mw_count = AMD_MW_CNT;
+       ndev->spad_count = AMD_SPADS_CNT;
+       ndev->db_count = AMD_DB_CNT;
+
+       switch (ndev->ntb.topo) {
+       case NTB_TOPO_PRI:
+       case NTB_TOPO_SEC:
+               ndev->spad_count >>= 1;
+               if (ndev->ntb.topo == NTB_TOPO_PRI) {
+                       ndev->self_spad = 0;
+                       ndev->peer_spad = 0x20;
+               } else {
+                       ndev->self_spad = 0x20;
+                       ndev->peer_spad = 0;
+               }
+
+               INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+               schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+               break;
+       default:
+               dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+               return -EINVAL;
+       }
+
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+       /* Mask event interrupts */
+       writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+       return 0;
+}
+
+static enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       u32 info;
+
+       info = readl(mmio + AMD_SIDEINFO_OFFSET);
+       if (info & AMD_SIDE_MASK)
+               return NTB_TOPO_SEC;
+       else
+               return NTB_TOPO_PRI;
+}
+
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev;
+       int rc = 0;
+
+       pdev = ndev_pdev(ndev);
+
+       ndev->ntb.topo = amd_get_topo(ndev);
+       dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+               ntb_topo_string(ndev->ntb.topo));
+
+       rc = amd_init_ntb(ndev);
+       if (rc)
+               return rc;
+
+       rc = amd_init_isr(ndev);
+       if (rc) {
+               dev_err(ndev_dev(ndev), "fail to init isr.\n");
+               return rc;
+       }
+
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+       return 0;
+}
+
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+       cancel_delayed_work_sync(&ndev->hb_timer);
+
+       ndev_deinit_isr(ndev);
+}
+
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+                           struct pci_dev *pdev)
+{
+       int rc;
+
+       pci_set_drvdata(pdev, ndev);
+
+       rc = pci_enable_device(pdev);
+       if (rc)
+               goto err_pci_enable;
+
+       rc = pci_request_regions(pdev, NTB_NAME);
+       if (rc)
+               goto err_pci_regions;
+
+       pci_set_master(pdev);
+
+       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (rc)
+                       goto err_dma_mask;
+               dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+       }
+
+       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (rc)
+                       goto err_dma_mask;
+               dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+       }
+
+       ndev->self_mmio = pci_iomap(pdev, 0, 0);
+       if (!ndev->self_mmio) {
+               rc = -EIO;
+               goto err_dma_mask;
+       }
+       ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+       return 0;
+
+err_dma_mask:
+       pci_clear_master(pdev);
+err_pci_regions:
+       pci_disable_device(pdev);
+err_pci_enable:
+       pci_set_drvdata(pdev, NULL);
+       return rc;
+}
+
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev = ndev_pdev(ndev);
+
+       pci_iounmap(pdev, ndev->self_mmio);
+
+       pci_clear_master(pdev);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+}
+
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+                            const struct pci_device_id *id)
+{
+       struct amd_ntb_dev *ndev;
+       int rc, node;
+
+       node = dev_to_node(&pdev->dev);
+
+       ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+       if (!ndev) {
+               rc = -ENOMEM;
+               goto err_ndev;
+       }
+
+       ndev_init_struct(ndev, pdev);
+
+       rc = amd_ntb_init_pci(ndev, pdev);
+       if (rc)
+               goto err_init_pci;
+
+       rc = amd_init_dev(ndev);
+       if (rc)
+               goto err_init_dev;
+
+       /* write side info */
+       amd_init_side_info(ndev);
+
+       amd_poll_link(ndev);
+
+       ndev_init_debugfs(ndev);
+
+       rc = ntb_register_device(&ndev->ntb);
+       if (rc)
+               goto err_register;
+
+       dev_info(&pdev->dev, "NTB device registered.\n");
+
+       return 0;
+
+err_register:
+       ndev_deinit_debugfs(ndev);
+       amd_deinit_dev(ndev);
+err_init_dev:
+       amd_ntb_deinit_pci(ndev);
+err_init_pci:
+       kfree(ndev);
+err_ndev:
+       return rc;
+}
+
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+       struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+       ntb_unregister_device(&ndev->ntb);
+       ndev_deinit_debugfs(ndev);
+       amd_deinit_side_info(ndev);
+       amd_deinit_dev(ndev);
+       amd_ntb_deinit_pci(ndev);
+       kfree(ndev);
+}
+
+static const struct file_operations amd_ntb_debugfs_info = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = ndev_debugfs_read,
+};
+
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+       {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+       {0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+static struct pci_driver amd_ntb_pci_driver = {
+       .name           = KBUILD_MODNAME,
+       .id_table       = amd_ntb_pci_tbl,
+       .probe          = amd_ntb_pci_probe,
+       .remove         = amd_ntb_pci_remove,
+};
+
+static int __init amd_ntb_pci_driver_init(void)
+{
+       pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+       if (debugfs_initialized())
+               debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+       return pci_register_driver(&amd_ntb_pci_driver);
+}
+module_init(amd_ntb_pci_driver_init);
+
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+       pci_unregister_driver(&amd_ntb_pci_driver);
+       debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
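
With the driver registered, clients reach it only through the generic API in include/linux/ntb.h; the amd_ntb_ops table above is what those calls dispatch to. A minimal sketch of client-side memory-window setup under that assumption -- demo_setup_mw() is hypothetical and error handling is trimmed:

#include <linux/ntb.h>

static int demo_setup_mw(struct ntb_dev *ntb, dma_addr_t dma_addr,
			 resource_size_t size)
{
	phys_addr_t base;
	resource_size_t mw_size;
	int rc;

	/* Dispatches to amd_ntb_mw_get_range() on this hardware. */
	rc = ntb_mw_get_range(ntb, 0, &base, &mw_size, NULL, NULL);
	if (rc)
		return rc;
	if (size > mw_size)
		return -EINVAL;

	/* Programs the XLAT/LMT registers via amd_ntb_mw_set_trans(). */
	rc = ntb_mw_set_trans(ntb, 0, dma_addr, size);
	if (rc)
		return rc;

	return ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
}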
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
new file mode 100644 (file)
index 0000000..2eac3cd
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+#define PCI_DEVICE_ID_AMD_NTB  0x145B
+#define AMD_LINK_HB_TIMEOUT    msecs_to_jiffies(1000)
+#define AMD_LINK_STATUS_OFFSET 0x68
+#define NTB_LIN_STA_ACTIVE_BIT 0x00000002
+#define NTB_LNK_STA_SPEED_MASK 0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK 0x03F00000
+#define NTB_LNK_STA_ACTIVE(x)  (!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x)   (((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x)   (((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+#ifndef read64
+#ifdef readq
+#define read64 readq
+#else
+#define read64 _read64
+static inline u64 _read64(void __iomem *mmio)
+{
+       u64 low, high;
+
+       low = readl(mmio);
+       high = readl(mmio + sizeof(u32));
+       return low | (high << 32);
+}
+#endif
+#endif
+
+#ifndef write64
+#ifdef writeq
+#define write64 writeq
+#else
+#define write64 _write64
+static inline void _write64(u64 val, void __iomem *mmio)
+{
+       writel(val, mmio);
+       writel(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+enum {
+       /* AMD NTB Capability */
+       AMD_MW_CNT              = 3,
+       AMD_DB_CNT              = 16,
+       AMD_MSIX_VECTOR_CNT     = 24,
+       AMD_SPADS_CNT           = 16,
+
+       /*  AMD NTB register offset */
+       AMD_CNTL_OFFSET         = 0x200,
+
+       /* NTB control register bits */
+       PMM_REG_CTL             = BIT(21),
+       SMM_REG_CTL             = BIT(20),
+       SMM_REG_ACC_PATH        = BIT(18),
+       PMM_REG_ACC_PATH        = BIT(17),
+       NTB_CLK_EN              = BIT(16),
+
+       AMD_STA_OFFSET          = 0x204,
+       AMD_PGSLV_OFFSET        = 0x208,
+       AMD_SPAD_MUX_OFFSET     = 0x20C,
+       AMD_SPAD_OFFSET         = 0x210,
+       AMD_RSMU_HCID           = 0x250,
+       AMD_RSMU_SIID           = 0x254,
+       AMD_PSION_OFFSET        = 0x300,
+       AMD_SSION_OFFSET        = 0x330,
+       AMD_MMINDEX_OFFSET      = 0x400,
+       AMD_MMDATA_OFFSET       = 0x404,
+       AMD_SIDEINFO_OFFSET     = 0x408,
+
+       AMD_SIDE_MASK           = BIT(0),
+       AMD_SIDE_READY          = BIT(1),
+
+       /* limit register */
+       AMD_ROMBARLMT_OFFSET    = 0x410,
+       AMD_BAR1LMT_OFFSET      = 0x414,
+       AMD_BAR23LMT_OFFSET     = 0x418,
+       AMD_BAR45LMT_OFFSET     = 0x420,
+       /* xlat address */
+       AMD_POMBARXLAT_OFFSET   = 0x428,
+       AMD_BAR1XLAT_OFFSET     = 0x430,
+       AMD_BAR23XLAT_OFFSET    = 0x438,
+       AMD_BAR45XLAT_OFFSET    = 0x440,
+       /* doorbell and interrupt */
+       AMD_DBFM_OFFSET         = 0x450,
+       AMD_DBREQ_OFFSET        = 0x454,
+       AMD_MIRRDBSTAT_OFFSET   = 0x458,
+       AMD_DBMASK_OFFSET       = 0x45C,
+       AMD_DBSTAT_OFFSET       = 0x460,
+       AMD_INTMASK_OFFSET      = 0x470,
+       AMD_INTSTAT_OFFSET      = 0x474,
+
+       /* event type */
+       AMD_PEER_FLUSH_EVENT    = BIT(0),
+       AMD_PEER_RESET_EVENT    = BIT(1),
+       AMD_PEER_D3_EVENT       = BIT(2),
+       AMD_PEER_PMETO_EVENT    = BIT(3),
+       AMD_PEER_D0_EVENT       = BIT(4),
+       AMD_EVENT_INTMASK       = (AMD_PEER_FLUSH_EVENT |
+                               AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+                               AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
+
+       AMD_PMESTAT_OFFSET      = 0x480,
+       AMD_PMSGTRIG_OFFSET     = 0x490,
+       AMD_LTRLATENCY_OFFSET   = 0x494,
+       AMD_FLUSHTRIG_OFFSET    = 0x498,
+
+       /* SMU register */
+       AMD_SMUACK_OFFSET       = 0x4A0,
+       AMD_SINRST_OFFSET       = 0x4A4,
+       AMD_RSPNUM_OFFSET       = 0x4A8,
+       AMD_SMU_SPADMUTEX       = 0x4B0,
+       AMD_SMU_SPADOFFSET      = 0x4B4,
+
+       AMD_PEER_OFFSET         = 0x400,
+};
+
+struct amd_ntb_dev;
+
+struct amd_ntb_vec {
+       struct amd_ntb_dev      *ndev;
+       int                     num;
+};
+
+struct amd_ntb_dev {
+       struct ntb_dev ntb;
+
+       u32 ntb_side;
+       u32 lnk_sta;
+       u32 cntl_sta;
+       u32 peer_sta;
+
+       unsigned char mw_count;
+       unsigned char spad_count;
+       unsigned char db_count;
+       unsigned char msix_vec_count;
+
+       u64 db_valid_mask;
+       u64 db_mask;
+       u32 int_mask;
+
+       struct msix_entry *msix;
+       struct amd_ntb_vec *vec;
+
+       /* synchronize rmw access of db_mask and hw reg */
+       spinlock_t db_mask_lock;
+
+       void __iomem *self_mmio;
+       void __iomem *peer_mmio;
+       unsigned int self_spad;
+       unsigned int peer_spad;
+
+       struct delayed_work hb_timer;
+
+       struct dentry *debugfs_dir;
+       struct dentry *debugfs_info;
+};
+
+#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
+#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
+#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+#endif
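
The split self_spad/peer_spad ranges defined above give each side a private half of the scratchpad space, mirrored to its peer. A hypothetical client-side handshake over that mechanism; demo_spad_handshake() and the magic value are illustrative only:

#include <linux/delay.h>
#include <linux/ntb.h>

static int demo_spad_handshake(struct ntb_dev *ntb)
{
	int i;

	/* Publish a magic value in our local scratchpad... */
	ntb_spad_write(ntb, 0, 0xfeed0001);

	/* ...then poll the peer's scratchpad for the matching value. */
	for (i = 0; i < 100; i++) {
		if (ntb_peer_spad_read(ntb, 0) == 0xfeed0001)
			return 0;
		msleep(10);
	}

	return -ETIMEDOUT;
}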
index a198f82982582a155b230e3964db17b6e01479d1..40d04ef5da9e865c8ddecfe5acf431fef5976daa 100644 (file)
@@ -875,7 +875,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
        limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar);
 
        if (bar < 4 || !ndev->bar4_split) {
-               base = ioread64(mmio + base_reg);
+               base = ioread64(mmio + base_reg) & NTB_BAR_MASK_64;
 
                /* Set the limit if supported, if size is not mw_size */
                if (limit_reg && size != mw_size)
@@ -906,7 +906,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
                if ((addr + size) & (~0ull << 32))
                        return -EINVAL;
 
-               base = ioread32(mmio + base_reg);
+               base = ioread32(mmio + base_reg) & NTB_BAR_MASK_32;
 
                /* Set the limit if supported, if size is not mw_size */
                if (limit_reg && size != mw_size)
index 2eb4addd10d082c03184469619b6bafece08ddde..3ec149cf656276dd362fad25b37661b69d5fb3d0 100644 (file)
 #define NTB_UNSAFE_DB                  BIT_ULL(0)
 #define NTB_UNSAFE_SPAD                        BIT_ULL(1)
 
+#define NTB_BAR_MASK_64                        ~(0xfull)
+#define NTB_BAR_MASK_32                        ~(0xfu)
+
 struct intel_ntb_dev;
 
 struct intel_ntb_reg {
@@ -334,7 +337,8 @@ struct intel_ntb_dev {
 #define ndev_pdev(ndev) ((ndev)->ntb.pdev)
 #define ndev_name(ndev) pci_name(ndev_pdev(ndev))
 #define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
-#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb)
-#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work)
+#define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
+                                    hb_timer.work)
 
 #endif
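
For context on the two hunks above: the NTB base registers mirror PCI BAR contents, and the low four bits of a memory BAR hold flag bits (I/O vs. memory space, type, prefetchable) rather than address bits, so a value read back must be masked before it is reused as a base address. A one-line sketch of the idea, as a hypothetical helper using the definitions above:

static inline u64 bar_base_from_reg(u64 reg_val)
{
	return reg_val & NTB_BAR_MASK_64;	/* clear flag bits 3:0 */
}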
index 60654d524858c4bf52f6c5b5668c90719f523eda..ec4775f0ec163a2ac4e3d89851bedfb4a0d52db1 100644 (file)
@@ -171,12 +171,14 @@ struct ntb_transport_qp {
        u64 rx_err_ver;
        u64 rx_memcpy;
        u64 rx_async;
+       u64 dma_rx_prep_err;
        u64 tx_bytes;
        u64 tx_pkts;
        u64 tx_ring_full;
        u64 tx_err_no_buf;
        u64 tx_memcpy;
        u64 tx_async;
+       u64 dma_tx_prep_err;
 };
 
 struct ntb_transport_mw {
@@ -249,6 +251,8 @@ enum {
 #define QP_TO_MW(nt, qp)       ((qp) % nt->mw_count)
 #define NTB_QP_DEF_NUM_ENTRIES 100
 #define NTB_LINK_DOWN_TIMEOUT  10
+#define DMA_RETRIES            20
+#define DMA_OUT_RESOURCE_TO    50
 
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
@@ -501,6 +505,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "free tx - \t%u\n",
                               ntb_transport_tx_free_entry(qp));
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "DMA tx prep err - \t%llu\n",
+                              qp->dma_tx_prep_err);
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "DMA rx prep err - \t%llu\n",
+                              qp->dma_rx_prep_err);
 
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "\n");
@@ -726,6 +736,8 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
        qp->tx_err_no_buf = 0;
        qp->tx_memcpy = 0;
        qp->tx_async = 0;
+       qp->dma_tx_prep_err = 0;
+       qp->dma_rx_prep_err = 0;
 }
 
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1228,6 +1240,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
        struct dmaengine_unmap_data *unmap;
        dma_cookie_t cookie;
        void *buf = entry->buf;
+       int retries = 0;
 
        len = entry->len;
 
@@ -1263,11 +1276,21 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
 
        unmap->from_cnt = 1;
 
-       txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
-                                            unmap->addr[0], len,
-                                            DMA_PREP_INTERRUPT);
-       if (!txd)
+       for (retries = 0; retries < DMA_RETRIES; retries++) {
+               txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                                    unmap->addr[0], len,
+                                                    DMA_PREP_INTERRUPT);
+               if (txd)
+                       break;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(DMA_OUT_RESOURCE_TO);
+       }
+
+       if (!txd) {
+               qp->dma_rx_prep_err++;
                goto err_get_unmap;
+       }
 
        txd->callback = ntb_rx_copy_callback;
        txd->callback_param = entry;
@@ -1460,6 +1483,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
        void __iomem *offset;
        size_t len = entry->len;
        void *buf = entry->buf;
+       int retries = 0;
 
        offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
        hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1494,10 +1518,20 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 
        unmap->to_cnt = 1;
 
-       txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
-                                            DMA_PREP_INTERRUPT);
-       if (!txd)
+       for (retries = 0; retries < DMA_RETRIES; retries++) {
+               txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0],
+                                                    len, DMA_PREP_INTERRUPT);
+               if (txd)
+                       break;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(DMA_OUT_RESOURCE_TO);
+       }
+
+       if (!txd) {
+               qp->dma_tx_prep_err++;
                goto err_get_unmap;
+       }
 
        txd->callback = ntb_tx_copy_callback;
        txd->callback_param = entry;
@@ -1532,7 +1566,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp,
 
        if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
                if (qp->tx_handler)
-                       qp->tx_handler(qp->cb_data, qp, NULL, -EIO);
+                       qp->tx_handler(qp, qp->cb_data, NULL, -EIO);
 
                ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
                             &qp->tx_free_q);
index 01852f98a843db8b9d04e4a9e5e115bce756a4b0..a5d0eda44438eefec4b88bad9d31afa484da4c6d 100644 (file)
@@ -17,3 +17,11 @@ config NTB_TOOL
         functioning at a basic level.
 
         If unsure, say N.
+
+config NTB_PERF
+       tristate "NTB RAW Perf Measuring Tool"
+       help
+        This is a tool to measure raw NTB performance by transferring data
+        to and from the window without additional software interaction.
+
+        If unsure, say N.
index 0ea32a324b6ca206a6f9beeaa4b86c75f0cab3e1..9e77e0b761c24fd45c6f435305e2b52e1705edee 100644 (file)
@@ -1,2 +1,3 @@
 obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
 obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
new file mode 100644 (file)
index 0000000..c8a37ba
--- /dev/null
@@ -0,0 +1,748 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *   PCIe NTB Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/ntb.h>
+
+#define DRIVER_NAME            "ntb_perf"
+#define DRIVER_DESCRIPTION     "PCIe NTB Performance Measurement Tool"
+
+#define DRIVER_LICENSE         "Dual BSD/GPL"
+#define DRIVER_VERSION         "1.0"
+#define DRIVER_AUTHOR          "Dave Jiang <dave.jiang@intel.com>"
+
+#define PERF_LINK_DOWN_TIMEOUT 10
+#define PERF_VERSION           0xffff0001
+#define MAX_THREADS            32
+#define MAX_TEST_SIZE          SZ_1M
+#define MAX_SRCS               32
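+/* when descriptor allocation fails, back off DMA_OUT_RESOURCE_TO jiffies
+ * between attempts, for at most DMA_RETRIES attempts */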
+#define DMA_OUT_RESOURCE_TO    50
+#define DMA_RETRIES            20
+#define SZ_4G                  (1ULL << 32)
+#define MAX_SEG_ORDER          20 /* no larger than 1M for kmalloc buffer */
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+
+static unsigned int seg_order = 19; /* 512K */
+module_param(seg_order, uint, 0644);
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
+
+static unsigned int run_order = 32; /* 4G */
+module_param(run_order, uint, 0644);
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
+
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Use the DMA engine to measure performance");
+
+struct perf_mw {
+       phys_addr_t     phys_addr;
+       resource_size_t phys_size;
+       resource_size_t xlat_align;
+       resource_size_t xlat_align_size;
+       void __iomem    *vbase;
+       size_t          xlat_size;
+       size_t          buf_size;
+       void            *virt_addr;
+       dma_addr_t      dma_addr;
+};
+
+struct perf_ctx;
+
+struct pthr_ctx {
+       struct task_struct      *thread;
+       struct perf_ctx         *perf;
+       atomic_t                dma_sync;
+       struct dma_chan         *dma_chan;
+       int                     dma_prep_err;
+       int                     src_idx;
+       void                    *srcs[MAX_SRCS];
+};
+
+struct perf_ctx {
+       struct ntb_dev          *ntb;
+       spinlock_t              db_lock;
+       struct perf_mw          mw;
+       bool                    link_is_up;
+       struct work_struct      link_cleanup;
+       struct delayed_work     link_work;
+       struct dentry           *debugfs_node_dir;
+       struct dentry           *debugfs_run;
+       struct dentry           *debugfs_threads;
+       u8                      perf_threads;
+       bool                    run;
+       struct pthr_ctx         pthr_ctx[MAX_THREADS];
+       atomic_t                tsync;
+};
+
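+/* scratchpad register allocation; both sides of the link share this layout */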
+enum {
+       VERSION = 0,
+       MW_SZ_HIGH,
+       MW_SZ_LOW,
+       SPAD_MSG,
+       SPAD_ACK,
+       MAX_SPAD
+};
+
+static void perf_link_event(void *ctx)
+{
+       struct perf_ctx *perf = ctx;
+
+       if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1)
+               schedule_delayed_work(&perf->link_work, 2*HZ);
+       else
+               schedule_work(&perf->link_cleanup);
+}
+
+static void perf_db_event(void *ctx, int vec)
+{
+       struct perf_ctx *perf = ctx;
+       u64 db_bits, db_mask;
+
+       db_mask = ntb_db_vector_mask(perf->ntb, vec);
+       db_bits = ntb_db_read(perf->ntb);
+
+       dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+               vec, db_mask, db_bits);
+}
+
+static const struct ntb_ctx_ops perf_ops = {
+       .link_event = perf_link_event,
+       .db_event = perf_db_event,
+};
+
+static void perf_copy_callback(void *data)
+{
+       struct pthr_ctx *pctx = data;
+
+       atomic_dec(&pctx->dma_sync);
+}
+
+static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+                        char *src, size_t size)
+{
+       struct perf_ctx *perf = pctx->perf;
+       struct dma_async_tx_descriptor *txd;
+       struct dma_chan *chan = pctx->dma_chan;
+       struct dma_device *device;
+       struct dmaengine_unmap_data *unmap;
+       dma_cookie_t cookie;
+       size_t src_off, dst_off;
+       struct perf_mw *mw = &perf->mw;
+       u64 vbase, dst_vaddr;
+       dma_addr_t dst_phys;
+       int retries = 0;
+
+       if (!use_dma) {
+               memcpy_toio(dst, src, size);
+               return size;
+       }
+
+       if (!chan) {
+               dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
+               return -EINVAL;
+       }
+
+       device = chan->device;
+       src_off = (size_t)src & ~PAGE_MASK;
+       dst_off = (size_t)dst & ~PAGE_MASK;
+
+       if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+               return -ENODEV;
+
+       vbase = (u64)(u64 *)mw->vbase;
+       dst_vaddr = (u64)(u64 *)dst;
+       dst_phys = mw->phys_addr + (dst_vaddr - vbase);
+
+       unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+       if (!unmap)
+               return -ENOMEM;
+
+       unmap->len = size;
+       unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+                                     src_off, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[0]))
+               goto err_get_unmap;
+
+       unmap->to_cnt = 1;
+
+       do {
+               txd = device->device_prep_dma_memcpy(chan, dst_phys,
+                                                    unmap->addr[0],
+                                                    size, DMA_PREP_INTERRUPT);
+               if (!txd) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(DMA_OUT_RESOURCE_TO);
+               }
+       } while (!txd && (++retries < DMA_RETRIES));
+
+       if (!txd) {
+               pctx->dma_prep_err++;
+               goto err_get_unmap;
+       }
+
+       txd->callback = perf_copy_callback;
+       txd->callback_param = pctx;
+       dma_set_unmap(txd, unmap);
+
+       cookie = dmaengine_submit(txd);
+       if (dma_submit_error(cookie))
+               goto err_set_unmap;
+
+       atomic_inc(&pctx->dma_sync);
+       dma_async_issue_pending(chan);
+
+       return size;
+
+err_set_unmap:
+       dmaengine_unmap_put(unmap);
+err_get_unmap:
+       dmaengine_unmap_put(unmap);
+       return 0;
+}
+
+static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+                         u64 buf_size, u64 win_size, u64 total)
+{
+       int chunks, total_chunks, i;
+       int copied_chunks = 0;
+       u64 copied = 0, result;
+       char *tmp = dst;
+       u64 perf, diff_us;
+       ktime_t kstart, kstop, kdiff;
+
+       chunks = div64_u64(win_size, buf_size);
+       total_chunks = div64_u64(total, buf_size);
+       kstart = ktime_get();
+
+       for (i = 0; i < total_chunks; i++) {
+               result = perf_copy(pctx, tmp, src, buf_size);
+               copied += result;
+               copied_chunks++;
+               if (copied_chunks == chunks) {
+                       tmp = dst;
+                       copied_chunks = 0;
+               } else
+                       tmp += buf_size;
+
+               /* Probably should schedule every 4GB to prevent soft hang. */
+               if (((copied % SZ_4G) == 0) && !use_dma) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(1);
+               }
+       }
+
+       if (use_dma) {
+               pr_info("%s: All DMA descriptors submitted\n", current->comm);
+               while (atomic_read(&pctx->dma_sync) != 0)
+                       msleep(20);
+       }
+
+       kstop = ktime_get();
+       kdiff = ktime_sub(kstop, kstart);
+       diff_us = ktime_to_us(kdiff);
+
+       pr_info("%s: copied %llu bytes\n", current->comm, copied);
+
+       pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+
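+       /* bytes per microsecond is numerically MBytes (10^6 bytes) per second */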
+       perf = div64_u64(copied, diff_us);
+
+       pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+
+       return 0;
+}
+
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+       return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
+}
+
+static int ntb_perf_thread(void *data)
+{
+       struct pthr_ctx *pctx = data;
+       struct perf_ctx *perf = pctx->perf;
+       struct pci_dev *pdev = perf->ntb->pdev;
+       struct perf_mw *mw = &perf->mw;
+       char *dst;
+       u64 win_size, buf_size, total;
+       void *src;
+       int rc, node, i;
+       struct dma_chan *dma_chan = NULL;
+
+       pr_info("kthread %s starting...\n", current->comm);
+
+       node = dev_to_node(&pdev->dev);
+
+       if (use_dma && !pctx->dma_chan) {
+               dma_cap_mask_t dma_mask;
+
+               dma_cap_zero(dma_mask);
+               dma_cap_set(DMA_MEMCPY, dma_mask);
+               dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+                                              (void *)(unsigned long)node);
+               if (!dma_chan) {
+                       pr_warn("%s: cannot acquire DMA channel, quitting\n",
+                               current->comm);
+                       return -ENODEV;
+               }
+               pctx->dma_chan = dma_chan;
+       }
+
+       for (i = 0; i < MAX_SRCS; i++) {
+               pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+               if (!pctx->srcs[i]) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+       }
+
+       win_size = mw->phys_size;
+       buf_size = 1ULL << seg_order;
+       total = 1ULL << run_order;
+
+       if (buf_size > MAX_TEST_SIZE)
+               buf_size = MAX_TEST_SIZE;
+
+       dst = (char *)mw->vbase;
+
+       atomic_inc(&perf->tsync);
+       while (atomic_read(&perf->tsync) != perf->perf_threads)
+               schedule();
+
+       src = pctx->srcs[pctx->src_idx];
+       pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
+
+       rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+       atomic_dec(&perf->tsync);
+
+       if (rc < 0) {
+               pr_err("%s: failed\n", current->comm);
+               rc = -ENXIO;
+               goto err;
+       }
+
+       for (i = 0; i < MAX_SRCS; i++) {
+               kfree(pctx->srcs[i]);
+               pctx->srcs[i] = NULL;
+       }
+
+       return 0;
+
+err:
+       for (i = 0; i < MAX_SRCS; i++) {
+               kfree(pctx->srcs[i]);
+               pctx->srcs[i] = NULL;
+       }
+
+       if (dma_chan) {
+               dma_release_channel(dma_chan);
+               pctx->dma_chan = NULL;
+       }
+
+       return rc;
+}
+
+static void perf_free_mw(struct perf_ctx *perf)
+{
+       struct perf_mw *mw = &perf->mw;
+       struct pci_dev *pdev = perf->ntb->pdev;
+
+       if (!mw->virt_addr)
+               return;
+
+       ntb_mw_clear_trans(perf->ntb, 0);
+       dma_free_coherent(&pdev->dev, mw->buf_size,
+                         mw->virt_addr, mw->dma_addr);
+       mw->xlat_size = 0;
+       mw->buf_size = 0;
+       mw->virt_addr = NULL;
+}
+
+static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
+{
+       struct perf_mw *mw = &perf->mw;
+       size_t xlat_size, buf_size;
+
+       if (!size)
+               return -EINVAL;
+
+       xlat_size = round_up(size, mw->xlat_align_size);
+       buf_size = round_up(size, mw->xlat_align);
+
+       if (mw->xlat_size == xlat_size)
+               return 0;
+
+       if (mw->buf_size)
+               perf_free_mw(perf);
+
+       mw->xlat_size = xlat_size;
+       mw->buf_size = buf_size;
+
+       mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
+                                          &mw->dma_addr, GFP_KERNEL);
+       if (!mw->virt_addr) {
+               mw->xlat_size = 0;
+               mw->buf_size = 0;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void perf_link_work(struct work_struct *work)
+{
+       struct perf_ctx *perf =
+               container_of(work, struct perf_ctx, link_work.work);
+       struct ntb_dev *ndev = perf->ntb;
+       struct pci_dev *pdev = ndev->pdev;
+       u32 val;
+       u64 size;
+       int rc;
+
+       dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
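+       /* advertise the local memory window size and protocol version via
+        * the peer's scratchpad registers, then read back what the peer wrote */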
+       size = perf->mw.phys_size;
+       ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
+       ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
+       ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+
+       /* now read what the peer wrote */
+       val = ntb_spad_read(ndev, VERSION);
+       if (val != PERF_VERSION) {
+               dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
+               goto out;
+       }
+
+       val = ntb_spad_read(ndev, MW_SZ_HIGH);
+       size = (u64)val << 32;
+
+       val = ntb_spad_read(ndev, MW_SZ_LOW);
+       size |= val;
+
+       dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);
+
+       rc = perf_set_mw(perf, size);
+       if (rc)
+               goto out1;
+
+       perf->link_is_up = true;
+
+       return;
+
+out1:
+       perf_free_mw(perf);
+
+out:
+       if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+               schedule_delayed_work(&perf->link_work,
+                                     msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
+}
+
+static void perf_link_cleanup(struct work_struct *work)
+{
+       struct perf_ctx *perf = container_of(work,
+                                            struct perf_ctx,
+                                            link_cleanup);
+
+       dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+       if (!perf->link_is_up)
+               cancel_delayed_work_sync(&perf->link_work);
+}
+
+static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
+{
+       struct perf_mw *mw;
+       int rc;
+
+       mw = &perf->mw;
+
+       rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
+                             &mw->xlat_align, &mw->xlat_align_size);
+       if (rc)
+               return rc;
+
+       perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+       if (!mw->vbase)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+                               size_t count, loff_t *offp)
+{
+       struct perf_ctx *perf = filp->private_data;
+       char *buf;
+       ssize_t ret, out_offset;
+
+       if (!perf)
+               return 0;
+
+       buf = kmalloc(64, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       out_offset = snprintf(buf, 64, "%d\n", perf->run);
+       ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+       kfree(buf);
+
+       return ret;
+}
+
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+                                size_t count, loff_t *offp)
+{
+       struct perf_ctx *perf = filp->private_data;
+       int node, i;
+
+       if (!perf->link_is_up)
+               return 0;
+
+       if (perf->perf_threads == 0)
+               return 0;
+
+       if (atomic_read(&perf->tsync) == 0)
+               perf->run = false;
+
+       if (perf->run) {
+               /* let's stop the threads */
+               perf->run = false;
+               for (i = 0; i < MAX_THREADS; i++) {
+                       if (perf->pthr_ctx[i].thread) {
+                               kthread_stop(perf->pthr_ctx[i].thread);
+                               perf->pthr_ctx[i].thread = NULL;
+                       } else
+                               break;
+               }
+       } else {
+               perf->run = true;
+
+               if (perf->perf_threads > MAX_THREADS) {
+                       perf->perf_threads = MAX_THREADS;
+                       pr_info("Reset total threads to: %u\n", MAX_THREADS);
+               }
+
+               /* no greater than 1M */
+               if (seg_order > MAX_SEG_ORDER) {
+                       seg_order = MAX_SEG_ORDER;
+                       pr_info("Fix seg_order to %u\n", seg_order);
+               }
+
+               if (run_order < seg_order) {
+                       run_order = seg_order;
+                       pr_info("Fix run_order to %u\n", run_order);
+               }
+
+               node = dev_to_node(&perf->ntb->pdev->dev);
+               /* launch kernel thread */
+               for (i = 0; i < perf->perf_threads; i++) {
+                       struct pthr_ctx *pctx;
+
+                       pctx = &perf->pthr_ctx[i];
+                       atomic_set(&pctx->dma_sync, 0);
+                       pctx->perf = perf;
+                       pctx->thread =
+                               kthread_create_on_node(ntb_perf_thread,
+                                                      (void *)pctx,
+                                                      node, "ntb_perf %d", i);
+                       if (!IS_ERR(pctx->thread)) {
+                               wake_up_process(pctx->thread);
+                       } else {
+                               int j;
+
+                               /* kthread_create_on_node() returns an
+                                * ERR_PTR(), never NULL; unwind the threads
+                                * already started */
+                               pctx->thread = NULL;
+                               perf->run = false;
+                               for (j = 0; j < i; j++) {
+                                       struct pthr_ctx *p = &perf->pthr_ctx[j];
+
+                                       if (p->thread) {
+                                               kthread_stop(p->thread);
+                                               p->thread = NULL;
+                                       }
+                               }
+                       }
+
+                       if (!perf->run)
+                               return -ENXIO;
+               }
+
+       }
+
+       return count;
+}
+
+static const struct file_operations ntb_perf_debugfs_run = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = debugfs_run_read,
+       .write = debugfs_run_write,
+};
+
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+       struct pci_dev *pdev = perf->ntb->pdev;
+
+       if (!debugfs_initialized())
+               return -ENODEV;
+
+       if (!perf_debugfs_dir) {
+               perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+               if (!perf_debugfs_dir)
+                       return -ENODEV;
+       }
+
+       perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+                                                   perf_debugfs_dir);
+       if (!perf->debugfs_node_dir)
+               return -ENODEV;
+
+       perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+                                               perf->debugfs_node_dir, perf,
+                                               &ntb_perf_debugfs_run);
+       if (!perf->debugfs_run)
+               return -ENODEV;
+
+       perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+                                                 perf->debugfs_node_dir,
+                                                 &perf->perf_threads);
+       if (!perf->debugfs_threads)
+               return -ENODEV;
+
+       return 0;
+}
+
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+       struct pci_dev *pdev = ntb->pdev;
+       struct perf_ctx *perf;
+       int node;
+       int rc = 0;
+
+       node = dev_to_node(&pdev->dev);
+
+       perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
+       if (!perf) {
+               rc = -ENOMEM;
+               goto err_perf;
+       }
+
+       perf->ntb = ntb;
+       perf->perf_threads = 1;
+       atomic_set(&perf->tsync, 0);
+       perf->run = false;
+       spin_lock_init(&perf->db_lock);
+       perf_setup_mw(ntb, perf);
+       INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
+       INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
+
+       rc = ntb_set_ctx(ntb, perf, &perf_ops);
+       if (rc)
+               goto err_ctx;
+
+       perf->link_is_up = false;
+       ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+       ntb_link_event(ntb);
+
+       rc = perf_debugfs_setup(perf);
+       if (rc)
+               goto err_ctx;
+
+       return 0;
+
+err_ctx:
+       cancel_delayed_work_sync(&perf->link_work);
+       cancel_work_sync(&perf->link_cleanup);
+       kfree(perf);
+err_perf:
+       return rc;
+}
+
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+       struct perf_ctx *perf = ntb->ctx;
+       int i;
+
+       dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+
+       cancel_delayed_work_sync(&perf->link_work);
+       cancel_work_sync(&perf->link_cleanup);
+
+       ntb_clear_ctx(ntb);
+       ntb_link_disable(ntb);
+
+       debugfs_remove_recursive(perf_debugfs_dir);
+       perf_debugfs_dir = NULL;
+
+       if (use_dma) {
+               for (i = 0; i < MAX_THREADS; i++) {
+                       struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+                       if (pctx->dma_chan)
+                               dma_release_channel(pctx->dma_chan);
+               }
+       }
+
+       kfree(perf);
+}
+
+static struct ntb_client perf_client = {
+       .ops = {
+               .probe = perf_probe,
+               .remove = perf_remove,
+       },
+};
+module_ntb_client(perf_client);
index 7e2c43f701bc451463c5273ed3a5a1e49f6be15f..5d28e9405f32fb96dc79049d5cc4dbca01af14c3 100644 (file)
@@ -382,18 +382,18 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
        [ND_CMD_ARS_CAP] = {
                .in_num = 2,
                .in_sizes = { 8, 8, },
-               .out_num = 2,
-               .out_sizes = { 4, 4, },
+               .out_num = 4,
+               .out_sizes = { 4, 4, 4, 4, },
        },
        [ND_CMD_ARS_START] = {
-               .in_num = 4,
-               .in_sizes = { 8, 8, 2, 6, },
-               .out_num = 1,
-               .out_sizes = { 4, },
+               .in_num = 5,
+               .in_sizes = { 8, 8, 2, 1, 5, },
+               .out_num = 2,
+               .out_sizes = { 4, 4, },
        },
        [ND_CMD_ARS_STATUS] = {
-               .out_num = 2,
-               .out_sizes = { 4, UINT_MAX, },
+               .out_num = 3,
+               .out_sizes = { 4, 4, UINT_MAX, },
        },
 };
 
@@ -442,8 +442,8 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
                return in_field[1];
        else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
                return out_field[1];
-       else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1)
-               return ND_CMD_ARS_STATUS_MAX;
+       else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2)
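+               /* the variable portion is the total output size minus the
+                * 8-byte fixed header (status + output length) */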
+               return out_field[1] - 8;
 
        return UINT_MAX;
 }
index 8ebfcaae3f5a06a4aca19611fc7c5abdeb29f16a..9edf7eb7d17cc39713e67e4dd62d4e16c0d93e07 100644 (file)
@@ -1277,10 +1277,12 @@ static ssize_t mode_show(struct device *dev,
 
        device_lock(dev);
        claim = ndns->claim;
-       if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim)))
-               mode = "memory";
-       else if (claim && is_nd_btt(claim))
+       if (claim && is_nd_btt(claim))
                mode = "safe";
+       else if (claim && is_nd_pfn(claim))
+               mode = "memory";
+       else if (!claim && pmem_should_map_pages(dev))
+               mode = "memory";
        else
                mode = "raw";
        rc = sprintf(buf, "%s\n", mode);
index 0cc9048b86e23103900d36f75a0b64746521ee18..ae81a2f1da50b1e9f9cf5a14bd180df19b3ccab1 100644 (file)
@@ -301,10 +301,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 
        switch (le32_to_cpu(pfn_sb->mode)) {
        case PFN_MODE_RAM:
-               break;
        case PFN_MODE_PMEM:
-               /* TODO: allocate from PMEM support */
-               return -ENOTTY;
+               break;
        default:
                return -ENXIO;
        }
index 7edf31671dabed8f8d193806f54f3d1b30401e7c..8d0b546701848eb2b57d87b7207490133e5c32a9 100644 (file)
@@ -41,7 +41,7 @@ struct pmem_device {
        phys_addr_t             phys_addr;
        /* when non-zero this device is hosting a 'pfn' instance */
        phys_addr_t             data_offset;
-       unsigned long           pfn_flags;
+       u64                     pfn_flags;
        void __pmem             *virt_addr;
        size_t                  size;
        struct badblocks        bb;
index 002a94abdbc45c7f20255b804efae684185283c9..b586d84f251881801ae75023fd3fd6e6bd884047 100644 (file)
@@ -8,3 +8,15 @@ config BLK_DEV_NVME
 
          To compile this driver as a module, choose M here: the
          module will be called nvme.
+
+config BLK_DEV_NVME_SCSI
+       bool "SCSI emulation for NVMe device nodes"
+       depends on BLK_DEV_NVME
+       ---help---
+         This adds support for the SG_IO ioctl on the NVMe character
+         and block device nodes, as well as a translation for a small
+         number of selected SCSI commands to NVMe commands to the NVMe
+         driver.  If you don't know what this means you probably want
+         to say N here, unless you run a distro that abuses the SCSI
+         emulation to provide stable device names for mount by id, like
+         some OpenSuSE and SLES versions.
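
As a quick sanity check, here is a hedged userspace sketch against a namespace
block node (the /dev/nvme0n1 path is an assumption; adjust for your system).
With this option enabled the ioctl below succeeds; without it the driver's
ioctl switch falls through to -ENOTTY.

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <scsi/sg.h>
	#include <unistd.h>

	int main(void)
	{
		int ver = 0;
		int fd = open("/dev/nvme0n1", O_RDONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* asks the SG_IO emulation layer for the sg interface
		 * version it implements */
		if (ioctl(fd, SG_GET_VERSION_NUM, &ver) < 0)
			perror("SG_GET_VERSION_NUM");
		else
			printf("emulated sg version: %d\n", ver);
		close(fd);
		return 0;
	}
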
index a5fe239525868b1b86ec2d5337cfdb5e324424d6..51bf90871549844c7f4db638a7ec26c4b4b9aa51 100644 (file)
@@ -1,5 +1,6 @@
 
 obj-$(CONFIG_BLK_DEV_NVME)     += nvme.o
 
-lightnvm-$(CONFIG_NVM) := lightnvm.o
-nvme-y         += pci.o scsi.o $(lightnvm-y)
+lightnvm-$(CONFIG_NVM)                 := lightnvm.o
+nvme-y                                 += core.o pci.o $(lightnvm-y)
+nvme-$(CONFIG_BLK_DEV_NVME_SCSI)        += scsi.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
new file mode 100644 (file)
index 0000000..3cd921e
--- /dev/null
@@ -0,0 +1,1471 @@
+/*
+ * NVM Express device driver
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/hdreg.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/pr.h>
+#include <linux/ptrace.h>
+#include <linux/nvme_ioctl.h>
+#include <linux/t10-pi.h>
+#include <scsi/sg.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+#define NVME_MINORS            (1U << MINORBITS)
+
+static int nvme_major;
+module_param(nvme_major, int, 0);
+
+static int nvme_char_major;
+module_param(nvme_char_major, int, 0);
+
+static LIST_HEAD(nvme_ctrl_list);
+DEFINE_SPINLOCK(dev_list_lock);
+
+static struct class *nvme_class;
+
+static void nvme_free_ns(struct kref *kref)
+{
+       struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+
+       if (ns->type == NVME_NS_LIGHTNVM)
+               nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
+
+       spin_lock(&dev_list_lock);
+       ns->disk->private_data = NULL;
+       spin_unlock(&dev_list_lock);
+
+       nvme_put_ctrl(ns->ctrl);
+       put_disk(ns->disk);
+       kfree(ns);
+}
+
+static void nvme_put_ns(struct nvme_ns *ns)
+{
+       kref_put(&ns->kref, nvme_free_ns);
+}
+
+static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
+{
+       struct nvme_ns *ns;
+
+       spin_lock(&dev_list_lock);
+       ns = disk->private_data;
+       if (ns && !kref_get_unless_zero(&ns->kref))
+               ns = NULL;
+       spin_unlock(&dev_list_lock);
+
+       return ns;
+}
+
+void nvme_requeue_req(struct request *req)
+{
+       unsigned long flags;
+
+       blk_mq_requeue_request(req);
+       spin_lock_irqsave(req->q->queue_lock, flags);
+       if (!blk_queue_stopped(req->q))
+               blk_mq_kick_requeue_list(req->q);
+       spin_unlock_irqrestore(req->q->queue_lock, flags);
+}
+
+struct request *nvme_alloc_request(struct request_queue *q,
+               struct nvme_command *cmd, unsigned int flags)
+{
+       bool write = cmd->common.opcode & 1;
+       struct request *req;
+
+       req = blk_mq_alloc_request(q, write, flags);
+       if (IS_ERR(req))
+               return req;
+
+       req->cmd_type = REQ_TYPE_DRV_PRIV;
+       req->cmd_flags |= REQ_FAILFAST_DRIVER;
+       req->__data_len = 0;
+       req->__sector = (sector_t) -1;
+       req->bio = req->biotail = NULL;
+
+       req->cmd = (unsigned char *)cmd;
+       req->cmd_len = sizeof(struct nvme_command);
+       req->special = (void *)0;
+
+       return req;
+}
+
+/*
+ * Returns 0 on success.  If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+               void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
+{
+       struct request *req;
+       int ret;
+
+       req = nvme_alloc_request(q, cmd, 0);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+
+       req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+       if (buffer && bufflen) {
+               ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
+               if (ret)
+                       goto out;
+       }
+
+       blk_execute_rq(req->q, NULL, req, 0);
+       if (result)
+               *result = (u32)(uintptr_t)req->special;
+       ret = req->errors;
+ out:
+       blk_mq_free_request(req);
+       return ret;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+               void *buffer, unsigned bufflen)
+{
+       return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
+}
+
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+               void __user *ubuffer, unsigned bufflen,
+               void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
+               u32 *result, unsigned timeout)
+{
+       bool write = cmd->common.opcode & 1;
+       struct nvme_ns *ns = q->queuedata;
+       struct gendisk *disk = ns ? ns->disk : NULL;
+       struct request *req;
+       struct bio *bio = NULL;
+       void *meta = NULL;
+       int ret;
+
+       req = nvme_alloc_request(q, cmd, 0);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+
+       req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+       if (ubuffer && bufflen) {
+               ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+                               GFP_KERNEL);
+               if (ret)
+                       goto out;
+               bio = req->bio;
+
+               if (!disk)
+                       goto submit;
+               bio->bi_bdev = bdget_disk(disk, 0);
+               if (!bio->bi_bdev) {
+                       ret = -ENODEV;
+                       goto out_unmap;
+               }
+
+               if (meta_buffer) {
+                       struct bio_integrity_payload *bip;
+
+                       meta = kmalloc(meta_len, GFP_KERNEL);
+                       if (!meta) {
+                               ret = -ENOMEM;
+                               goto out_unmap;
+                       }
+
+                       if (write) {
+                               if (copy_from_user(meta, meta_buffer,
+                                               meta_len)) {
+                                       ret = -EFAULT;
+                                       goto out_free_meta;
+                               }
+                       }
+
+                       bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+                       if (IS_ERR(bip)) {
+                               ret = PTR_ERR(bip);
+                               goto out_free_meta;
+                       }
+
+                       bip->bip_iter.bi_size = meta_len;
+                       bip->bip_iter.bi_sector = meta_seed;
+
+                       ret = bio_integrity_add_page(bio, virt_to_page(meta),
+                                       meta_len, offset_in_page(meta));
+                       if (ret != meta_len) {
+                               ret = -ENOMEM;
+                               goto out_free_meta;
+                       }
+               }
+       }
+ submit:
+       blk_execute_rq(req->q, disk, req, 0);
+       ret = req->errors;
+       if (result)
+               *result = (u32)(uintptr_t)req->special;
+       if (meta && !ret && !write) {
+               if (copy_to_user(meta_buffer, meta, meta_len))
+                       ret = -EFAULT;
+       }
+ out_free_meta:
+       kfree(meta);
+ out_unmap:
+       if (bio) {
+               if (disk && bio->bi_bdev)
+                       bdput(bio->bi_bdev);
+               blk_rq_unmap_user(bio);
+       }
+ out:
+       blk_mq_free_request(req);
+       return ret;
+}
+
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+               void __user *ubuffer, unsigned bufflen, u32 *result,
+               unsigned timeout)
+{
+       return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
+                       result, timeout);
+}
+
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+{
+       struct nvme_command c = { };
+       int error;
+
+       /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+       c.identify.opcode = nvme_admin_identify;
+       c.identify.cns = cpu_to_le32(1);
+
+       *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+       if (!*id)
+               return -ENOMEM;
+
+       error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+                       sizeof(struct nvme_id_ctrl));
+       if (error)
+               kfree(*id);
+       return error;
+}
+
+static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
+{
+       struct nvme_command c = { };
+
+       c.identify.opcode = nvme_admin_identify;
+       c.identify.cns = cpu_to_le32(2);
+       c.identify.nsid = cpu_to_le32(nsid);
+       return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
+}
+
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+               struct nvme_id_ns **id)
+{
+       struct nvme_command c = { };
+       int error;
+
+       /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+       c.identify.opcode = nvme_admin_identify;
+       c.identify.nsid = cpu_to_le32(nsid);
+
+       *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+       if (!*id)
+               return -ENOMEM;
+
+       error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+                       sizeof(struct nvme_id_ns));
+       if (error)
+               kfree(*id);
+       return error;
+}
+
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
+                                       dma_addr_t dma_addr, u32 *result)
+{
+       struct nvme_command c;
+
+       memset(&c, 0, sizeof(c));
+       c.features.opcode = nvme_admin_get_features;
+       c.features.nsid = cpu_to_le32(nsid);
+       c.features.prp1 = cpu_to_le64(dma_addr);
+       c.features.fid = cpu_to_le32(fid);
+
+       return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+}
+
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+                                       dma_addr_t dma_addr, u32 *result)
+{
+       struct nvme_command c;
+
+       memset(&c, 0, sizeof(c));
+       c.features.opcode = nvme_admin_set_features;
+       c.features.prp1 = cpu_to_le64(dma_addr);
+       c.features.fid = cpu_to_le32(fid);
+       c.features.dword11 = cpu_to_le32(dword11);
+
+       return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+}
+
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
+{
+       struct nvme_command c = { };
+       int error;
+
+       c.common.opcode = nvme_admin_get_log_page;
+       c.common.nsid = cpu_to_le32(0xFFFFFFFF);
+       c.common.cdw10[0] = cpu_to_le32(
+                       (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+                        NVME_LOG_SMART);
+
+       *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+       if (!*log)
+               return -ENOMEM;
+
+       error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+                       sizeof(struct nvme_smart_log));
+       if (error)
+               kfree(*log);
+       return error;
+}
+
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
+{
+       u32 q_count = (*count - 1) | ((*count - 1) << 16);
+       u32 result;
+       int status, nr_io_queues;
+
+       status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
+                       &result);
+       if (status)
+               return status;
+
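+       /* dword0 of the completion holds 0-based queue counts: submission
+        * queues allocated in the low 16 bits, completion queues in the high */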
+       nr_io_queues = min(result & 0xffff, result >> 16) + 1;
+       *count = min(*count, nr_io_queues);
+       return 0;
+}
+
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+       struct nvme_user_io io;
+       struct nvme_command c;
+       unsigned length, meta_len;
+       void __user *metadata;
+
+       if (copy_from_user(&io, uio, sizeof(io)))
+               return -EFAULT;
+
+       switch (io.opcode) {
+       case nvme_cmd_write:
+       case nvme_cmd_read:
+       case nvme_cmd_compare:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       length = (io.nblocks + 1) << ns->lba_shift;
+       meta_len = (io.nblocks + 1) * ns->ms;
+       metadata = (void __user *)(uintptr_t)io.metadata;
+
+       if (ns->ext) {
+               length += meta_len;
+               meta_len = 0;
+       } else if (meta_len) {
+               if ((io.metadata & 3) || !io.metadata)
+                       return -EINVAL;
+       }
+
+       memset(&c, 0, sizeof(c));
+       c.rw.opcode = io.opcode;
+       c.rw.flags = io.flags;
+       c.rw.nsid = cpu_to_le32(ns->ns_id);
+       c.rw.slba = cpu_to_le64(io.slba);
+       c.rw.length = cpu_to_le16(io.nblocks);
+       c.rw.control = cpu_to_le16(io.control);
+       c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
+       c.rw.reftag = cpu_to_le32(io.reftag);
+       c.rw.apptag = cpu_to_le16(io.apptag);
+       c.rw.appmask = cpu_to_le16(io.appmask);
+
+       return __nvme_submit_user_cmd(ns->queue, &c,
+                       (void __user *)(uintptr_t)io.addr, length,
+                       metadata, meta_len, io.slba, NULL, 0);
+}
+
+static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+                       struct nvme_passthru_cmd __user *ucmd)
+{
+       struct nvme_passthru_cmd cmd;
+       struct nvme_command c;
+       unsigned timeout = 0;
+       int status;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+       if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+               return -EFAULT;
+
+       memset(&c, 0, sizeof(c));
+       c.common.opcode = cmd.opcode;
+       c.common.flags = cmd.flags;
+       c.common.nsid = cpu_to_le32(cmd.nsid);
+       c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+       c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+       c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
+       c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
+       c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
+       c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
+       c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
+       c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+
+       if (cmd.timeout_ms)
+               timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+       status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+                       (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+                       &cmd.result, timeout);
+       if (status >= 0) {
+               if (put_user(cmd.result, &ucmd->result))
+                       return -EFAULT;
+       }
+
+       return status;
+}
+
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+               unsigned int cmd, unsigned long arg)
+{
+       struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+       switch (cmd) {
+       case NVME_IOCTL_ID:
+               force_successful_syscall_return();
+               return ns->ns_id;
+       case NVME_IOCTL_ADMIN_CMD:
+               return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
+       case NVME_IOCTL_IO_CMD:
+               return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
+       case NVME_IOCTL_SUBMIT_IO:
+               return nvme_submit_io(ns, (void __user *)arg);
+#ifdef CONFIG_BLK_DEV_NVME_SCSI
+       case SG_GET_VERSION_NUM:
+               return nvme_sg_get_version_num((void __user *)arg);
+       case SG_IO:
+               return nvme_sg_io(ns, (void __user *)arg);
+#endif
+       default:
+               return -ENOTTY;
+       }
+}
+
+#ifdef CONFIG_COMPAT
+static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
+                       unsigned int cmd, unsigned long arg)
+{
+       switch (cmd) {
+       case SG_IO:
+               return -ENOIOCTLCMD;
+       }
+       return nvme_ioctl(bdev, mode, cmd, arg);
+}
+#else
+#define nvme_compat_ioctl      NULL
+#endif
+
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+       return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
+}
+
+static void nvme_release(struct gendisk *disk, fmode_t mode)
+{
+       nvme_put_ns(disk->private_data);
+}
+
+static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       /* some standard values */
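+       /* 64 heads * 32 sectors = 2048 sectors per cylinder, hence the >> 11 */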
+       geo->heads = 1 << 6;
+       geo->sectors = 1 << 5;
+       geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
+       return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+       struct blk_integrity integrity;
+
+       switch (ns->pi_type) {
+       case NVME_NS_DPS_PI_TYPE3:
+               integrity.profile = &t10_pi_type3_crc;
+               break;
+       case NVME_NS_DPS_PI_TYPE1:
+       case NVME_NS_DPS_PI_TYPE2:
+               integrity.profile = &t10_pi_type1_crc;
+               break;
+       default:
+               integrity.profile = NULL;
+               break;
+       }
+       integrity.tuple_size = ns->ms;
+       blk_integrity_register(ns->disk, &integrity);
+       blk_queue_max_integrity_segments(ns->queue, 1);
+}
+#else
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+static void nvme_config_discard(struct nvme_ns *ns)
+{
+       u32 logical_block_size = queue_logical_block_size(ns->queue);
+       ns->queue->limits.discard_zeroes_data = 0;
+       ns->queue->limits.discard_alignment = logical_block_size;
+       ns->queue->limits.discard_granularity = logical_block_size;
+       blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+}
+
+static int nvme_revalidate_disk(struct gendisk *disk)
+{
+       struct nvme_ns *ns = disk->private_data;
+       struct nvme_id_ns *id;
+       u8 lbaf, pi_type;
+       u16 old_ms;
+       unsigned short bs;
+
+       if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
+               dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
+                               __func__, ns->ctrl->instance, ns->ns_id);
+               return -ENODEV;
+       }
+       if (id->ncap == 0) {
+               kfree(id);
+               return -ENODEV;
+       }
+
+       if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
+               if (nvme_nvm_register(ns->queue, disk->disk_name)) {
+                       dev_warn(ns->ctrl->dev,
+                               "%s: LightNVM init failure\n", __func__);
+                       kfree(id);
+                       return -ENODEV;
+               }
+               ns->type = NVME_NS_LIGHTNVM;
+       }
+
+       if (ns->ctrl->vs >= NVME_VS(1, 1))
+               memcpy(ns->eui, id->eui64, sizeof(ns->eui));
+       if (ns->ctrl->vs >= NVME_VS(1, 2))
+               memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));
+
+       old_ms = ns->ms;
+       lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
+       ns->lba_shift = id->lbaf[lbaf].ds;
+       ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+       ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+
+       /*
+        * If identify namespace failed, use the default 512 byte block size
+        * so the block layer has a usable value before it fails reads and
+        * writes on the zero-capacity device.
+        */
+       if (ns->lba_shift == 0)
+               ns->lba_shift = 9;
+       bs = 1 << ns->lba_shift;
+       /* XXX: PI implementation requires metadata equal to the t10 pi tuple size */
+       pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
+                                       id->dps & NVME_NS_DPS_PI_MASK : 0;
+
+       blk_mq_freeze_queue(disk->queue);
+       if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
+                               ns->ms != old_ms ||
+                               bs != queue_logical_block_size(disk->queue) ||
+                               (ns->ms && ns->ext)))
+               blk_integrity_unregister(disk);
+
+       ns->pi_type = pi_type;
+       blk_queue_logical_block_size(ns->queue, bs);
+
+       if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
+               nvme_init_integrity(ns);
+       if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
+               set_capacity(disk, 0);
+       else
+               set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+       if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+               nvme_config_discard(ns);
+       blk_mq_unfreeze_queue(disk->queue);
+
+       kfree(id);
+       return 0;
+}
+
+static char nvme_pr_type(enum pr_type type)
+{
+       switch (type) {
+       case PR_WRITE_EXCLUSIVE:
+               return 1;
+       case PR_EXCLUSIVE_ACCESS:
+               return 2;
+       case PR_WRITE_EXCLUSIVE_REG_ONLY:
+               return 3;
+       case PR_EXCLUSIVE_ACCESS_REG_ONLY:
+               return 4;
+       case PR_WRITE_EXCLUSIVE_ALL_REGS:
+               return 5;
+       case PR_EXCLUSIVE_ACCESS_ALL_REGS:
+               return 6;
+       default:
+               return 0;
+       }
+}
+
+static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
+                               u64 key, u64 sa_key, u8 op)
+{
+       struct nvme_ns *ns = bdev->bd_disk->private_data;
+       struct nvme_command c;
+       u8 data[16] = { 0, };
+
+       put_unaligned_le64(key, &data[0]);
+       put_unaligned_le64(sa_key, &data[8]);
+
+       memset(&c, 0, sizeof(c));
+       c.common.opcode = op;
+       c.common.nsid = cpu_to_le32(ns->ns_id);
+       c.common.cdw10[0] = cpu_to_le32(cdw10);
+
+       return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
+}
+
+static int nvme_pr_register(struct block_device *bdev, u64 old,
+               u64 new, unsigned flags)
+{
+       u32 cdw10;
+
+       if (flags & ~PR_FL_IGNORE_KEY)
+               return -EOPNOTSUPP;
+
+       cdw10 = old ? 2 : 0;
+       cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
+       cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
+       return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_reserve(struct block_device *bdev, u64 key,
+               enum pr_type type, unsigned flags)
+{
+       u32 cdw10;
+
+       if (flags & ~PR_FL_IGNORE_KEY)
+               return -EOPNOTSUPP;
+
+       cdw10 = nvme_pr_type(type) << 8;
+       cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
+       return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
+               enum pr_type type, bool abort)
+{
+       u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
+       return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_clear(struct block_device *bdev, u64 key)
+{
+       u32 cdw10 = 1 | (key ? 1 << 3 : 0);
+       return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+       u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
+       return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+static const struct pr_ops nvme_pr_ops = {
+       .pr_register    = nvme_pr_register,
+       .pr_reserve     = nvme_pr_reserve,
+       .pr_release     = nvme_pr_release,
+       .pr_preempt     = nvme_pr_preempt,
+       .pr_clear       = nvme_pr_clear,
+};
+
+static const struct block_device_operations nvme_fops = {
+       .owner          = THIS_MODULE,
+       .ioctl          = nvme_ioctl,
+       .compat_ioctl   = nvme_compat_ioctl,
+       .open           = nvme_open,
+       .release        = nvme_release,
+       .getgeo         = nvme_getgeo,
+       .revalidate_disk = nvme_revalidate_disk,
+       .pr_ops         = &nvme_pr_ops,
+};
+
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
+{
+       unsigned long timeout =
+               ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+       u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
+       int ret;
+
+       while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+               if ((csts & NVME_CSTS_RDY) == bit)
+                       break;
+
+               msleep(100);
+               if (fatal_signal_pending(current))
+                       return -EINTR;
+               if (time_after(jiffies, timeout)) {
+                       dev_err(ctrl->dev,
+                               "Device not ready; aborting %s\n", enabled ?
+                                               "initialisation" : "reset");
+                       return -ENODEV;
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * If the device has been passed off to us in an enabled state, just clear
+ * the enabled bit.  The spec says we should set the 'shutdown notification
+ * bits', but doing so may cause the device to complete commands to the
+ * admin queue ... and we don't know what memory that might be pointing at!
+ */
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+       int ret;
+
+       ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+       ctrl->ctrl_config &= ~NVME_CC_ENABLE;
+
+       ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+       if (ret)
+               return ret;
+       return nvme_wait_ready(ctrl, cap, false);
+}
+
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+       /*
+        * Default to a 4K page size, with the intention to update this
+        * path in the future to accommodate architectures with differing
+        * kernel and IO page sizes.
+        */
+       unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
+       int ret;
+
+       if (page_shift < dev_page_min) {
+               dev_err(ctrl->dev,
+                       "Minimum device page size %u too large for host (%u)\n",
+                       1 << dev_page_min, 1 << page_shift);
+               return -ENODEV;
+       }
+
+       ctrl->page_size = 1 << page_shift;
+
+       ctrl->ctrl_config = NVME_CC_CSS_NVM;
+       ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
+       ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+       ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
+       ctrl->ctrl_config |= NVME_CC_ENABLE;
+
+       ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+       if (ret)
+               return ret;
+       return nvme_wait_ready(ctrl, cap, true);
+}
+
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
+{
+       unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+       u32 csts;
+       int ret;
+
+       ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+       ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+
+       ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+       if (ret)
+               return ret;
+
+       while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+               if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
+                       break;
+
+               msleep(100);
+               if (fatal_signal_pending(current))
+                       return -EINTR;
+               if (time_after(jiffies, timeout)) {
+                       dev_err(ctrl->dev,
+                               "Device shutdown incomplete; abort shutdown\n");
+                       return -ENODEV;
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * Initialize the cached copies of the Identify data and various controller
+ * register in our nvme_ctrl structure.  This should be called as soon as
+ * the admin queue is fully up and running.
+ */
+int nvme_init_identify(struct nvme_ctrl *ctrl)
+{
+       struct nvme_id_ctrl *id;
+       u64 cap;
+       int ret, page_shift;
+
+       ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
+       if (ret) {
+               dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+               return ret;
+       }
+
+       ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
+       if (ret) {
+               dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
+               return ret;
+       }
+       page_shift = NVME_CAP_MPSMIN(cap) + 12;
+
+       if (ctrl->vs >= NVME_VS(1, 1))
+               ctrl->subsystem = NVME_CAP_NSSRC(cap);
+
+       ret = nvme_identify_ctrl(ctrl, &id);
+       if (ret) {
+               dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
+               return -EIO;
+       }
+
+       ctrl->oncs = le16_to_cpup(&id->oncs);
+       atomic_set(&ctrl->abort_limit, id->acl + 1);
+       ctrl->vwc = id->vwc;
+       memcpy(ctrl->serial, id->sn, sizeof(id->sn));
+       memcpy(ctrl->model, id->mn, sizeof(id->mn));
+       memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
+       if (id->mdts)
+               ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
+       else
+               ctrl->max_hw_sectors = UINT_MAX;
+
+       if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
+               unsigned int max_hw_sectors;
+
+               ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
+               max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
+               if (ctrl->max_hw_sectors) {
+                       ctrl->max_hw_sectors = min(max_hw_sectors,
+                                                       ctrl->max_hw_sectors);
+               } else {
+                       ctrl->max_hw_sectors = max_hw_sectors;
+               }
+       }
+
+       kfree(id);
+       return 0;
+}
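
The MDTS conversion above is easy to misread: MDTS is a power of two in units of the controller's minimum page size (CAP.MPSMIN), not bytes. Worked numbers, illustrative only:

	/* CAP.MPSMIN = 0 -> page_shift = 12 (4 KiB minimum page)
	 * id->mdts   = 5 -> max transfer = 2^5 * 4 KiB = 128 KiB
	 */
	int page_shift = 12;
	u8 mdts = 5;
	u32 max_hw_sectors = 1 << (mdts + page_shift - 9);	/* 256 512-byte sectors */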
+
+static int nvme_dev_open(struct inode *inode, struct file *file)
+{
+       struct nvme_ctrl *ctrl;
+       int instance = iminor(inode);
+       int ret = -ENODEV;
+
+       spin_lock(&dev_list_lock);
+       list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
+               if (ctrl->instance != instance)
+                       continue;
+
+               if (!ctrl->admin_q) {
+                       ret = -EWOULDBLOCK;
+                       break;
+               }
+               if (!kref_get_unless_zero(&ctrl->kref))
+                       break;
+               file->private_data = ctrl;
+               ret = 0;
+               break;
+       }
+       spin_unlock(&dev_list_lock);
+
+       return ret;
+}
+
+static int nvme_dev_release(struct inode *inode, struct file *file)
+{
+       nvme_put_ctrl(file->private_data);
+       return 0;
+}
+
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+{
+       struct nvme_ns *ns;
+       int ret;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       if (list_empty(&ctrl->namespaces)) {
+               ret = -ENOTTY;
+               goto out_unlock;
+       }
+
+       ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+       if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
+               dev_warn(ctrl->dev,
+                       "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       dev_warn(ctrl->dev,
+               "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
+       kref_get(&ns->kref);
+       mutex_unlock(&ctrl->namespaces_mutex);
+
+       ret = nvme_user_cmd(ctrl, ns, argp);
+       nvme_put_ns(ns);
+       return ret;
+
+out_unlock:
+       mutex_unlock(&ctrl->namespaces_mutex);
+       return ret;
+}
+
+static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
+               unsigned long arg)
+{
+       struct nvme_ctrl *ctrl = file->private_data;
+       void __user *argp = (void __user *)arg;
+
+       switch (cmd) {
+       case NVME_IOCTL_ADMIN_CMD:
+               return nvme_user_cmd(ctrl, NULL, argp);
+       case NVME_IOCTL_IO_CMD:
+               return nvme_dev_user_cmd(ctrl, argp);
+       case NVME_IOCTL_RESET:
+               dev_warn(ctrl->dev, "resetting controller\n");
+               return ctrl->ops->reset_ctrl(ctrl);
+       case NVME_IOCTL_SUBSYS_RESET:
+               return nvme_reset_subsystem(ctrl);
+       default:
+               return -ENOTTY;
+       }
+}
+
+static const struct file_operations nvme_dev_fops = {
+       .owner          = THIS_MODULE,
+       .open           = nvme_dev_open,
+       .release        = nvme_dev_release,
+       .unlocked_ioctl = nvme_dev_ioctl,
+       .compat_ioctl   = nvme_dev_ioctl,
+};
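
These fops back the per-controller character device (/dev/nvme<instance>) that nvme_init_ctrl() creates below. A minimal user-space sketch of exercising the reset path, assuming the device node exists and the caller is privileged:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/nvme_ioctl.h>

	int main(void)
	{
		int fd = open("/dev/nvme0", O_RDWR);

		if (fd < 0)
			return 1;
		if (ioctl(fd, NVME_IOCTL_RESET) < 0)	/* lands in nvme_dev_ioctl() above */
			perror("NVME_IOCTL_RESET");
		close(fd);
		return 0;
	}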
+
+static ssize_t nvme_sysfs_reset(struct device *dev,
+                               struct device_attribute *attr, const char *buf,
+                               size_t count)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       int ret;
+
+       ret = ctrl->ops->reset_ctrl(ctrl);
+       if (ret < 0)
+               return ret;
+       return count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+                                                               char *buf)
+{
+       struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+       return sprintf(buf, "%pU\n", ns->uuid);
+}
+static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
+
+static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
+                                                               char *buf)
+{
+       struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+       return sprintf(buf, "%8phd\n", ns->eui);
+}
+static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
+
+static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
+                                                               char *buf)
+{
+       struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+       return sprintf(buf, "%d\n", ns->ns_id);
+}
+static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
+
+static struct attribute *nvme_ns_attrs[] = {
+       &dev_attr_uuid.attr,
+       &dev_attr_eui.attr,
+       &dev_attr_nsid.attr,
+       NULL,
+};
+
+static umode_t nvme_attrs_are_visible(struct kobject *kobj,
+               struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+
+       if (a == &dev_attr_uuid.attr) {
+               if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+                       return 0;
+       }
+       if (a == &dev_attr_eui.attr) {
+               if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
+                       return 0;
+       }
+       return a->mode;
+}
+
+static const struct attribute_group nvme_ns_attr_group = {
+       .attrs          = nvme_ns_attrs,
+       .is_visible     = nvme_attrs_are_visible,
+};
+
+#define nvme_show_function(field)                                              \
+static ssize_t  field##_show(struct device *dev,                               \
+                           struct device_attribute *attr, char *buf)           \
+{                                                                              \
+        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);                         \
+        return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field);  \
+}                                                                              \
+static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
+
+nvme_show_function(model);
+nvme_show_function(serial);
+nvme_show_function(firmware_rev);
+
+static struct attribute *nvme_dev_attrs[] = {
+       &dev_attr_reset_controller.attr,
+       &dev_attr_model.attr,
+       &dev_attr_serial.attr,
+       &dev_attr_firmware_rev.attr,
+       NULL
+};
+
+static struct attribute_group nvme_dev_attrs_group = {
+       .attrs = nvme_dev_attrs,
+};
+
+static const struct attribute_group *nvme_dev_attr_groups[] = {
+       &nvme_dev_attrs_group,
+       NULL,
+};
+
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+       struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+       struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+       return nsa->ns_id - nsb->ns_id;
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+       struct nvme_ns *ns;
+
+       lockdep_assert_held(&ctrl->namespaces_mutex);
+
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               if (ns->ns_id == nsid)
+                       return ns;
+               if (ns->ns_id > nsid)
+                       break;
+       }
+       return NULL;
+}
+
+static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+       struct nvme_ns *ns;
+       struct gendisk *disk;
+       int node = dev_to_node(ctrl->dev);
+
+       lockdep_assert_held(&ctrl->namespaces_mutex);
+
+       ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
+       if (!ns)
+               return;
+
+       ns->queue = blk_mq_init_queue(ctrl->tagset);
+       if (IS_ERR(ns->queue))
+               goto out_free_ns;
+       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+       ns->queue->queuedata = ns;
+       ns->ctrl = ctrl;
+
+       disk = alloc_disk_node(0, node);
+       if (!disk)
+               goto out_free_queue;
+
+       kref_init(&ns->kref);
+       ns->ns_id = nsid;
+       ns->disk = disk;
+       ns->lba_shift = 9; /* default to 512-byte blocks until the disk is validated */
+
+       blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+       if (ctrl->max_hw_sectors) {
+               blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
+               blk_queue_max_segments(ns->queue,
+                       (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
+       }
+       if (ctrl->stripe_size)
+               blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
+       if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+               blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
+       blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
+
+       disk->major = nvme_major;
+       disk->first_minor = 0;
+       disk->fops = &nvme_fops;
+       disk->private_data = ns;
+       disk->queue = ns->queue;
+       disk->driverfs_dev = ctrl->device;
+       disk->flags = GENHD_FL_EXT_DEVT;
+       sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
+
+       if (nvme_revalidate_disk(ns->disk))
+               goto out_free_disk;
+
+       list_add_tail(&ns->list, &ctrl->namespaces);
+       kref_get(&ctrl->kref);
+       if (ns->type == NVME_NS_LIGHTNVM)
+               return;
+
+       add_disk(ns->disk);
+       if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+                                       &nvme_ns_attr_group))
+               pr_warn("%s: failed to create sysfs group for identification\n",
+                       ns->disk->disk_name);
+       return;
+ out_free_disk:
+       kfree(disk);
+ out_free_queue:
+       blk_cleanup_queue(ns->queue);
+ out_free_ns:
+       kfree(ns);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+       bool kill = nvme_io_incapable(ns->ctrl) &&
+                       !blk_queue_dying(ns->queue);
+
+       lockdep_assert_held(&ns->ctrl->namespaces_mutex);
+
+       if (kill) {
+               blk_set_queue_dying(ns->queue);
+
+               /*
+                * The controller was shutdown first if we got here through
+                * device removal. The shutdown may requeue outstanding
+                * requests. These need to be aborted immediately so
+                * del_gendisk doesn't block indefinitely for their completion.
+                */
+               blk_mq_abort_requeue_list(ns->queue);
+       }
+       if (ns->disk->flags & GENHD_FL_UP) {
+               if (blk_get_integrity(ns->disk))
+                       blk_integrity_unregister(ns->disk);
+               sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+                                       &nvme_ns_attr_group);
+               del_gendisk(ns->disk);
+       }
+       if (kill || !blk_queue_dying(ns->queue)) {
+               blk_mq_abort_requeue_list(ns->queue);
+               blk_cleanup_queue(ns->queue);
+       }
+       list_del_init(&ns->list);
+       nvme_put_ns(ns);
+}
+
+static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+       struct nvme_ns *ns;
+
+       ns = nvme_find_ns(ctrl, nsid);
+       if (ns) {
+               if (revalidate_disk(ns->disk))
+                       nvme_ns_remove(ns);
+       } else
+               nvme_alloc_ns(ctrl, nsid);
+}
+
+static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
+{
+       struct nvme_ns *ns;
+       __le32 *ns_list;
+       unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+       int ret = 0;
+
+       ns_list = kzalloc(0x1000, GFP_KERNEL);
+       if (!ns_list)
+               return -ENOMEM;
+
+       for (i = 0; i < num_lists; i++) {
+               ret = nvme_identify_ns_list(ctrl, prev, ns_list);
+               if (ret)
+                       goto out;
+
+               for (j = 0; j < min(nn, 1024U); j++) {
+                       nsid = le32_to_cpu(ns_list[j]);
+                       if (!nsid)
+                               goto out;
+
+                       nvme_validate_ns(ctrl, nsid);
+
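+                       /* NSIDs between the previous and this entry are gone; drop their namespaces */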
+                       while (++prev < nsid) {
+                               ns = nvme_find_ns(ctrl, prev);
+                               if (ns)
+                                       nvme_ns_remove(ns);
+                       }
+               }
+               nn -= j;
+       }
+ out:
+       kfree(ns_list);
+       return ret;
+}
+
+static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
+{
+       struct nvme_ns *ns, *next;
+       unsigned i;
+
+       lockdep_assert_held(&ctrl->namespaces_mutex);
+
+       for (i = 1; i <= nn; i++)
+               nvme_validate_ns(ctrl, i);
+
+       list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
+               if (ns->ns_id > nn)
+                       nvme_ns_remove(ns);
+       }
+}
+
+void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
+{
+       struct nvme_id_ctrl *id;
+       unsigned nn;
+
+       if (nvme_identify_ctrl(ctrl, &id))
+               return;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       nn = le32_to_cpu(id->nn);
+       if (ctrl->vs >= NVME_VS(1, 1) &&
+           !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
+               if (!nvme_scan_ns_list(ctrl, nn))
+                       goto done;
+       }
+       __nvme_scan_namespaces(ctrl, nn);
+ done:
+       list_sort(NULL, &ctrl->namespaces, ns_cmp);
+       mutex_unlock(&ctrl->namespaces_mutex);
+       kfree(id);
+}
+
+void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns, *next;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
+               nvme_ns_remove(ns);
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+static DEFINE_IDA(nvme_instance_ida);
+
+static int nvme_set_instance(struct nvme_ctrl *ctrl)
+{
+       int instance, error;
+
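+       /* ida_pre_get() preallocates outside the lock; -EAGAIN from ida_get_new() means another preallocation pass is needed */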
+       do {
+               if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+                       return -ENODEV;
+
+               spin_lock(&dev_list_lock);
+               error = ida_get_new(&nvme_instance_ida, &instance);
+               spin_unlock(&dev_list_lock);
+       } while (error == -EAGAIN);
+
+       if (error)
+               return -ENODEV;
+
+       ctrl->instance = instance;
+       return 0;
+}
+
+static void nvme_release_instance(struct nvme_ctrl *ctrl)
+{
+       spin_lock(&dev_list_lock);
+       ida_remove(&nvme_instance_ida, ctrl->instance);
+       spin_unlock(&dev_list_lock);
+}
+
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
+       device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
+
+       spin_lock(&dev_list_lock);
+       list_del(&ctrl->node);
+       spin_unlock(&dev_list_lock);
+}
+
+static void nvme_free_ctrl(struct kref *kref)
+{
+       struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+
+       put_device(ctrl->device);
+       nvme_release_instance(ctrl);
+
+       ctrl->ops->free_ctrl(ctrl);
+}
+
+void nvme_put_ctrl(struct nvme_ctrl *ctrl)
+{
+       kref_put(&ctrl->kref, nvme_free_ctrl);
+}
+
+/*
+ * Initialize an NVMe controller structure.  This needs to be called during
+ * the earliest initialization so that we have the initialized structure
+ * around during probing.
+ */
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
+               const struct nvme_ctrl_ops *ops, unsigned long quirks)
+{
+       int ret;
+
+       INIT_LIST_HEAD(&ctrl->namespaces);
+       mutex_init(&ctrl->namespaces_mutex);
+       kref_init(&ctrl->kref);
+       ctrl->dev = dev;
+       ctrl->ops = ops;
+       ctrl->quirks = quirks;
+
+       ret = nvme_set_instance(ctrl);
+       if (ret)
+               goto out;
+
+       ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
+                               MKDEV(nvme_char_major, ctrl->instance),
+                               dev, nvme_dev_attr_groups,
+                               "nvme%d", ctrl->instance);
+       if (IS_ERR(ctrl->device)) {
+               ret = PTR_ERR(ctrl->device);
+               goto out_release_instance;
+       }
+       get_device(ctrl->device);
+       dev_set_drvdata(ctrl->device, ctrl);
+
+       spin_lock(&dev_list_lock);
+       list_add_tail(&ctrl->node, &nvme_ctrl_list);
+       spin_unlock(&dev_list_lock);
+
+       return 0;
+out_release_instance:
+       nvme_release_instance(ctrl);
+out:
+       return ret;
+}
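
A transport is expected to call these entry points in roughly the following order during probe. A condensed sketch, with error handling mostly elided and struct my_ctrl / my_ctrl_ops standing in for a real transport's types:

	struct my_ctrl {
		struct nvme_ctrl ctrl;
		/* transport-specific state ... */
	};

	static int my_probe(struct device *dev, struct my_ctrl *my)
	{
		int ret;

		ret = nvme_init_ctrl(&my->ctrl, dev, &my_ctrl_ops, 0 /* quirks */);
		if (ret)
			return ret;

		/* ... bring up the admin queue ... */

		ret = nvme_init_identify(&my->ctrl);	/* cache the Identify data */
		if (ret)
			goto out;

		/* ... create the I/O queues and set my->ctrl.tagset ... */

		nvme_scan_namespaces(&my->ctrl);	/* allocates the gendisks */
		return 0;
	out:
		nvme_uninit_ctrl(&my->ctrl);
		nvme_put_ctrl(&my->ctrl);
		return ret;
	}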
+
+void nvme_stop_queues(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               spin_lock_irq(ns->queue->queue_lock);
+               queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
+               spin_unlock_irq(ns->queue->queue_lock);
+
+               blk_mq_cancel_requeue_work(ns->queue);
+               blk_mq_stop_hw_queues(ns->queue);
+       }
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+void nvme_start_queues(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
+               blk_mq_start_stopped_hw_queues(ns->queue, true);
+               blk_mq_kick_requeue_list(ns->queue);
+       }
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+int __init nvme_core_init(void)
+{
+       int result;
+
+       result = register_blkdev(nvme_major, "nvme");
+       if (result < 0)
+               return result;
+       else if (result > 0)
+               nvme_major = result;
+
+       result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
+                                                       &nvme_dev_fops);
+       if (result < 0)
+               goto unregister_blkdev;
+       else if (result > 0)
+               nvme_char_major = result;
+
+       nvme_class = class_create(THIS_MODULE, "nvme");
+       if (IS_ERR(nvme_class)) {
+               result = PTR_ERR(nvme_class);
+               goto unregister_chrdev;
+       }
+
+       return 0;
+
+ unregister_chrdev:
+       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ unregister_blkdev:
+       unregister_blkdev(nvme_major, "nvme");
+       return result;
+}
+
+void nvme_core_exit(void)
+{
+       unregister_blkdev(nvme_major, "nvme");
+       class_destroy(nvme_class);
+       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+}
index 1af54ea20e7bd55ff243598acfde869a106030d2..6bb15e4926dc86ed8b30e5cc2c504dc29090d545 100644 (file)
@@ -146,6 +146,17 @@ struct nvme_nvm_command {
        };
 };
 
+#define NVME_NVM_LP_MLC_PAIRS 886
+struct nvme_nvm_lp_mlc {
+       __u16                   num_pairs;
+       __u8                    pairs[NVME_NVM_LP_MLC_PAIRS];
+};
+
+struct nvme_nvm_lp_tbl {
+       __u8                    id[8];
+       struct nvme_nvm_lp_mlc  mlc;
+};
+
 struct nvme_nvm_id_group {
        __u8                    mtype;
        __u8                    fmtype;
@@ -169,7 +180,8 @@ struct nvme_nvm_id_group {
        __le32                  mpos;
        __le32                  mccap;
        __le16                  cpar;
-       __u8                    reserved[906];
+       __u8                    reserved[10];
+       struct nvme_nvm_lp_tbl lptbl;
 } __packed;
 
 struct nvme_nvm_addr_format {
@@ -266,6 +278,20 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
                dst->mccap = le32_to_cpu(src->mccap);
 
                dst->cpar = le16_to_cpu(src->cpar);
+
+               if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
+                       memcpy(dst->lptbl.id, src->lptbl.id, 8);
+                       dst->lptbl.mlc.num_pairs =
+                                       le16_to_cpu(src->lptbl.mlc.num_pairs);
+
+                       if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
+                               pr_err("nvm: number of MLC pairs not supported\n");
+                               return -EINVAL;
+                       }
+
+                       memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
+                                               dst->lptbl.mlc.num_pairs);
+               }
        }
 
        return 0;
@@ -274,7 +300,6 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
 static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
 {
        struct nvme_ns *ns = nvmdev->q->queuedata;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_nvm_id *nvme_nvm_id;
        struct nvme_nvm_command c = {};
        int ret;
@@ -287,7 +312,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
        if (!nvme_nvm_id)
                return -ENOMEM;
 
-       ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+       ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
                                nvme_nvm_id, sizeof(struct nvme_nvm_id));
        if (ret) {
                ret = -EIO;
@@ -312,9 +337,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
                                nvm_l2p_update_fn *update_l2p, void *priv)
 {
        struct nvme_ns *ns = nvmdev->q->queuedata;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_nvm_command c = {};
-       u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
+       u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9;
        u32 nlb_pr_rq = len / sizeof(u64);
        u64 cmd_slba = slba;
        void *entries;
@@ -332,10 +356,10 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
                c.l2p.slba = cpu_to_le64(cmd_slba);
                c.l2p.nlb = cpu_to_le32(cmd_nlb);
 
-               ret = nvme_submit_sync_cmd(dev->admin_q,
+               ret = nvme_submit_sync_cmd(ns->ctrl->admin_q,
                                (struct nvme_command *)&c, entries, len);
                if (ret) {
-                       dev_err(dev->dev, "L2P table transfer failed (%d)\n",
+                       dev_err(ns->ctrl->dev, "L2P table transfer failed (%d)\n",
                                                                        ret);
                        ret = -EIO;
                        goto out;
@@ -361,7 +385,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
 {
        struct request_queue *q = nvmdev->q;
        struct nvme_ns *ns = q->queuedata;
-       struct nvme_dev *dev = ns->dev;
+       struct nvme_ctrl *ctrl = ns->ctrl;
        struct nvme_nvm_command c = {};
        struct nvme_nvm_bb_tbl *bb_tbl;
        int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
@@ -375,41 +399,36 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
        if (!bb_tbl)
                return -ENOMEM;
 
-       ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+       ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
                                                                bb_tbl, tblsz);
        if (ret) {
-               dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
+               dev_err(ctrl->dev, "get bad block table failed (%d)\n", ret);
                ret = -EIO;
                goto out;
        }
 
        if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
                bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
-               dev_err(dev->dev, "bbt format mismatch\n");
+               dev_err(ctrl->dev, "bbt format mismatch\n");
                ret = -EINVAL;
                goto out;
        }
 
        if (le16_to_cpu(bb_tbl->verid) != 1) {
                ret = -EINVAL;
-               dev_err(dev->dev, "bbt version not supported\n");
+               dev_err(ctrl->dev, "bbt version not supported\n");
                goto out;
        }
 
        if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
                ret = -EINVAL;
-               dev_err(dev->dev, "bbt unsuspected blocks returned (%u!=%u)",
+               dev_err(ctrl->dev, "bbt unsuspected blocks returned (%u!=%u)",
                                        le32_to_cpu(bb_tbl->tblks), nr_blocks);
                goto out;
        }
 
        ppa = dev_to_generic_addr(nvmdev, ppa);
        ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
-       if (ret) {
-               ret = -EINTR;
-               goto out;
-       }
-
 out:
        kfree(bb_tbl);
        return ret;
@@ -419,7 +438,6 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
                                                                int type)
 {
        struct nvme_ns *ns = nvmdev->q->queuedata;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_nvm_command c = {};
        int ret = 0;
 
@@ -429,10 +447,10 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
        c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
        c.set_bb.value = type;
 
-       ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+       ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
                                                                NULL, 0);
        if (ret)
-               dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
+               dev_err(ns->ctrl->dev, "set bad block table failed (%d)\n", ret);
        return ret;
 }
 
@@ -453,11 +471,8 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
 static void nvme_nvm_end_io(struct request *rq, int error)
 {
        struct nvm_rq *rqd = rq->end_io_data;
-       struct nvm_dev *dev = rqd->dev;
 
-       if (dev->mt && dev->mt->end_io(rqd, error))
-               pr_err("nvme: err status: %x result: %lx\n",
-                               rq->errors, (unsigned long)rq->special);
+       nvm_end_io(rqd, error);
 
        kfree(rq->cmd);
        blk_mq_free_request(rq);
@@ -520,9 +535,8 @@ static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
 static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
 {
        struct nvme_ns *ns = nvmdev->q->queuedata;
-       struct nvme_dev *dev = ns->dev;
 
-       return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
+       return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
 }
 
 static void nvme_nvm_destroy_dma_pool(void *pool)
@@ -580,8 +594,9 @@ void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
 
 int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
 {
-       struct nvme_dev *dev = ns->dev;
-       struct pci_dev *pdev = to_pci_dev(dev->dev);
+       struct nvme_ctrl *ctrl = ns->ctrl;
+       /* XXX: this is poking into PCI structures from generic code! */
+       struct pci_dev *pdev = to_pci_dev(ctrl->dev);
 
        /* QEMU NVMe simulator - PCI ID + Vendor specific bit */
        if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
index 044253dca30a433549a37ec7e28c3a6c161d38a5..9664d07d807d844b470975bc21708e0022b1a31e 100644 (file)
 #include <linux/kref.h>
 #include <linux/blk-mq.h>
 
+enum {
+       /*
+        * Driver internal status code for commands that were cancelled due
+        * to timeouts or controller shutdown.  The value is negative so
+        * that it a) doesn't overlap with the unsigned hardware error codes,
+        * and b) can easily be tested for.
+        */
+       NVME_SC_CANCELLED               = -EINTR,
+};
+
 extern unsigned char nvme_io_timeout;
 #define NVME_IO_TIMEOUT        (nvme_io_timeout * HZ)
 
+extern unsigned char admin_timeout;
+#define ADMIN_TIMEOUT  (admin_timeout * HZ)
+
+extern unsigned char shutdown_timeout;
+#define SHUTDOWN_TIMEOUT       (shutdown_timeout * HZ)
+
 enum {
        NVME_NS_LBA             = 0,
        NVME_NS_LIGHTNVM        = 1,
 };
 
 /*
- * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+ * List of workarounds for devices that require behavior not specified in
+ * the standard.
  */
-struct nvme_dev {
-       struct list_head node;
-       struct nvme_queue **queues;
+enum nvme_quirks {
+       /*
+        * Prefers I/O aligned to a stripe size specified in a vendor
+        * specific Identify field.
+        */
+       NVME_QUIRK_STRIPE_SIZE                  = (1 << 0),
+
+       /*
+        * The controller doesn't handle Identify CNS values other than 0 or 1
+        * correctly.
+        */
+       NVME_QUIRK_IDENTIFY_CNS                 = (1 << 1),
+};
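
The quirks value reaches nvme_init_ctrl() through the transport; for PCI the natural place is the ID table's driver_data. An illustrative sketch (the device ID here is only an example):

	static const struct pci_device_id my_id_table[] = {
		{ PCI_VDEVICE(INTEL, 0x0953),	/* example entry */
			.driver_data = NVME_QUIRK_STRIPE_SIZE, },
		{ 0, }
	};

	/* later, in probe: nvme_init_ctrl(ctrl, &pdev->dev, ops, id->driver_data); */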
+
+struct nvme_ctrl {
+       const struct nvme_ctrl_ops *ops;
        struct request_queue *admin_q;
-       struct blk_mq_tag_set tagset;
-       struct blk_mq_tag_set admin_tagset;
-       u32 __iomem *dbs;
        struct device *dev;
-       struct dma_pool *prp_page_pool;
-       struct dma_pool *prp_small_pool;
+       struct kref kref;
        int instance;
-       unsigned queue_count;
-       unsigned online_queues;
-       unsigned max_qid;
-       int q_depth;
-       u32 db_stride;
-       u32 ctrl_config;
-       struct msix_entry *entry;
-       struct nvme_bar __iomem *bar;
+       struct blk_mq_tag_set *tagset;
        struct list_head namespaces;
-       struct kref kref;
-       struct device *device;
-       struct work_struct reset_work;
-       struct work_struct probe_work;
-       struct work_struct scan_work;
+       struct mutex namespaces_mutex;
+       struct device *device;  /* char device */
+       struct list_head node;
+
        char name[12];
        char serial[20];
        char model[40];
        char firmware_rev[8];
-       bool subsystem;
+
+       u32 ctrl_config;
+
+       u32 page_size;
        u32 max_hw_sectors;
        u32 stripe_size;
-       u32 page_size;
-       void __iomem *cmb;
-       dma_addr_t cmb_dma_addr;
-       u64 cmb_size;
-       u32 cmbsz;
        u16 oncs;
-       u16 abort_limit;
+       atomic_t abort_limit;
        u8 event_limit;
        u8 vwc;
+       u32 vs;
+       bool subsystem;
+       unsigned long quirks;
 };
 
 /*
@@ -79,11 +98,14 @@ struct nvme_dev {
 struct nvme_ns {
        struct list_head list;
 
-       struct nvme_dev *dev;
+       struct nvme_ctrl *ctrl;
        struct request_queue *queue;
        struct gendisk *disk;
        struct kref kref;
 
+       u8 eui[8];
+       u8 uuid[16];
+
        unsigned ns_id;
        int lba_shift;
        u16 ms;
@@ -94,41 +116,156 @@ struct nvme_ns {
        u32 mode_select_block_len;
 };
 
-/*
- * The nvme_iod describes the data in an I/O, including the list of PRP
- * entries.  You can't see it in this data structure because C doesn't let
- * me express that.  Use nvme_alloc_iod to ensure there's enough space
- * allocated to store the PRP list.
- */
-struct nvme_iod {
-       unsigned long private;  /* For the use of the submitter of the I/O */
-       int npages;             /* In the PRP list. 0 means small pool in use */
-       int offset;             /* Of PRP list */
-       int nents;              /* Used in scatterlist */
-       int length;             /* Of data, in bytes */
-       dma_addr_t first_dma;
-       struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
-       struct scatterlist sg[0];
+struct nvme_ctrl_ops {
+       int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
+       int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
+       int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
+       bool (*io_incapable)(struct nvme_ctrl *ctrl);
+       int (*reset_ctrl)(struct nvme_ctrl *ctrl);
+       void (*free_ctrl)(struct nvme_ctrl *ctrl);
 };
 
+static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
+{
+       u32 val = 0;
+
+       if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
+               return false;
+       return val & NVME_CSTS_RDY;
+}
+
+static inline bool nvme_io_incapable(struct nvme_ctrl *ctrl)
+{
+       u32 val = 0;
+
+       if (ctrl->ops->io_incapable(ctrl))
+               return true;
+       if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
+               return true;
+       return val & NVME_CSTS_CFS;
+}
+
+static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
+{
+       if (!ctrl->subsystem)
+               return -ENOTTY;
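+       /* 0x4E564D65 is ASCII "NVMe", the value the spec requires writing to NSSR */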
+       return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
+}
+
 static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 {
        return (sector >> (ns->lba_shift - 9));
 }
 
+static inline void nvme_setup_flush(struct nvme_ns *ns,
+               struct nvme_command *cmnd)
+{
+       memset(cmnd, 0, sizeof(*cmnd));
+       cmnd->common.opcode = nvme_cmd_flush;
+       cmnd->common.nsid = cpu_to_le32(ns->ns_id);
+}
+
+static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
+               struct nvme_command *cmnd)
+{
+       u16 control = 0;
+       u32 dsmgmt = 0;
+
+       if (req->cmd_flags & REQ_FUA)
+               control |= NVME_RW_FUA;
+       if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+               control |= NVME_RW_LR;
+
+       if (req->cmd_flags & REQ_RAHEAD)
+               dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+
+       memset(cmnd, 0, sizeof(*cmnd));
+       cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+       cmnd->rw.command_id = req->tag;
+       cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
+       cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+       cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+
+       if (ns->ms) {
+               switch (ns->pi_type) {
+               case NVME_NS_DPS_PI_TYPE3:
+                       control |= NVME_RW_PRINFO_PRCHK_GUARD;
+                       break;
+               case NVME_NS_DPS_PI_TYPE1:
+               case NVME_NS_DPS_PI_TYPE2:
+                       control |= NVME_RW_PRINFO_PRCHK_GUARD |
+                                       NVME_RW_PRINFO_PRCHK_REF;
+                       cmnd->rw.reftag = cpu_to_le32(
+                                       nvme_block_nr(ns, blk_rq_pos(req)));
+                       break;
+               }
+               if (!blk_integrity_rq(req))
+                       control |= NVME_RW_PRINFO_PRACT;
+       }
+
+       cmnd->rw.control = cpu_to_le16(control);
+       cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+}
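
Note that the spec encodes the block count zero-based, which is why nvme_setup_rw() subtracts one from the length. Worked numbers for a 4 KiB request on a 512-byte-LBA namespace, illustrative only:

	/* ns->lba_shift = 9, blk_rq_pos(req) = 2048, blk_rq_bytes(req) = 4096 */
	u64 slba = 2048 >> (9 - 9);	/* nvme_block_nr(): LBA 2048 */
	u16 nlb  = (4096 >> 9) - 1;	/* 8 blocks, encoded as 7 */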
+
+static inline int nvme_error_status(u16 status)
+{
+       switch (status & 0x7ff) {
+       case NVME_SC_SUCCESS:
+               return 0;
+       case NVME_SC_CAP_EXCEEDED:
+               return -ENOSPC;
+       default:
+               return -EIO;
+       }
+}
+
+static inline bool nvme_req_needs_retry(struct request *req, u16 status)
+{
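+       /* NVME_SC_DNR is the controller's "Do Not Retry" bit; honor it like the block layer's no-retry flags */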
+       return !(status & NVME_SC_DNR || blk_noretry_request(req)) &&
+               (jiffies - req->start_time) < req->timeout;
+}
+
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
+               const struct nvme_ctrl_ops *ops, unsigned long quirks);
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
+void nvme_put_ctrl(struct nvme_ctrl *ctrl);
+int nvme_init_identify(struct nvme_ctrl *ctrl);
+
+void nvme_scan_namespaces(struct nvme_ctrl *ctrl);
+void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
+
+void nvme_stop_queues(struct nvme_ctrl *ctrl);
+void nvme_start_queues(struct nvme_ctrl *ctrl);
+
+struct request *nvme_alloc_request(struct request_queue *q,
+               struct nvme_command *cmd, unsigned int flags);
+void nvme_requeue_req(struct request *req);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buf, unsigned bufflen);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-               void *buffer, void __user *ubuffer, unsigned bufflen,
+               void *buffer, unsigned bufflen,  u32 *result, unsigned timeout);
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+               void __user *ubuffer, unsigned bufflen, u32 *result,
+               unsigned timeout);
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+               void __user *ubuffer, unsigned bufflen,
+               void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
                u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
                struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
                        dma_addr_t dma_addr, u32 *result);
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
                        dma_addr_t dma_addr, u32 *result);
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
+
+extern spinlock_t dev_list_lock;
 
 struct sg_io_hdr;
 
@@ -154,4 +291,7 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i
 }
 #endif /* CONFIG_NVM */
 
+int __init nvme_core_init(void);
+void nvme_core_exit(void);
+
 #endif /* _NVME_H */
index f5c0e2613c7cc87fa04ec3bc30f51b14aa1c8278..a128672472ecf22b0abb1eb588bb12e896903a27 100644 (file)
@@ -12,6 +12,7 @@
  * more details.
  */
 
+#include <linux/aer.h>
 #include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/kernel.h>
-#include <linux/list_sort.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/poison.h>
 #include <linux/ptrace.h>
 #include <linux/slab.h>
 #include <linux/t10-pi.h>
 #include <linux/types.h>
-#include <linux/pr.h>
-#include <scsi/sg.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <asm/unaligned.h>
 
-#include <uapi/linux/nvme_ioctl.h>
 #include "nvme.h"
 
-#define NVME_MINORS            (1U << MINORBITS)
 #define NVME_Q_DEPTH           1024
 #define NVME_AQ_DEPTH          256
 #define SQ_SIZE(depth)         (depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)         (depth * sizeof(struct nvme_completion))
-#define ADMIN_TIMEOUT          (admin_timeout * HZ)
-#define SHUTDOWN_TIMEOUT       (shutdown_timeout * HZ)
+
+/*
+ * We handle AEN commands ourselves and don't even let the
+ * block layer know about them.
+ */
+#define NVME_NR_AEN_COMMANDS   1
+#define NVME_AQ_BLKMQ_DEPTH    (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
 
-static unsigned char admin_timeout = 60;
+unsigned char admin_timeout = 60;
 module_param(admin_timeout, byte, 0644);
 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
 
@@ -63,16 +65,10 @@ unsigned char nvme_io_timeout = 30;
 module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 
-static unsigned char shutdown_timeout = 5;
+unsigned char shutdown_timeout = 5;
 module_param(shutdown_timeout, byte, 0644);
 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
 
-static int nvme_major;
-module_param(nvme_major, int, 0);
-
-static int nvme_char_major;
-module_param(nvme_char_major, int, 0);
-
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -80,28 +76,60 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
-static DEFINE_SPINLOCK(dev_list_lock);
 static LIST_HEAD(dev_list);
 static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
 static wait_queue_head_t nvme_kthread_wait;
 
-static struct class *nvme_class;
+struct nvme_dev;
+struct nvme_queue;
 
-static int __nvme_reset(struct nvme_dev *dev);
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_dead_ctrl(struct nvme_dev *dev);
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
-struct async_cmd_info {
-       struct kthread_work work;
-       struct kthread_worker *worker;
-       struct request *req;
-       u32 result;
-       int status;
-       void *ctx;
+/*
+ * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+       struct list_head node;
+       struct nvme_queue **queues;
+       struct blk_mq_tag_set tagset;
+       struct blk_mq_tag_set admin_tagset;
+       u32 __iomem *dbs;
+       struct device *dev;
+       struct dma_pool *prp_page_pool;
+       struct dma_pool *prp_small_pool;
+       unsigned queue_count;
+       unsigned online_queues;
+       unsigned max_qid;
+       int q_depth;
+       u32 db_stride;
+       struct msix_entry *entry;
+       void __iomem *bar;
+       struct work_struct reset_work;
+       struct work_struct scan_work;
+       struct work_struct remove_work;
+       struct mutex shutdown_lock;
+       bool subsystem;
+       void __iomem *cmb;
+       dma_addr_t cmb_dma_addr;
+       u64 cmb_size;
+       u32 cmbsz;
+       unsigned long flags;
+
+#define NVME_CTRL_RESETTING    0
+
+       struct nvme_ctrl ctrl;
+       struct completion ioq_wait;
 };
 
+static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
+{
+       return container_of(ctrl, struct nvme_dev, ctrl);
+}
+
 /*
  * An NVM Express queue.  Each device has at least two (one for admin
  * commands and one for I/O commands).
@@ -126,7 +154,24 @@ struct nvme_queue {
        u16 qid;
        u8 cq_phase;
        u8 cqe_seen;
-       struct async_cmd_info cmdinfo;
+};
+
+/*
+ * The nvme_iod describes the data in an I/O, including the list of PRP
+ * entries.  You can't see it in this data structure because C doesn't let
+ * me express that.  Use nvme_init_iod to ensure there's enough space
+ * allocated to store the PRP list.
+ */
+struct nvme_iod {
+       struct nvme_queue *nvmeq;
+       int aborted;
+       int npages;             /* In the PRP list. 0 means small pool in use */
+       int nents;              /* Used in scatterlist */
+       int length;             /* Of data, in bytes */
+       dma_addr_t first_dma;
+       struct scatterlist meta_sg; /* metadata requires single contiguous buffer */
+       struct scatterlist *sg;
+       struct scatterlist inline_sg[0];
 };
 
 /*
@@ -148,23 +193,11 @@ static inline void _nvme_check_size(void)
        BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 }
 
-typedef void (*nvme_completion_fn)(struct nvme_queue *, void *,
-                                               struct nvme_completion *);
-
-struct nvme_cmd_info {
-       nvme_completion_fn fn;
-       void *ctx;
-       int aborted;
-       struct nvme_queue *nvmeq;
-       struct nvme_iod iod[0];
-};
-
 /*
  * Max size of iod being embedded in the request payload
  */
 #define NVME_INT_PAGES         2
-#define NVME_INT_BYTES(dev)    (NVME_INT_PAGES * (dev)->page_size)
-#define NVME_INT_MASK          0x01
+#define NVME_INT_BYTES(dev)    (NVME_INT_PAGES * (dev)->ctrl.page_size)
 
 /*
  * Will slightly overestimate the number of pages needed.  This is OK
@@ -173,19 +206,22 @@ struct nvme_cmd_info {
  */
 static int nvme_npages(unsigned size, struct nvme_dev *dev)
 {
-       unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size);
+       unsigned nprps = DIV_ROUND_UP(size + dev->ctrl.page_size,
+                                     dev->ctrl.page_size);
        return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
 }
 
-static unsigned int nvme_cmd_size(struct nvme_dev *dev)
+static unsigned int nvme_iod_alloc_size(struct nvme_dev *dev,
+               unsigned int size, unsigned int nseg)
 {
-       unsigned int ret = sizeof(struct nvme_cmd_info);
-
-       ret += sizeof(struct nvme_iod);
-       ret += sizeof(__le64 *) * nvme_npages(NVME_INT_BYTES(dev), dev);
-       ret += sizeof(struct scatterlist) * NVME_INT_PAGES;
+       return sizeof(__le64 *) * nvme_npages(size, dev) +
+                       sizeof(struct scatterlist) * nseg;
+}
 
-       return ret;
+static unsigned int nvme_cmd_size(struct nvme_dev *dev)
+{
+       return sizeof(struct nvme_iod) +
+               nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
 }
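
With 4 KiB controller pages the inline reservation stays small. Walking the arithmetic above (assuming PAGE_SIZE == 4096 and 64-bit pointers):

	/* NVME_INT_BYTES = 2 * 4096 = 8192
	 * nvme_npages(8192):  nprps = DIV_ROUND_UP(8192 + 4096, 4096) = 3
	 *                     lists = DIV_ROUND_UP(8 * 3, 4096 - 8)   = 1
	 * nvme_iod_alloc_size = 8 * 1 + sizeof(struct scatterlist) * 2
	 * so each request's PDU is sizeof(struct nvme_iod) plus well under
	 * a hundred bytes of inline PRP/scatterlist space.
	 */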
 
 static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -215,11 +251,11 @@ static int nvme_admin_init_request(void *data, struct request *req,
                                unsigned int numa_node)
 {
        struct nvme_dev *dev = data;
-       struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = dev->queues[0];
 
        BUG_ON(!nvmeq);
-       cmd->nvmeq = nvmeq;
+       iod->nvmeq = nvmeq;
        return 0;
 }
 
@@ -242,148 +278,36 @@ static int nvme_init_request(void *data, struct request *req,
                                unsigned int numa_node)
 {
        struct nvme_dev *dev = data;
-       struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
 
        BUG_ON(!nvmeq);
-       cmd->nvmeq = nvmeq;
+       iod->nvmeq = nvmeq;
        return 0;
 }
 
-static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
-                               nvme_completion_fn handler)
+static void nvme_complete_async_event(struct nvme_dev *dev,
+               struct nvme_completion *cqe)
 {
-       cmd->fn = handler;
-       cmd->ctx = ctx;
-       cmd->aborted = 0;
-       blk_mq_start_request(blk_mq_rq_from_pdu(cmd));
-}
-
-static void *iod_get_private(struct nvme_iod *iod)
-{
-       return (void *) (iod->private & ~0x1UL);
-}
-
-/*
- * If bit 0 is set, the iod is embedded in the request payload.
- */
-static bool iod_should_kfree(struct nvme_iod *iod)
-{
-       return (iod->private & NVME_INT_MASK) == 0;
-}
-
-/* Special values must be less than 0x1000 */
-#define CMD_CTX_BASE           ((void *)POISON_POINTER_DELTA)
-#define CMD_CTX_CANCELLED      (0x30C + CMD_CTX_BASE)
-#define CMD_CTX_COMPLETED      (0x310 + CMD_CTX_BASE)
-#define CMD_CTX_INVALID                (0x314 + CMD_CTX_BASE)
-
-static void special_completion(struct nvme_queue *nvmeq, void *ctx,
-                                               struct nvme_completion *cqe)
-{
-       if (ctx == CMD_CTX_CANCELLED)
-               return;
-       if (ctx == CMD_CTX_COMPLETED) {
-               dev_warn(nvmeq->q_dmadev,
-                               "completed id %d twice on queue %d\n",
-                               cqe->command_id, le16_to_cpup(&cqe->sq_id));
-               return;
-       }
-       if (ctx == CMD_CTX_INVALID) {
-               dev_warn(nvmeq->q_dmadev,
-                               "invalid id %d completed on queue %d\n",
-                               cqe->command_id, le16_to_cpup(&cqe->sq_id));
-               return;
-       }
-       dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx);
-}
-
-static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn)
-{
-       void *ctx;
-
-       if (fn)
-               *fn = cmd->fn;
-       ctx = cmd->ctx;
-       cmd->fn = special_completion;
-       cmd->ctx = CMD_CTX_CANCELLED;
-       return ctx;
-}
-
-static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
-                                               struct nvme_completion *cqe)
-{
-       u32 result = le32_to_cpup(&cqe->result);
-       u16 status = le16_to_cpup(&cqe->status) >> 1;
+       u16 status = le16_to_cpu(cqe->status) >> 1;
+       u32 result = le32_to_cpu(cqe->result);
 
        if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
-               ++nvmeq->dev->event_limit;
+               ++dev->ctrl.event_limit;
        if (status != NVME_SC_SUCCESS)
                return;
 
        switch (result & 0xff07) {
        case NVME_AER_NOTICE_NS_CHANGED:
-               dev_info(nvmeq->q_dmadev, "rescanning\n");
-               schedule_work(&nvmeq->dev->scan_work);
+               dev_info(dev->dev, "rescanning\n");
+               queue_work(nvme_workq, &dev->scan_work);
        default:
-               dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
-       }
-}
-
-static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
-                                               struct nvme_completion *cqe)
-{
-       struct request *req = ctx;
-
-       u16 status = le16_to_cpup(&cqe->status) >> 1;
-       u32 result = le32_to_cpup(&cqe->result);
-
-       blk_mq_free_request(req);
-
-       dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
-       ++nvmeq->dev->abort_limit;
-}
-
-static void async_completion(struct nvme_queue *nvmeq, void *ctx,
-                                               struct nvme_completion *cqe)
-{
-       struct async_cmd_info *cmdinfo = ctx;
-       cmdinfo->result = le32_to_cpup(&cqe->result);
-       cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
-       queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
-       blk_mq_free_request(cmdinfo->req);
-}
-
-static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq,
-                                 unsigned int tag)
-{
-       struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag);
-
-       return blk_mq_rq_to_pdu(req);
-}
-
-/*
- * Called with local interrupts disabled and the q_lock held.  May not sleep.
- */
-static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag,
-                                               nvme_completion_fn *fn)
-{
-       struct nvme_cmd_info *cmd = get_cmd_from_tag(nvmeq, tag);
-       void *ctx;
-       if (tag >= nvmeq->q_depth) {
-               *fn = special_completion;
-               return CMD_CTX_INVALID;
+               dev_warn(dev->dev, "async event result %08x\n", result);
        }
-       if (fn)
-               *fn = cmd->fn;
-       ctx = cmd->ctx;
-       cmd->fn = special_completion;
-       cmd->ctx = CMD_CTX_COMPLETED;
-       return ctx;
 }
 
 /**
- * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
+ * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
  * @cmd: The command to send
  *
@@ -405,69 +329,44 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
        nvmeq->sq_tail = tail;
 }
 
-static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
-{
-       unsigned long flags;
-       spin_lock_irqsave(&nvmeq->q_lock, flags);
-       __nvme_submit_cmd(nvmeq, cmd);
-       spin_unlock_irqrestore(&nvmeq->q_lock, flags);
-}
-
-static __le64 **iod_list(struct nvme_iod *iod)
-{
-       return ((void *)iod) + iod->offset;
-}
-
-static inline void iod_init(struct nvme_iod *iod, unsigned nbytes,
-                           unsigned nseg, unsigned long private)
-{
-       iod->private = private;
-       iod->offset = offsetof(struct nvme_iod, sg[nseg]);
-       iod->npages = -1;
-       iod->length = nbytes;
-       iod->nents = 0;
-}
-
-static struct nvme_iod *
-__nvme_alloc_iod(unsigned nseg, unsigned bytes, struct nvme_dev *dev,
-                unsigned long priv, gfp_t gfp)
+static __le64 **iod_list(struct request *req)
 {
-       struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
-                               sizeof(__le64 *) * nvme_npages(bytes, dev) +
-                               sizeof(struct scatterlist) * nseg, gfp);
-
-       if (iod)
-               iod_init(iod, bytes, nseg, priv);
-
-       return iod;
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       return (__le64 **)(iod->sg + req->nr_phys_segments);
 }
 
-static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
-                                      gfp_t gfp)
+static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
-       unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) :
-                                                sizeof(struct nvme_dsm_range);
-       struct nvme_iod *iod;
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
+       int nseg = rq->nr_phys_segments;
+       unsigned size;
 
-       if (rq->nr_phys_segments <= NVME_INT_PAGES &&
-           size <= NVME_INT_BYTES(dev)) {
-               struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq);
+       if (rq->cmd_flags & REQ_DISCARD)
+               size = sizeof(struct nvme_dsm_range);
+       else
+               size = blk_rq_bytes(rq);
 
-               iod = cmd->iod;
-               iod_init(iod, size, rq->nr_phys_segments,
-                               (unsigned long) rq | NVME_INT_MASK);
-               return iod;
+       if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
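+               /* too big for the inline table; we are in submission context, hence GFP_ATOMIC */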
+               iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
+               if (!iod->sg)
+                       return BLK_MQ_RQ_QUEUE_BUSY;
+       } else {
+               iod->sg = iod->inline_sg;
        }
 
-       return __nvme_alloc_iod(rq->nr_phys_segments, size, dev,
-                               (unsigned long) rq, gfp);
+       iod->aborted = 0;
+       iod->npages = -1;
+       iod->nents = 0;
+       iod->length = size;
+       return 0;
 }
 
-static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 {
-       const int last_prp = dev->page_size / 8 - 1;
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       const int last_prp = dev->ctrl.page_size / 8 - 1;
        int i;
-       __le64 **list = iod_list(iod);
+       __le64 **list = iod_list(req);
        dma_addr_t prp_dma = iod->first_dma;
 
        if (iod->npages == 0)
@@ -479,20 +378,8 @@ static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
                prp_dma = next_prp_dma;
        }
 
-       if (iod_should_kfree(iod))
-               kfree(iod);
-}
-
-static int nvme_error_status(u16 status)
-{
-       switch (status & 0x7ff) {
-       case NVME_SC_SUCCESS:
-               return 0;
-       case NVME_SC_CAP_EXCEEDED:
-               return -ENOSPC;
-       default:
-               return -EIO;
-       }
+       if (iod->sg != iod->inline_sg)
+               kfree(iod->sg);
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
@@ -549,27 +436,6 @@ static void nvme_dif_remap(struct request *req,
        }
        kunmap_atomic(pmap);
 }
-
-static void nvme_init_integrity(struct nvme_ns *ns)
-{
-       struct blk_integrity integrity;
-
-       switch (ns->pi_type) {
-       case NVME_NS_DPS_PI_TYPE3:
-               integrity.profile = &t10_pi_type3_crc;
-               break;
-       case NVME_NS_DPS_PI_TYPE1:
-       case NVME_NS_DPS_PI_TYPE2:
-               integrity.profile = &t10_pi_type1_crc;
-               break;
-       default:
-               integrity.profile = NULL;
-               break;
-       }
-       integrity.tuple_size = ns->ms;
-       blk_integrity_register(ns->disk, &integrity);
-       blk_queue_max_integrity_segments(ns->queue, 1);
-}
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 static void nvme_dif_remap(struct request *req,
                        void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
@@ -581,91 +447,27 @@ static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
 static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
 {
 }
-static void nvme_init_integrity(struct nvme_ns *ns)
-{
-}
 #endif
 
-static void req_completion(struct nvme_queue *nvmeq, void *ctx,
-                                               struct nvme_completion *cqe)
-{
-       struct nvme_iod *iod = ctx;
-       struct request *req = iod_get_private(iod);
-       struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
-       u16 status = le16_to_cpup(&cqe->status) >> 1;
-       bool requeue = false;
-       int error = 0;
-
-       if (unlikely(status)) {
-               if (!(status & NVME_SC_DNR || blk_noretry_request(req))
-                   && (jiffies - req->start_time) < req->timeout) {
-                       unsigned long flags;
-
-                       requeue = true;
-                       blk_mq_requeue_request(req);
-                       spin_lock_irqsave(req->q->queue_lock, flags);
-                       if (!blk_queue_stopped(req->q))
-                               blk_mq_kick_requeue_list(req->q);
-                       spin_unlock_irqrestore(req->q->queue_lock, flags);
-                       goto release_iod;
-               }
-
-               if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
-                       if (cmd_rq->ctx == CMD_CTX_CANCELLED)
-                               error = -EINTR;
-                       else
-                               error = status;
-               } else {
-                       error = nvme_error_status(status);
-               }
-       }
-
-       if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
-               u32 result = le32_to_cpup(&cqe->result);
-               req->special = (void *)(uintptr_t)result;
-       }
-
-       if (cmd_rq->aborted)
-               dev_warn(nvmeq->dev->dev,
-                       "completing aborted command with status:%04x\n",
-                       error);
-
-release_iod:
-       if (iod->nents) {
-               dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
-                       rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-               if (blk_integrity_rq(req)) {
-                       if (!rq_data_dir(req))
-                               nvme_dif_remap(req, nvme_dif_complete);
-                       dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
-                               rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-               }
-       }
-       nvme_free_iod(nvmeq->dev, iod);
-
-       if (likely(!requeue))
-               blk_mq_complete_request(req, error);
-}
-
-/* length is in bytes.  gfp flags indicates whether we may sleep. */
-static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
-               int total_len, gfp_t gfp)
+static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
+               int total_len)
 {
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct dma_pool *pool;
        int length = total_len;
        struct scatterlist *sg = iod->sg;
        int dma_len = sg_dma_len(sg);
        u64 dma_addr = sg_dma_address(sg);
-       u32 page_size = dev->page_size;
+       u32 page_size = dev->ctrl.page_size;
        int offset = dma_addr & (page_size - 1);
        __le64 *prp_list;
-       __le64 **list = iod_list(iod);
+       __le64 **list = iod_list(req);
        dma_addr_t prp_dma;
        int nprps, i;
 
        length -= (page_size - offset);
        if (length <= 0)
-               return total_len;
+               return true;
 
        dma_len -= (page_size - offset);
        if (dma_len) {
@@ -678,7 +480,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 
        if (length <= page_size) {
                iod->first_dma = dma_addr;
-               return total_len;
+               return true;
        }
 
        nprps = DIV_ROUND_UP(length, page_size);
@@ -690,11 +492,11 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
                iod->npages = 1;
        }
 
-       prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
+       prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
        if (!prp_list) {
                iod->first_dma = dma_addr;
                iod->npages = -1;
-               return (total_len - length) + page_size;
+               return false;
        }
        list[0] = prp_list;
        iod->first_dma = prp_dma;
@@ -702,9 +504,9 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
        for (;;) {
                if (i == page_size >> 3) {
                        __le64 *old_prp_list = prp_list;
-                       prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
+                       prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
                        if (!prp_list)
-                               return total_len - length;
+                               return false;
                        list[iod->npages++] = prp_list;
                        prp_list[0] = old_prp_list[i - 1];
                        old_prp_list[i - 1] = cpu_to_le64(prp_dma);
@@ -724,115 +526,105 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
                dma_len = sg_dma_len(sg);
        }
 
-       return total_len;
+       return true;
 }
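
nvme_setup_prps now reports plain success/failure and always allocates with GFP_ATOMIC, since it runs in the I/O dispatch path. To make the branching concrete, here is a standalone sketch of how many PRP entries a transfer needs after PRP1 and which pool the driver would plausibly draw from; the page size and small-pool threshold are assumed for illustration:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int page_size = 4096;  /* assumed controller page size */
        unsigned int offset = 512;      /* data starts mid-page */
        unsigned int total_len = 32768; /* 32 KiB transfer */

        /* PRP1 covers the (possibly partial) first page; everything
         * after that needs one PRP entry per page. */
        unsigned int remaining = total_len - (page_size - offset);
        unsigned int nprps = DIV_ROUND_UP(remaining, page_size);

        printf("PRP entries after PRP1: %u -> %s\n", nprps,
               nprps <= 32 ? "small pool (256-byte list)"
                           : "full-page PRP list");
        return 0;
}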
 
-static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
-               struct nvme_iod *iod)
+static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+               struct nvme_command *cmnd)
 {
-       struct nvme_command cmnd;
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       struct request_queue *q = req->q;
+       enum dma_data_direction dma_dir = rq_data_dir(req) ?
+                       DMA_TO_DEVICE : DMA_FROM_DEVICE;
+       int ret = BLK_MQ_RQ_QUEUE_ERROR;
 
-       memcpy(&cmnd, req->cmd, sizeof(cmnd));
-       cmnd.rw.command_id = req->tag;
-       if (req->nr_phys_segments) {
-               cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-               cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
-       }
+       sg_init_table(iod->sg, req->nr_phys_segments);
+       iod->nents = blk_rq_map_sg(q, req, iod->sg);
+       if (!iod->nents)
+               goto out;
 
-       __nvme_submit_cmd(nvmeq, &cmnd);
-}
+       ret = BLK_MQ_RQ_QUEUE_BUSY;
+       if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir))
+               goto out;
 
-/*
- * We reuse the small pool to allocate the 16-byte range here as it is not
- * worth having a special pool for these or additional cases to handle freeing
- * the iod.
- */
-static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
-               struct request *req, struct nvme_iod *iod)
-{
-       struct nvme_dsm_range *range =
-                               (struct nvme_dsm_range *)iod_list(iod)[0];
-       struct nvme_command cmnd;
+       if (!nvme_setup_prps(dev, req, blk_rq_bytes(req)))
+               goto out_unmap;
 
-       range->cattr = cpu_to_le32(0);
-       range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
-       range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+       ret = BLK_MQ_RQ_QUEUE_ERROR;
+       if (blk_integrity_rq(req)) {
+               if (blk_rq_count_integrity_sg(q, req->bio) != 1)
+                       goto out_unmap;
 
-       memset(&cmnd, 0, sizeof(cmnd));
-       cmnd.dsm.opcode = nvme_cmd_dsm;
-       cmnd.dsm.command_id = req->tag;
-       cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
-       cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma);
-       cmnd.dsm.nr = 0;
-       cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+               sg_init_table(&iod->meta_sg, 1);
+               if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
+                       goto out_unmap;
 
-       __nvme_submit_cmd(nvmeq, &cmnd);
-}
+               if (rq_data_dir(req))
+                       nvme_dif_remap(req, nvme_dif_prep);
 
-static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
-                                                               int cmdid)
-{
-       struct nvme_command cmnd;
+               if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
+                       goto out_unmap;
+       }
 
-       memset(&cmnd, 0, sizeof(cmnd));
-       cmnd.common.opcode = nvme_cmd_flush;
-       cmnd.common.command_id = cmdid;
-       cmnd.common.nsid = cpu_to_le32(ns->ns_id);
+       cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+       cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+       if (blk_integrity_rq(req))
+               cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
+       return BLK_MQ_RQ_QUEUE_OK;
 
-       __nvme_submit_cmd(nvmeq, &cmnd);
+out_unmap:
+       dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+out:
+       return ret;
 }
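
Note the two distinct failure codes here: BLK_MQ_RQ_QUEUE_BUSY for transient conditions (a failed DMA mapping or PRP allocation, where retrying later may succeed) versus BLK_MQ_RQ_QUEUE_ERROR for structurally bad requests. A toy model of that split, with the names below standing in for the real constants:

#include <stdio.h>

enum rq_status { RQ_OK, RQ_BUSY, RQ_ERROR };    /* stand-ins */

/* BUSY -> the block layer requeues and retries later;
 * ERROR -> the request is failed immediately. */
static const char *disposition(enum rq_status s)
{
        switch (s) {
        case RQ_OK:   return "submitted";
        case RQ_BUSY: return "requeued for retry";
        default:      return "failed";
        }
}

int main(void)
{
        printf("%s / %s\n", disposition(RQ_BUSY), disposition(RQ_ERROR));
        return 0;
}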
 
-static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
-                                                       struct nvme_ns *ns)
+static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 {
-       struct request *req = iod_get_private(iod);
-       struct nvme_command cmnd;
-       u16 control = 0;
-       u32 dsmgmt = 0;
-
-       if (req->cmd_flags & REQ_FUA)
-               control |= NVME_RW_FUA;
-       if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
-               control |= NVME_RW_LR;
-
-       if (req->cmd_flags & REQ_RAHEAD)
-               dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
-
-       memset(&cmnd, 0, sizeof(cmnd));
-       cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
-       cmnd.rw.command_id = req->tag;
-       cmnd.rw.nsid = cpu_to_le32(ns->ns_id);
-       cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-       cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
-       cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
-       cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-
-       if (ns->ms) {
-               switch (ns->pi_type) {
-               case NVME_NS_DPS_PI_TYPE3:
-                       control |= NVME_RW_PRINFO_PRCHK_GUARD;
-                       break;
-               case NVME_NS_DPS_PI_TYPE1:
-               case NVME_NS_DPS_PI_TYPE2:
-                       control |= NVME_RW_PRINFO_PRCHK_GUARD |
-                                       NVME_RW_PRINFO_PRCHK_REF;
-                       cmnd.rw.reftag = cpu_to_le32(
-                                       nvme_block_nr(ns, blk_rq_pos(req)));
-                       break;
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       enum dma_data_direction dma_dir = rq_data_dir(req) ?
+                       DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+       if (iod->nents) {
+               dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+               if (blk_integrity_rq(req)) {
+                       if (!rq_data_dir(req))
+                               nvme_dif_remap(req, nvme_dif_complete);
+                       dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
                }
-               if (blk_integrity_rq(req))
-                       cmnd.rw.metadata =
-                               cpu_to_le64(sg_dma_address(iod->meta_sg));
-               else
-                       control |= NVME_RW_PRINFO_PRACT;
        }
 
-       cmnd.rw.control = cpu_to_le16(control);
-       cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt);
+       nvme_free_iod(dev, req);
+}
 
-       __nvme_submit_cmd(nvmeq, &cmnd);
+/*
+ * We reuse the small pool to allocate the 16-byte range here as it is not
+ * worth having a special pool for these or additional cases to handle freeing
+ * the iod.
+ */
+static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+               struct request *req, struct nvme_command *cmnd)
+{
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       struct nvme_dsm_range *range;
 
-       return 0;
+       range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC,
+                                               &iod->first_dma);
+       if (!range)
+               return BLK_MQ_RQ_QUEUE_BUSY;
+       iod_list(req)[0] = (__le64 *)range;
+       iod->npages = 0;
+
+       range->cattr = cpu_to_le32(0);
+       range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
+       range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+
+       memset(cmnd, 0, sizeof(*cmnd));
+       cmnd->dsm.opcode = nvme_cmd_dsm;
+       cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
+       cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
+       cmnd->dsm.nr = 0;
+       cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+       return BLK_MQ_RQ_QUEUE_OK;
 }
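
The discard path packs a single DSM range whose block count comes from the request's byte length shifted by the namespace's LBA size. Worked arithmetic for an assumed 512-byte LBA format:

#include <stdio.h>

int main(void)
{
        unsigned int lba_shift = 9;             /* 512-byte LBAs, assumed */
        unsigned long long bytes = 1ULL << 20;  /* 1 MiB discard */

        /* nlb as placed into the DSM range descriptor. */
        printf("nlb = %llu logical blocks\n", bytes >> lba_shift);
        return 0;
}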
 
 /*
@@ -845,9 +637,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct nvme_queue *nvmeq = hctx->driver_data;
        struct nvme_dev *dev = nvmeq->dev;
        struct request *req = bd->rq;
-       struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
-       struct nvme_iod *iod;
-       enum dma_data_direction dma_dir;
+       struct nvme_command cmnd;
+       int ret = BLK_MQ_RQ_QUEUE_OK;
 
        /*
         * If formatted with metadata, require the block layer provide a buffer

@@ -857,91 +648,77 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (ns && ns->ms && !blk_integrity_rq(req)) {
                if (!(ns->pi_type && ns->ms == 8) &&
                                        req->cmd_type != REQ_TYPE_DRV_PRIV) {
-                       blk_mq_complete_request(req, -EFAULT);
+                       blk_mq_end_request(req, -EFAULT);
                        return BLK_MQ_RQ_QUEUE_OK;
                }
        }
 
-       iod = nvme_alloc_iod(req, dev, GFP_ATOMIC);
-       if (!iod)
-               return BLK_MQ_RQ_QUEUE_BUSY;
+       ret = nvme_init_iod(req, dev);
+       if (ret)
+               return ret;
 
        if (req->cmd_flags & REQ_DISCARD) {
-               void *range;
-               /*
-                * We reuse the small pool to allocate the 16-byte range here
-                * as it is not worth having a special pool for these or
-                * additional cases to handle freeing the iod.
-                */
-               range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
-                                               &iod->first_dma);
-               if (!range)
-                       goto retry_cmd;
-               iod_list(iod)[0] = (__le64 *)range;
-               iod->npages = 0;
-       } else if (req->nr_phys_segments) {
-               dma_dir = rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+               ret = nvme_setup_discard(nvmeq, ns, req, &cmnd);
+       } else {
+               if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+                       memcpy(&cmnd, req->cmd, sizeof(cmnd));
+               else if (req->cmd_flags & REQ_FLUSH)
+                       nvme_setup_flush(ns, &cmnd);
+               else
+                       nvme_setup_rw(ns, req, &cmnd);
+
+               if (req->nr_phys_segments)
+                       ret = nvme_map_data(dev, req, &cmnd);
+       }
 
-               sg_init_table(iod->sg, req->nr_phys_segments);
-               iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
-               if (!iod->nents)
-                       goto error_cmd;
+       if (ret)
+               goto out;
 
-               if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir))
-                       goto retry_cmd;
+       cmnd.common.command_id = req->tag;
+       blk_mq_start_request(req);
 
-               if (blk_rq_bytes(req) !=
-                    nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
-                       dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
-                       goto retry_cmd;
-               }
-               if (blk_integrity_rq(req)) {
-                       if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) {
-                               dma_unmap_sg(dev->dev, iod->sg, iod->nents,
-                                               dma_dir);
-                               goto error_cmd;
-                       }
+       spin_lock_irq(&nvmeq->q_lock);
+       if (unlikely(nvmeq->cq_vector < 0)) {
+               ret = BLK_MQ_RQ_QUEUE_BUSY;
+               spin_unlock_irq(&nvmeq->q_lock);
+               goto out;
+       }
+       __nvme_submit_cmd(nvmeq, &cmnd);
+       nvme_process_cq(nvmeq);
+       spin_unlock_irq(&nvmeq->q_lock);
+       return BLK_MQ_RQ_QUEUE_OK;
+out:
+       nvme_free_iod(dev, req);
+       return ret;
+}
 
-                       sg_init_table(iod->meta_sg, 1);
-                       if (blk_rq_map_integrity_sg(
-                                       req->q, req->bio, iod->meta_sg) != 1) {
-                               dma_unmap_sg(dev->dev, iod->sg, iod->nents,
-                                               dma_dir);
-                               goto error_cmd;
-                       }
+static void nvme_complete_rq(struct request *req)
+{
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       struct nvme_dev *dev = iod->nvmeq->dev;
+       int error = 0;
 
-                       if (rq_data_dir(req))
-                               nvme_dif_remap(req, nvme_dif_prep);
+       nvme_unmap_data(dev, req);
 
-                       if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) {
-                               dma_unmap_sg(dev->dev, iod->sg, iod->nents,
-                                               dma_dir);
-                               goto error_cmd;
-                       }
+       if (unlikely(req->errors)) {
+               if (nvme_req_needs_retry(req, req->errors)) {
+                       nvme_requeue_req(req);
+                       return;
                }
-       }
 
-       nvme_set_info(cmd, iod, req_completion);
-       spin_lock_irq(&nvmeq->q_lock);
-       if (req->cmd_type == REQ_TYPE_DRV_PRIV)
-               nvme_submit_priv(nvmeq, req, iod);
-       else if (req->cmd_flags & REQ_DISCARD)
-               nvme_submit_discard(nvmeq, ns, req, iod);
-       else if (req->cmd_flags & REQ_FLUSH)
-               nvme_submit_flush(nvmeq, ns, req->tag);
-       else
-               nvme_submit_iod(nvmeq, iod, ns);
+               if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+                       error = req->errors;
+               else
+                       error = nvme_error_status(req->errors);
+       }
 
-       nvme_process_cq(nvmeq);
-       spin_unlock_irq(&nvmeq->q_lock);
-       return BLK_MQ_RQ_QUEUE_OK;
+       if (unlikely(iod->aborted)) {
+               dev_warn(dev->dev,
+                       "completing aborted command with status: %04x\n",
+                       req->errors);
+       }
 
- error_cmd:
-       nvme_free_iod(dev, iod);
-       return BLK_MQ_RQ_QUEUE_ERROR;
- retry_cmd:
-       nvme_free_iod(dev, iod);
-       return BLK_MQ_RQ_QUEUE_BUSY;
+       blk_mq_end_request(req, error);
 }
 
 static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
@@ -952,20 +729,47 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
        phase = nvmeq->cq_phase;
 
        for (;;) {
-               void *ctx;
-               nvme_completion_fn fn;
                struct nvme_completion cqe = nvmeq->cqes[head];
-               if ((le16_to_cpu(cqe.status) & 1) != phase)
+               u16 status = le16_to_cpu(cqe.status);
+               struct request *req;
+
+               if ((status & 1) != phase)
                        break;
                nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
                if (++head == nvmeq->q_depth) {
                        head = 0;
                        phase = !phase;
                }
+
                if (tag && *tag == cqe.command_id)
                        *tag = -1;
-               ctx = nvme_finish_cmd(nvmeq, cqe.command_id, &fn);
-               fn(nvmeq, ctx, &cqe);
+
+               if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
+                       dev_warn(nvmeq->q_dmadev,
+                               "invalid id %d completed on queue %d\n",
+                               cqe.command_id, le16_to_cpu(cqe.sq_id));
+                       continue;
+               }
+
+               /*
+                * AEN requests are special as they don't time out and can
+                * survive any kind of queue freeze and often don't respond to
+                * aborts.  We don't even bother to allocate a struct request
+                * for them but rather special-case them here.

+                */
+               if (unlikely(nvmeq->qid == 0 &&
+                               cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+                       nvme_complete_async_event(nvmeq->dev, &cqe);
+                       continue;
+               }
+
+               req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
+               if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+                       u32 result = le32_to_cpu(cqe.result);
+                       req->special = (void *)(uintptr_t)result;
+               }
+               blk_mq_complete_request(req, status >> 1);
        }
 
        /* If the controller ignores the cq head doorbell and continuously
@@ -1028,115 +832,18 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
        return 0;
 }
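
The completion loop above keys off the CQ phase tag: each pass through the ring flips the phase the consumer expects, so an entry whose phase bit does not match is stale and ends the scan. The wrap arithmetic in isolation, with a tiny assumed depth:

#include <stdio.h>

int main(void)
{
        unsigned int q_depth = 4;       /* assumed tiny ring */
        unsigned int head = 3, phase = 1;

        /* Advancing past the last slot wraps head and flips the
         * expected phase, exactly as in the completion loop. */
        if (++head == q_depth) {
                head = 0;
                phase = !phase;
        }
        printf("head=%u phase=%u\n", head, phase);
        return 0;
}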
 
-/*
- * Returns 0 on success.  If the result is negative, it's a Linux error code;
- * if the result is positive, it's an NVM Express status code
- */
-int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-               void *buffer, void __user *ubuffer, unsigned bufflen,
-               u32 *result, unsigned timeout)
+static void nvme_submit_async_event(struct nvme_dev *dev)
 {
-       bool write = cmd->common.opcode & 1;
-       struct bio *bio = NULL;
-       struct request *req;
-       int ret;
-
-       req = blk_mq_alloc_request(q, write, 0);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       req->cmd_type = REQ_TYPE_DRV_PRIV;
-       req->cmd_flags |= REQ_FAILFAST_DRIVER;
-       req->__data_len = 0;
-       req->__sector = (sector_t) -1;
-       req->bio = req->biotail = NULL;
-
-       req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
-
-       req->cmd = (unsigned char *)cmd;
-       req->cmd_len = sizeof(struct nvme_command);
-       req->special = (void *)0;
-
-       if (buffer && bufflen) {
-               ret = blk_rq_map_kern(q, req, buffer, bufflen,
-                                     __GFP_DIRECT_RECLAIM);
-               if (ret)
-                       goto out;
-       } else if (ubuffer && bufflen) {
-               ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
-                                     __GFP_DIRECT_RECLAIM);
-               if (ret)
-                       goto out;
-               bio = req->bio;
-       }
+       struct nvme_command c;
 
-       blk_execute_rq(req->q, NULL, req, 0);
-       if (bio)
-               blk_rq_unmap_user(bio);
-       if (result)
-               *result = (u32)(uintptr_t)req->special;
-       ret = req->errors;
- out:
-       blk_mq_free_request(req);
-       return ret;
-}
+       memset(&c, 0, sizeof(c));
+       c.common.opcode = nvme_admin_async_event;
+       c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit;
 
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-               void *buffer, unsigned bufflen)
-{
-       return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
+       __nvme_submit_cmd(dev->queues[0], &c);
 }
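
Async event commands borrow IDs from above the blk-mq tag space, which is how the completion path recognizes them without a struct request. A sketch of that routing rule; the depth value is assumed for illustration:

#include <stdio.h>

#define AQ_BLKMQ_DEPTH 31       /* assumed for illustration */

static const char *route(unsigned int cid)
{
        return cid >= AQ_BLKMQ_DEPTH ? "async event notification"
                                     : "blk-mq tagged request";
}

int main(void)
{
        printf("cid 7  -> %s\n", route(7));
        printf("cid 31 -> %s\n", route(31));
        return 0;
}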
 
-static int nvme_submit_async_admin_req(struct nvme_dev *dev)
-{
-       struct nvme_queue *nvmeq = dev->queues[0];
-       struct nvme_command c;
-       struct nvme_cmd_info *cmd_info;
-       struct request *req;
-
-       req = blk_mq_alloc_request(dev->admin_q, WRITE,
-                       BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       req->cmd_flags |= REQ_NO_TIMEOUT;
-       cmd_info = blk_mq_rq_to_pdu(req);
-       nvme_set_info(cmd_info, NULL, async_req_completion);
-
-       memset(&c, 0, sizeof(c));
-       c.common.opcode = nvme_admin_async_event;
-       c.common.command_id = req->tag;
-
-       blk_mq_free_request(req);
-       __nvme_submit_cmd(nvmeq, &c);
-       return 0;
-}
-
-static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
-                       struct nvme_command *cmd,
-                       struct async_cmd_info *cmdinfo, unsigned timeout)
-{
-       struct nvme_queue *nvmeq = dev->queues[0];
-       struct request *req;
-       struct nvme_cmd_info *cmd_rq;
-
-       req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       req->timeout = timeout;
-       cmd_rq = blk_mq_rq_to_pdu(req);
-       cmdinfo->req = req;
-       nvme_set_info(cmd_rq, cmdinfo, async_completion);
-       cmdinfo->status = -EINTR;
-
-       cmd->common.command_id = req->tag;
-
-       nvme_submit_cmd(nvmeq, cmd);
-       return 0;
-}
-
-static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
+static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
        struct nvme_command c;
 
@@ -1144,7 +851,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
        c.delete_queue.opcode = opcode;
        c.delete_queue.qid = cpu_to_le16(id);
 
-       return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+       return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1165,7 +872,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
        c.create_cq.cq_flags = cpu_to_le16(flags);
        c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
 
-       return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+       return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1186,7 +893,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
        c.create_sq.sq_flags = cpu_to_le16(flags);
        c.create_sq.cqid = cpu_to_le16(qid);
 
-       return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+       return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1199,195 +906,111 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
        return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
-{
-       struct nvme_command c = { };
-       int error;
-
-       /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
-       c.identify.opcode = nvme_admin_identify;
-       c.identify.cns = cpu_to_le32(1);
-
-       *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
-       if (!*id)
-               return -ENOMEM;
-
-       error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
-                       sizeof(struct nvme_id_ctrl));
-       if (error)
-               kfree(*id);
-       return error;
-}
-
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
-               struct nvme_id_ns **id)
-{
-       struct nvme_command c = { };
-       int error;
-
-       /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
-       c.identify.opcode = nvme_admin_identify,
-       c.identify.nsid = cpu_to_le32(nsid),
-
-       *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
-       if (!*id)
-               return -ENOMEM;
-
-       error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
-                       sizeof(struct nvme_id_ns));
-       if (error)
-               kfree(*id);
-       return error;
-}
-
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
-                                       dma_addr_t dma_addr, u32 *result)
-{
-       struct nvme_command c;
-
-       memset(&c, 0, sizeof(c));
-       c.features.opcode = nvme_admin_get_features;
-       c.features.nsid = cpu_to_le32(nsid);
-       c.features.prp1 = cpu_to_le64(dma_addr);
-       c.features.fid = cpu_to_le32(fid);
-
-       return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
-                       result, 0);
-}
-
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
-                                       dma_addr_t dma_addr, u32 *result)
-{
-       struct nvme_command c;
-
-       memset(&c, 0, sizeof(c));
-       c.features.opcode = nvme_admin_set_features;
-       c.features.prp1 = cpu_to_le64(dma_addr);
-       c.features.fid = cpu_to_le32(fid);
-       c.features.dword11 = cpu_to_le32(dword11);
-
-       return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
-                       result, 0);
-}
-
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+static void abort_endio(struct request *req, int error)
 {
-       struct nvme_command c = { };
-       int error;
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       struct nvme_queue *nvmeq = iod->nvmeq;
+       u32 result = (u32)(uintptr_t)req->special;
+       u16 status = req->errors;
 
-       c.common.opcode = nvme_admin_get_log_page,
-       c.common.nsid = cpu_to_le32(0xFFFFFFFF),
-       c.common.cdw10[0] = cpu_to_le32(
-                       (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
-                        NVME_LOG_SMART),
-
-       *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
-       if (!*log)
-               return -ENOMEM;
+       dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
+       atomic_inc(&nvmeq->dev->ctrl.abort_limit);
 
-       error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
-                       sizeof(struct nvme_smart_log));
-       if (error)
-               kfree(*log);
-       return error;
+       blk_mq_free_request(req);
 }
 
-/**
- * nvme_abort_req - Attempt aborting a request
- *
- * Schedule controller reset if the command was already aborted once before and
- * still hasn't been returned to the driver, or if this is the admin queue.
- */
-static void nvme_abort_req(struct request *req)
+static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 {
-       struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
-       struct nvme_queue *nvmeq = cmd_rq->nvmeq;
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       struct nvme_queue *nvmeq = iod->nvmeq;
        struct nvme_dev *dev = nvmeq->dev;
        struct request *abort_req;
-       struct nvme_cmd_info *abort_cmd;
        struct nvme_command cmd;
 
-       if (!nvmeq->qid || cmd_rq->aborted) {
-               spin_lock(&dev_list_lock);
-               if (!__nvme_reset(dev)) {
-                       dev_warn(dev->dev,
-                                "I/O %d QID %d timeout, reset controller\n",
-                                req->tag, nvmeq->qid);
-               }
-               spin_unlock(&dev_list_lock);
-               return;
+       /*
+        * Shut down immediately if the controller times out while starting. The
+        * reset work will see the pci device disabled when it gets the forced
+        * cancellation error. All outstanding requests are completed on
+        * shutdown, so we return BLK_EH_HANDLED.
+        */
+       if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
+               dev_warn(dev->dev,
+                        "I/O %d QID %d timeout, disable controller\n",
+                        req->tag, nvmeq->qid);
+               nvme_dev_disable(dev, false);
+               req->errors = NVME_SC_CANCELLED;
+               return BLK_EH_HANDLED;
        }
 
-       if (!dev->abort_limit)
-               return;
+       /*
+        * Shut down the controller immediately and schedule a reset if the
+        * command was already aborted once before and still hasn't been
+        * returned to the driver, or if this is the admin queue.
+        */
+       if (!nvmeq->qid || iod->aborted) {
+               dev_warn(dev->dev,
+                        "I/O %d QID %d timeout, reset controller\n",
+                        req->tag, nvmeq->qid);
+               nvme_dev_disable(dev, false);
+               queue_work(nvme_workq, &dev->reset_work);
 
-       abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
-                       BLK_MQ_REQ_NOWAIT);
-       if (IS_ERR(abort_req))
-               return;
+               /*
+                * Mark the request as handled, since the inline shutdown
+                * forces all outstanding requests to complete.
+                */
+               req->errors = NVME_SC_CANCELLED;
+               return BLK_EH_HANDLED;
+       }
+
+       iod->aborted = 1;
 
-       abort_cmd = blk_mq_rq_to_pdu(abort_req);
-       nvme_set_info(abort_cmd, abort_req, abort_completion);
+       if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
+               atomic_inc(&dev->ctrl.abort_limit);
+               return BLK_EH_RESET_TIMER;
+       }
 
        memset(&cmd, 0, sizeof(cmd));
        cmd.abort.opcode = nvme_admin_abort_cmd;
        cmd.abort.cid = req->tag;
        cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
-       cmd.abort.command_id = abort_req->tag;
 
-       --dev->abort_limit;
-       cmd_rq->aborted = 1;
+       dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n",
+                                req->tag, nvmeq->qid);
+
+       abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
+                       BLK_MQ_REQ_NOWAIT);
+       if (IS_ERR(abort_req)) {
+               atomic_inc(&dev->ctrl.abort_limit);
+               return BLK_EH_RESET_TIMER;
+       }
+
+       abort_req->timeout = ADMIN_TIMEOUT;
+       abort_req->end_io_data = NULL;
+       blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
 
-       dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
-                                                       nvmeq->qid);
-       nvme_submit_cmd(dev->queues[0], &cmd);
+       /*
+        * The aborted req will be completed on receiving the abort req.
+        * We enable the timer again. If hit twice, it'll cause a device reset,
+        * as the device is then in a faulty state.
+        */
+       return BLK_EH_RESET_TIMER;
 }
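
The timeout handler is now a three-step escalation: disable outright while a reset is already in flight, disable and schedule a reset for admin-queue commands or repeat offenders, and otherwise fire a single asynchronous abort and re-arm the timer. A toy decision ladder, with booleans standing in for the real controller and request state:

#include <stdio.h>

enum action { DISABLE_ONLY, DISABLE_AND_RESET, SEND_ABORT };

static enum action on_timeout(int resetting, int admin_q, int aborted)
{
        if (resetting)
                return DISABLE_ONLY;            /* reset work finishes up */
        if (admin_q || aborted)
                return DISABLE_AND_RESET;       /* escalate */
        return SEND_ABORT;                      /* then BLK_EH_RESET_TIMER */
}

int main(void)
{
        printf("%d %d %d\n", on_timeout(1, 0, 0), on_timeout(0, 1, 0),
               on_timeout(0, 0, 0));
        return 0;
}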
 
 static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
 {
        struct nvme_queue *nvmeq = data;
-       void *ctx;
-       nvme_completion_fn fn;
-       struct nvme_cmd_info *cmd;
-       struct nvme_completion cqe;
+       int status;
 
        if (!blk_mq_request_started(req))
                return;
 
-       cmd = blk_mq_rq_to_pdu(req);
-
-       if (cmd->ctx == CMD_CTX_CANCELLED)
-               return;
+       dev_dbg_ratelimited(nvmeq->q_dmadev,
+                "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
 
+       status = NVME_SC_ABORT_REQ;
        if (blk_queue_dying(req->q))
-               cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
-       else
-               cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
-
-
-       dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
-                                               req->tag, nvmeq->qid);
-       ctx = cancel_cmd_info(cmd, &fn);
-       fn(nvmeq, ctx, &cqe);
-}
-
-static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
-{
-       struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
-       struct nvme_queue *nvmeq = cmd->nvmeq;
-
-       dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
-                                                       nvmeq->qid);
-       spin_lock_irq(&nvmeq->q_lock);
-       nvme_abort_req(req);
-       spin_unlock_irq(&nvmeq->q_lock);
-
-       /*
-        * The aborted req will be completed on receiving the abort req.
-        * We enable the timer again. If hit twice, it'll cause a device reset,
-        * as the device then is in a faulty state.
-        */
-       return BLK_EH_RESET_TIMER;
+               status |= NVME_SC_DNR;
+       blk_mq_complete_request(req, status);
 }
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1430,8 +1053,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
        nvmeq->cq_vector = -1;
        spin_unlock_irq(&nvmeq->q_lock);
 
-       if (!nvmeq->qid && nvmeq->dev->admin_q)
-               blk_mq_freeze_queue_start(nvmeq->dev->admin_q);
+       if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
+               blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
 
        irq_set_affinity_hint(vector, NULL);
        free_irq(vector, nvmeq);
@@ -1447,21 +1070,20 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq)
        spin_unlock_irq(&nvmeq->q_lock);
 }
 
-static void nvme_disable_queue(struct nvme_dev *dev, int qid)
+static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 {
-       struct nvme_queue *nvmeq = dev->queues[qid];
+       struct nvme_queue *nvmeq = dev->queues[0];
 
        if (!nvmeq)
                return;
        if (nvme_suspend_queue(nvmeq))
                return;
 
-       /* Don't tell the adapter to delete the admin queue.
-        * Don't tell a removed adapter to delete IO queues. */
-       if (qid && readl(&dev->bar->csts) != -1) {
-               adapter_delete_sq(dev, qid);
-               adapter_delete_cq(dev, qid);
-       }
+       if (shutdown)
+               nvme_shutdown_ctrl(&dev->ctrl);
+       else
+               nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
+                                               dev->bar + NVME_REG_CAP));
 
        spin_lock_irq(&nvmeq->q_lock);
        nvme_process_cq(nvmeq);
@@ -1472,11 +1094,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
                                int entry_size)
 {
        int q_depth = dev->q_depth;
-       unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size);
+       unsigned q_size_aligned = roundup(q_depth * entry_size,
+                                         dev->ctrl.page_size);
 
        if (q_size_aligned * nr_io_queues > dev->cmb_size) {
                u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
-               mem_per_q = round_down(mem_per_q, dev->page_size);
+               mem_per_q = round_down(mem_per_q, dev->ctrl.page_size);
                q_depth = div_u64(mem_per_q, entry_size);
 
                /*
@@ -1495,8 +1118,8 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
                                int qid, int depth)
 {
        if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
-               unsigned offset = (qid - 1) *
-                                       roundup(SQ_SIZE(depth), dev->page_size);
+               unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
+                                                     dev->ctrl.page_size);
                nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
                nvmeq->sq_cmds_io = dev->cmb + offset;
        } else {
@@ -1527,7 +1150,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
        nvmeq->q_dmadev = dev->dev;
        nvmeq->dev = dev;
        snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
-                       dev->instance, qid);
+                       dev->ctrl.instance, qid);
        spin_lock_init(&nvmeq->q_lock);
        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
@@ -1604,79 +1227,9 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
        return result;
 }
 
-static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
-{
-       unsigned long timeout;
-       u32 bit = enabled ? NVME_CSTS_RDY : 0;
-
-       timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
-
-       while ((readl(&dev->bar->csts) & NVME_CSTS_RDY) != bit) {
-               msleep(100);
-               if (fatal_signal_pending(current))
-                       return -EINTR;
-               if (time_after(jiffies, timeout)) {
-                       dev_err(dev->dev,
-                               "Device not ready; aborting %s\n", enabled ?
-                                               "initialisation" : "reset");
-                       return -ENODEV;
-               }
-       }
-
-       return 0;
-}
-
-/*
- * If the device has been passed off to us in an enabled state, just clear
- * the enabled bit.  The spec says we should set the 'shutdown notification
- * bits', but doing so may cause the device to complete commands to the
- * admin queue ... and we don't know what memory that might be pointing at!
- */
-static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap)
-{
-       dev->ctrl_config &= ~NVME_CC_SHN_MASK;
-       dev->ctrl_config &= ~NVME_CC_ENABLE;
-       writel(dev->ctrl_config, &dev->bar->cc);
-
-       return nvme_wait_ready(dev, cap, false);
-}
-
-static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
-{
-       dev->ctrl_config &= ~NVME_CC_SHN_MASK;
-       dev->ctrl_config |= NVME_CC_ENABLE;
-       writel(dev->ctrl_config, &dev->bar->cc);
-
-       return nvme_wait_ready(dev, cap, true);
-}
-
-static int nvme_shutdown_ctrl(struct nvme_dev *dev)
-{
-       unsigned long timeout;
-
-       dev->ctrl_config &= ~NVME_CC_SHN_MASK;
-       dev->ctrl_config |= NVME_CC_SHN_NORMAL;
-
-       writel(dev->ctrl_config, &dev->bar->cc);
-
-       timeout = SHUTDOWN_TIMEOUT + jiffies;
-       while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
-                                                       NVME_CSTS_SHST_CMPLT) {
-               msleep(100);
-               if (fatal_signal_pending(current))
-                       return -EINTR;
-               if (time_after(jiffies, timeout)) {
-                       dev_err(dev->dev,
-                               "Device shutdown incomplete; abort shutdown\n");
-                       return -ENODEV;
-               }
-       }
-
-       return 0;
-}
-
 static struct blk_mq_ops nvme_mq_admin_ops = {
        .queue_rq       = nvme_queue_rq,
+       .complete       = nvme_complete_rq,
        .map_queue      = blk_mq_map_queue,
        .init_hctx      = nvme_admin_init_hctx,
        .exit_hctx      = nvme_admin_exit_hctx,
@@ -1686,6 +1239,7 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
 
 static struct blk_mq_ops nvme_mq_ops = {
        .queue_rq       = nvme_queue_rq,
+       .complete       = nvme_complete_rq,
        .map_queue      = blk_mq_map_queue,
        .init_hctx      = nvme_init_hctx,
        .init_request   = nvme_init_request,
@@ -1695,19 +1249,23 @@ static struct blk_mq_ops nvme_mq_ops = {
 
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
-       if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
-               blk_cleanup_queue(dev->admin_q);
+       if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+               blk_cleanup_queue(dev->ctrl.admin_q);
                blk_mq_free_tag_set(&dev->admin_tagset);
        }
 }
 
 static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 {
-       if (!dev->admin_q) {
+       if (!dev->ctrl.admin_q) {
                dev->admin_tagset.ops = &nvme_mq_admin_ops;
                dev->admin_tagset.nr_hw_queues = 1;
-               dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
-               dev->admin_tagset.reserved_tags = 1;
+
+               /*
+                * Subtract one to leave an empty queue entry for the 'Full Queue'
+                * condition. See NVM-Express 1.2 specification, section 4.1.2.
+                */
+               dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
                dev->admin_tagset.timeout = ADMIN_TIMEOUT;
                dev->admin_tagset.numa_node = dev_to_node(dev->dev);
                dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
@@ -1716,18 +1274,18 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
                if (blk_mq_alloc_tag_set(&dev->admin_tagset))
                        return -ENOMEM;
 
-               dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
-               if (IS_ERR(dev->admin_q)) {
+               dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
+               if (IS_ERR(dev->ctrl.admin_q)) {
                        blk_mq_free_tag_set(&dev->admin_tagset);
                        return -ENOMEM;
                }
-               if (!blk_get_queue(dev->admin_q)) {
+               if (!blk_get_queue(dev->ctrl.admin_q)) {
                        nvme_dev_remove_admin(dev);
-                       dev->admin_q = NULL;
+                       dev->ctrl.admin_q = NULL;
                        return -ENODEV;
                }
        } else
-               blk_mq_unfreeze_queue(dev->admin_q);
+               blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
 
        return 0;
 }
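
The depth subtraction reflects the ring convention the comment cites: head == tail must mean "empty", so a ring of N slots can only ever hold N - 1 outstanding commands. In numbers, with an assumed depth:

#include <stdio.h>

int main(void)
{
        unsigned int ring_slots = 32;   /* assumed queue depth */

        /* One slot stays vacant so a full ring is distinguishable
         * from an empty one; the tag set advertises the rest. */
        printf("usable tags: %u\n", ring_slots - 1);
        return 0;
}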
@@ -1736,31 +1294,17 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
        int result;
        u32 aqa;
-       u64 cap = lo_hi_readq(&dev->bar->cap);
+       u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
        struct nvme_queue *nvmeq;
-       /*
-        * default to a 4K page size, with the intention to update this
-        * path in the future to accomodate architectures with differing
-        * kernel and IO page sizes.
-        */
-       unsigned page_shift = 12;
-       unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
-
-       if (page_shift < dev_page_min) {
-               dev_err(dev->dev,
-                               "Minimum device page size (%u) too large for "
-                               "host (%u)\n", 1 << dev_page_min,
-                               1 << page_shift);
-               return -ENODEV;
-       }
 
-       dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
+       dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ?
                                                NVME_CAP_NSSRC(cap) : 0;
 
-       if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO))
-               writel(NVME_CSTS_NSSRO, &dev->bar->csts);
+       if (dev->subsystem &&
+           (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
+               writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
 
-       result = nvme_disable_ctrl(dev, cap);
+       result = nvme_disable_ctrl(&dev->ctrl, cap);
        if (result < 0)
                return result;
 
@@ -1774,18 +1318,11 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        aqa = nvmeq->q_depth - 1;
        aqa |= aqa << 16;
 
-       dev->page_size = 1 << page_shift;
-
-       dev->ctrl_config = NVME_CC_CSS_NVM;
-       dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
-       dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
-       dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
+       writel(aqa, dev->bar + NVME_REG_AQA);
+       lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
+       lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
 
-       writel(aqa, &dev->bar->aqa);
-       lo_hi_writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
-       lo_hi_writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
-
-       result = nvme_enable_ctrl(dev, cap);
+       result = nvme_enable_ctrl(&dev->ctrl, cap);
        if (result)
                goto free_nvmeq;
 
@@ -1803,615 +1340,124 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        return result;
 }
 
-static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+static int nvme_kthread(void *data)
 {
-       struct nvme_dev *dev = ns->dev;
-       struct nvme_user_io io;
-       struct nvme_command c;
-       unsigned length, meta_len;
-       int status, write;
-       dma_addr_t meta_dma = 0;
-       void *meta = NULL;
-       void __user *metadata;
-
-       if (copy_from_user(&io, uio, sizeof(io)))
-               return -EFAULT;
-
-       switch (io.opcode) {
-       case nvme_cmd_write:
-       case nvme_cmd_read:
-       case nvme_cmd_compare:
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       length = (io.nblocks + 1) << ns->lba_shift;
-       meta_len = (io.nblocks + 1) * ns->ms;
-       metadata = (void __user *)(uintptr_t)io.metadata;
-       write = io.opcode & 1;
+       struct nvme_dev *dev, *next;
 
-       if (ns->ext) {
-               length += meta_len;
-               meta_len = 0;
-       }
-       if (meta_len) {
-               if (((io.metadata & 3) || !io.metadata) && !ns->ext)
-                       return -EINVAL;
+       while (!kthread_should_stop()) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               spin_lock(&dev_list_lock);
+               list_for_each_entry_safe(dev, next, &dev_list, node) {
+                       int i;
+                       u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
-               meta = dma_alloc_coherent(dev->dev, meta_len,
-                                               &meta_dma, GFP_KERNEL);
+                       /*
+                        * Skip controllers currently under reset.
+                        */
+                       if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work))
+                               continue;
 
-               if (!meta) {
-                       status = -ENOMEM;
-                       goto unmap;
-               }
-               if (write) {
-                       if (copy_from_user(meta, metadata, meta_len)) {
-                               status = -EFAULT;
-                               goto unmap;
+                       if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
+                                                       csts & NVME_CSTS_CFS) {
+                               if (queue_work(nvme_workq, &dev->reset_work)) {
+                                       dev_warn(dev->dev,
+                                               "Failed status: %x, reset controller\n",
+                                               readl(dev->bar + NVME_REG_CSTS));
+                               }
+                               continue;
                        }
-               }
-       }
+                       for (i = 0; i < dev->queue_count; i++) {
+                               struct nvme_queue *nvmeq = dev->queues[i];
+                               if (!nvmeq)
+                                       continue;
+                               spin_lock_irq(&nvmeq->q_lock);
+                               nvme_process_cq(nvmeq);
 
-       memset(&c, 0, sizeof(c));
-       c.rw.opcode = io.opcode;
-       c.rw.flags = io.flags;
-       c.rw.nsid = cpu_to_le32(ns->ns_id);
-       c.rw.slba = cpu_to_le64(io.slba);
-       c.rw.length = cpu_to_le16(io.nblocks);
-       c.rw.control = cpu_to_le16(io.control);
-       c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
-       c.rw.reftag = cpu_to_le32(io.reftag);
-       c.rw.apptag = cpu_to_le16(io.apptag);
-       c.rw.appmask = cpu_to_le16(io.appmask);
-       c.rw.metadata = cpu_to_le64(meta_dma);
-
-       status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
-                       (void __user *)(uintptr_t)io.addr, length, NULL, 0);
- unmap:
-       if (meta) {
-               if (status == NVME_SC_SUCCESS && !write) {
-                       if (copy_to_user(metadata, meta, meta_len))
-                               status = -EFAULT;
+                               while (i == 0 && dev->ctrl.event_limit > 0)
+                                       nvme_submit_async_event(dev);
+                               spin_unlock_irq(&nvmeq->q_lock);
+                       }
                }
-               dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
+               spin_unlock(&dev_list_lock);
+               schedule_timeout(round_jiffies_relative(HZ));
        }
-       return status;
+       return 0;
 }
 
-static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
-                       struct nvme_passthru_cmd __user *ucmd)
+static int nvme_create_io_queues(struct nvme_dev *dev)
 {
-       struct nvme_passthru_cmd cmd;
-       struct nvme_command c;
-       unsigned timeout = 0;
-       int status;
+       unsigned i;
+       int ret = 0;
 
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
-       if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
-               return -EFAULT;
+       for (i = dev->queue_count; i <= dev->max_qid; i++) {
+               if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+                       ret = -ENOMEM;
+                       break;
+               }
+       }
 
-       memset(&c, 0, sizeof(c));
-       c.common.opcode = cmd.opcode;
-       c.common.flags = cmd.flags;
-       c.common.nsid = cpu_to_le32(cmd.nsid);
-       c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
-       c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
-       c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
-       c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
-       c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
-       c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
-       c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
-       c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
-
-       if (cmd.timeout_ms)
-               timeout = msecs_to_jiffies(cmd.timeout_ms);
-
-       status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
-                       NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
-                       &cmd.result, timeout);
-       if (status >= 0) {
-               if (put_user(cmd.result, &ucmd->result))
-                       return -EFAULT;
+       for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
+               ret = nvme_create_queue(dev->queues[i], i);
+               if (ret) {
+                       nvme_free_queues(dev, i);
+                       break;
+               }
        }
 
-       return status;
+       /*
+        * Ignore failing Create SQ/CQ commands; we can continue with fewer
+        * than the desired number of queues, and even a controller without
+        * I/O queues can still be used to issue admin commands.  This might
+        * be useful for upgrading buggy firmware, for example.
+        */
+       return ret >= 0 ? 0 : ret;
 }
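
Note the sign convention in the return: negative values are Linux errors (allocation failures) and propagate, while a positive NVMe status from a failed Create SQ/CQ is deliberately flattened to success so the controller runs with however many queues it got. A one-function sketch of that mapping:

#include <stdio.h>

static int settle(int ret)
{
        /* Negative Linux errors propagate; positive NVMe statuses
         * become "run degraded with fewer queues". */
        return ret >= 0 ? 0 : ret;
}

int main(void)
{
        printf("%d %d %d\n", settle(0), settle(6), settle(-12));
        return 0;
}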
 
-static int nvme_subsys_reset(struct nvme_dev *dev)
+static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 {
-       if (!dev->subsystem)
-               return -ENOTTY;
-
-       writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */
-       return 0;
-}
-
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
-                                                       unsigned long arg)
-{
-       struct nvme_ns *ns = bdev->bd_disk->private_data;
-
-       switch (cmd) {
-       case NVME_IOCTL_ID:
-               force_successful_syscall_return();
-               return ns->ns_id;
-       case NVME_IOCTL_ADMIN_CMD:
-               return nvme_user_cmd(ns->dev, NULL, (void __user *)arg);
-       case NVME_IOCTL_IO_CMD:
-               return nvme_user_cmd(ns->dev, ns, (void __user *)arg);
-       case NVME_IOCTL_SUBMIT_IO:
-               return nvme_submit_io(ns, (void __user *)arg);
-       case SG_GET_VERSION_NUM:
-               return nvme_sg_get_version_num((void __user *)arg);
-       case SG_IO:
-               return nvme_sg_io(ns, (void __user *)arg);
-       default:
-               return -ENOTTY;
-       }
-}
+       u64 szu, size, offset;
+       u32 cmbloc;
+       resource_size_t bar_size;
+       struct pci_dev *pdev = to_pci_dev(dev->dev);
+       void __iomem *cmb;
+       dma_addr_t dma_addr;
 
-#ifdef CONFIG_COMPAT
-static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
-                                       unsigned int cmd, unsigned long arg)
-{
-       switch (cmd) {
-       case SG_IO:
-               return -ENOIOCTLCMD;
-       }
-       return nvme_ioctl(bdev, mode, cmd, arg);
-}
-#else
-#define nvme_compat_ioctl      NULL
-#endif
+       if (!use_cmb_sqes)
+               return NULL;
 
-static void nvme_free_dev(struct kref *kref);
-static void nvme_free_ns(struct kref *kref)
-{
-       struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+       dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
+       if (!(NVME_CMB_SZ(dev->cmbsz)))
+               return NULL;
 
-       if (ns->type == NVME_NS_LIGHTNVM)
-               nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
+       cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
 
-       spin_lock(&dev_list_lock);
-       ns->disk->private_data = NULL;
-       spin_unlock(&dev_list_lock);
+       szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
+       size = szu * NVME_CMB_SZ(dev->cmbsz);
+       offset = szu * NVME_CMB_OFST(cmbloc);
+       bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
 
-       kref_put(&ns->dev->kref, nvme_free_dev);
-       put_disk(ns->disk);
-       kfree(ns);
-}
+       if (offset > bar_size)
+               return NULL;
 
-static int nvme_open(struct block_device *bdev, fmode_t mode)
-{
-       int ret = 0;
-       struct nvme_ns *ns;
+       /*
+        * Controllers may support a CMB size larger than their BAR,
+        * for example, due to being behind a bridge. Reduce the CMB to
+        * the reported size of the BAR.
+        */
+       if (size > bar_size - offset)
+               size = bar_size - offset;
 
-       spin_lock(&dev_list_lock);
-       ns = bdev->bd_disk->private_data;
-       if (!ns)
-               ret = -ENXIO;
-       else if (!kref_get_unless_zero(&ns->kref))
-               ret = -ENXIO;
-       spin_unlock(&dev_list_lock);
+       dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
+       cmb = ioremap_wc(dma_addr, size);
+       if (!cmb)
+               return NULL;
 
-       return ret;
+       dev->cmb_dma_addr = dma_addr;
+       dev->cmb_size = size;
+       return cmb;
 }
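
The CMB geometry falls out of two registers: CMBSZ gives a size unit of 4 KiB << (4 * SZU) plus a count, and CMBLOC gives the BAR number and an offset in the same units. A worked decode with assumed register fields, not values read from any device:

#include <stdio.h>

int main(void)
{
        /* Assumed field values for illustration. */
        unsigned int szu_field = 0;     /* unit = 4 KiB << 0 */
        unsigned int sz_field = 1024;   /* 1024 units */
        unsigned int ofst_field = 0;

        unsigned long long szu = 1ULL << (12 + 4 * szu_field);
        unsigned long long size = szu * sz_field;       /* 4 MiB */
        unsigned long long offset = szu * ofst_field;

        printf("CMB: %llu bytes at BAR offset %llu\n", size, offset);
        return 0;
}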
 
-static void nvme_release(struct gendisk *disk, fmode_t mode)
-{
-       struct nvme_ns *ns = disk->private_data;
-       kref_put(&ns->kref, nvme_free_ns);
-}
-
-static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
-{
-       /* some standard values */
-       geo->heads = 1 << 6;
-       geo->sectors = 1 << 5;
-       geo->cylinders = get_capacity(bd->bd_disk) >> 11;
-       return 0;
-}
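
The fake geometry above is not arbitrary: heads * sectors = 64 * 32 = 2048 = 1 << 11, so cylinders = capacity >> 11 partitions the capacity exactly. A quick check with a hypothetical 1 GiB disk:

#include <stdio.h>

int main(void)
{
	unsigned long long capacity = 1ULL << 21; /* 1 GiB in 512-byte sectors */
	unsigned heads = 1 << 6, sectors = 1 << 5;
	unsigned long long cylinders = capacity >> 11;

	/* heads * sectors * cylinders reconstructs the capacity exactly */
	printf("%llu == %llu\n", capacity,
	       (unsigned long long)heads * sectors * cylinders);
	return 0;
}
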
-
-static void nvme_config_discard(struct nvme_ns *ns)
-{
-       u32 logical_block_size = queue_logical_block_size(ns->queue);
-       ns->queue->limits.discard_zeroes_data = 0;
-       ns->queue->limits.discard_alignment = logical_block_size;
-       ns->queue->limits.discard_granularity = logical_block_size;
-       blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
-       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
-}
-
-static int nvme_revalidate_disk(struct gendisk *disk)
-{
-       struct nvme_ns *ns = disk->private_data;
-       struct nvme_dev *dev = ns->dev;
-       struct nvme_id_ns *id;
-       u8 lbaf, pi_type;
-       u16 old_ms;
-       unsigned short bs;
-
-       if (nvme_identify_ns(dev, ns->ns_id, &id)) {
-               dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
-                                               dev->instance, ns->ns_id);
-               return -ENODEV;
-       }
-       if (id->ncap == 0) {
-               kfree(id);
-               return -ENODEV;
-       }
-
-       if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
-               if (nvme_nvm_register(ns->queue, disk->disk_name)) {
-                       dev_warn(dev->dev,
-                               "%s: LightNVM init failure\n", __func__);
-                       kfree(id);
-                       return -ENODEV;
-               }
-               ns->type = NVME_NS_LIGHTNVM;
-       }
-
-       old_ms = ns->ms;
-       lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
-       ns->lba_shift = id->lbaf[lbaf].ds;
-       ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
-       ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
-
-       /*
-        * If identify namespace failed, use a default 512 byte block size so
-        * the block layer can still be used before failing read/write for 0 capacity.
-        */
-       if (ns->lba_shift == 0)
-               ns->lba_shift = 9;
-       bs = 1 << ns->lba_shift;
-
-       /* XXX: PI implementation requires metadata equal to the t10 pi tuple size */
-       pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
-                                       id->dps & NVME_NS_DPS_PI_MASK : 0;
-
-       blk_mq_freeze_queue(disk->queue);
-       if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
-                               ns->ms != old_ms ||
-                               bs != queue_logical_block_size(disk->queue) ||
-                               (ns->ms && ns->ext)))
-               blk_integrity_unregister(disk);
-
-       ns->pi_type = pi_type;
-       blk_queue_logical_block_size(ns->queue, bs);
-
-       if (ns->ms && !ns->ext)
-               nvme_init_integrity(ns);
-
-       if ((ns->ms && !(ns->ms == 8 && ns->pi_type) &&
-                                               !blk_get_integrity(disk)) ||
-                                               ns->type == NVME_NS_LIGHTNVM)
-               set_capacity(disk, 0);
-       else
-               set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
-
-       if (dev->oncs & NVME_CTRL_ONCS_DSM)
-               nvme_config_discard(ns);
-       blk_mq_unfreeze_queue(disk->queue);
-
-       kfree(id);
-       return 0;
-}
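
The unit conversions in nvme_revalidate_disk() are easy to misread: the namespace reports capacity in logical blocks of 1 << lba_shift bytes, while the block layer counts 512-byte sectors, hence the << (lba_shift - 9). A worked example with hypothetical values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t  lba_shift = 12;       /* 4 KiB logical blocks */
	uint64_t nsze = 1ULL << 20;    /* 1Mi logical blocks from Identify */

	unsigned bs = 1u << lba_shift;              /* 4096-byte block size */
	uint64_t sectors = nsze << (lba_shift - 9); /* 8Mi 512-byte sectors */

	printf("bs=%u capacity=%llu sectors = %llu GiB\n", bs,
	       (unsigned long long)sectors,
	       (unsigned long long)(sectors >> 21)); /* 2^21 sectors per GiB */
	return 0;
}
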
-
-static char nvme_pr_type(enum pr_type type)
-{
-       switch (type) {
-       case PR_WRITE_EXCLUSIVE:
-               return 1;
-       case PR_EXCLUSIVE_ACCESS:
-               return 2;
-       case PR_WRITE_EXCLUSIVE_REG_ONLY:
-               return 3;
-       case PR_EXCLUSIVE_ACCESS_REG_ONLY:
-               return 4;
-       case PR_WRITE_EXCLUSIVE_ALL_REGS:
-               return 5;
-       case PR_EXCLUSIVE_ACCESS_ALL_REGS:
-               return 6;
-       default:
-               return 0;
-       }
-};
-
-static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
-                               u64 key, u64 sa_key, u8 op)
-{
-       struct nvme_ns *ns = bdev->bd_disk->private_data;
-       struct nvme_command c;
-       u8 data[16] = { 0, };
-
-       put_unaligned_le64(key, &data[0]);
-       put_unaligned_le64(sa_key, &data[8]);
-
-       memset(&c, 0, sizeof(c));
-       c.common.opcode = op;
-       c.common.nsid = cpu_to_le32(ns->ns_id);
-       c.common.cdw10[0] = cpu_to_le32(cdw10);
-
-       return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
-}
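
The 16-byte payload built by nvme_pr_command() packs the current reservation key at bytes 0-7 and the service-action key at bytes 8-15, both little-endian. A standalone sketch, with put_le64() standing in for the kernel's put_unaligned_le64():

#include <stdint.h>
#include <stdio.h>

static void put_le64(uint64_t v, uint8_t *p)
{
	for (int i = 0; i < 8; i++)
		p[i] = v >> (8 * i);   /* least significant byte first */
}

int main(void)
{
	uint8_t data[16] = { 0 };

	put_le64(0x1122334455667788ULL, &data[0]); /* current key */
	put_le64(0xaabbccddeeff0011ULL, &data[8]); /* new / service-action key */

	for (int i = 0; i < 16; i++)
		printf("%02x%c", data[i], i == 15 ? '\n' : ' ');
	return 0;
}
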
-
-static int nvme_pr_register(struct block_device *bdev, u64 old,
-               u64 new, unsigned flags)
-{
-       u32 cdw10;
-
-       if (flags & ~PR_FL_IGNORE_KEY)
-               return -EOPNOTSUPP;
-
-       cdw10 = old ? 2 : 0;
-       cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
-       cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
-       return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_reserve(struct block_device *bdev, u64 key,
-               enum pr_type type, unsigned flags)
-{
-       u32 cdw10;
-
-       if (flags & ~PR_FL_IGNORE_KEY)
-               return -EOPNOTSUPP;
-
-       cdw10 = nvme_pr_type(type) << 8;
-       cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
-       return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
-               enum pr_type type, bool abort)
-{
-       u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
-       return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_clear(struct block_device *bdev, u64 key)
-{
-       u32 cdw10 = 1 | (key ? 1 << 3 : 0);
-       return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
-{
-       u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
-       return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
-}
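
One C pitfall worth flagging in nvme_pr_preempt() and nvme_pr_release() above: | binds tighter than ?:, so the unparenthesized form type << 8 | flag ? a : b parses as (type << 8 | flag) ? a : b, which is why the ternaries need their own parentheses. A quick demonstration:

#include <assert.h>

int main(void)
{
	unsigned type = 1, flag = 0;

	unsigned without = type << 8 | flag ? 2 : 1;   /* (0x100 | 0) ? 2 : 1 == 2 */
	unsigned with    = type << 8 | (flag ? 2 : 1); /* 0x100 | 1 == 0x101 */

	assert(without == 2);
	assert(with == 0x101);
	return 0;
}
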
-
-static const struct pr_ops nvme_pr_ops = {
-       .pr_register    = nvme_pr_register,
-       .pr_reserve     = nvme_pr_reserve,
-       .pr_release     = nvme_pr_release,
-       .pr_preempt     = nvme_pr_preempt,
-       .pr_clear       = nvme_pr_clear,
-};
-
-static const struct block_device_operations nvme_fops = {
-       .owner          = THIS_MODULE,
-       .ioctl          = nvme_ioctl,
-       .compat_ioctl   = nvme_compat_ioctl,
-       .open           = nvme_open,
-       .release        = nvme_release,
-       .getgeo         = nvme_getgeo,
-       .revalidate_disk= nvme_revalidate_disk,
-       .pr_ops         = &nvme_pr_ops,
-};
-
-static int nvme_kthread(void *data)
-{
-       struct nvme_dev *dev, *next;
-
-       while (!kthread_should_stop()) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               spin_lock(&dev_list_lock);
-               list_for_each_entry_safe(dev, next, &dev_list, node) {
-                       int i;
-                       u32 csts = readl(&dev->bar->csts);
-
-                       if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
-                                                       csts & NVME_CSTS_CFS) {
-                               if (!__nvme_reset(dev)) {
-                                       dev_warn(dev->dev,
-                                               "Failed status: %x, reset controller\n",
-                                               readl(&dev->bar->csts));
-                               }
-                               continue;
-                       }
-                       for (i = 0; i < dev->queue_count; i++) {
-                               struct nvme_queue *nvmeq = dev->queues[i];
-                               if (!nvmeq)
-                                       continue;
-                               spin_lock_irq(&nvmeq->q_lock);
-                               nvme_process_cq(nvmeq);
-
-                               while ((i == 0) && (dev->event_limit > 0)) {
-                                       if (nvme_submit_async_admin_req(dev))
-                                               break;
-                                       dev->event_limit--;
-                               }
-                               spin_unlock_irq(&nvmeq->q_lock);
-                       }
-               }
-               spin_unlock(&dev_list_lock);
-               schedule_timeout(round_jiffies_relative(HZ));
-       }
-       return 0;
-}
-
-static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
-{
-       struct nvme_ns *ns;
-       struct gendisk *disk;
-       int node = dev_to_node(dev->dev);
-
-       ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
-       if (!ns)
-               return;
-
-       ns->queue = blk_mq_init_queue(&dev->tagset);
-       if (IS_ERR(ns->queue))
-               goto out_free_ns;
-       queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
-       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
-       ns->dev = dev;
-       ns->queue->queuedata = ns;
-
-       disk = alloc_disk_node(0, node);
-       if (!disk)
-               goto out_free_queue;
-
-       kref_init(&ns->kref);
-       ns->ns_id = nsid;
-       ns->disk = disk;
-       ns->lba_shift = 9; /* default to 512 byte blocks until the disk is validated */
-       list_add_tail(&ns->list, &dev->namespaces);
-
-       blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
-       if (dev->max_hw_sectors) {
-               blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
-               blk_queue_max_segments(ns->queue,
-                       (dev->max_hw_sectors / (dev->page_size >> 9)) + 1);
-       }
-       if (dev->stripe_size)
-               blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
-       if (dev->vwc & NVME_CTRL_VWC_PRESENT)
-               blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
-       blk_queue_virt_boundary(ns->queue, dev->page_size - 1);
-
-       disk->major = nvme_major;
-       disk->first_minor = 0;
-       disk->fops = &nvme_fops;
-       disk->private_data = ns;
-       disk->queue = ns->queue;
-       disk->driverfs_dev = dev->device;
-       disk->flags = GENHD_FL_EXT_DEVT;
-       sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
-
-       /*
-        * Initialize capacity to 0 until we establish the namespace format and
-        * set up integrity extensions if necessary. The revalidate_disk after
-        * add_disk allows the driver to register with integrity if the format
-        * requires it.
-        */
-       set_capacity(disk, 0);
-       if (nvme_revalidate_disk(ns->disk))
-               goto out_free_disk;
-
-       kref_get(&dev->kref);
-       if (ns->type != NVME_NS_LIGHTNVM) {
-               add_disk(ns->disk);
-               if (ns->ms) {
-                       struct block_device *bd = bdget_disk(ns->disk, 0);
-                       if (!bd)
-                               return;
-                       if (blkdev_get(bd, FMODE_READ, NULL)) {
-                               bdput(bd);
-                               return;
-                       }
-                       blkdev_reread_part(bd);
-                       blkdev_put(bd, FMODE_READ);
-               }
-       }
-       return;
- out_free_disk:
-       kfree(disk);
-       list_del(&ns->list);
- out_free_queue:
-       blk_cleanup_queue(ns->queue);
- out_free_ns:
-       kfree(ns);
-}
-
-/*
- * Create I/O queues.  Failing to create an I/O queue is not an issue,
- * we can continue with less than the desired amount of queues, and
- * even a controller without I/O queues an still be used to issue
- * admin commands.  This might be useful to upgrade a buggy firmware
- * for example.
- */
-static void nvme_create_io_queues(struct nvme_dev *dev)
-{
-       unsigned i;
-
-       for (i = dev->queue_count; i <= dev->max_qid; i++)
-               if (!nvme_alloc_queue(dev, i, dev->q_depth))
-                       break;
-
-       for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
-               if (nvme_create_queue(dev->queues[i], i)) {
-                       nvme_free_queues(dev, i);
-                       break;
-               }
-}
-
-static int set_queue_count(struct nvme_dev *dev, int count)
-{
-       int status;
-       u32 result;
-       u32 q_count = (count - 1) | ((count - 1) << 16);
-
-       status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
-                                                               &result);
-       if (status < 0)
-               return status;
-       if (status > 0) {
-               dev_err(dev->dev, "Could not set queue count (%d)\n", status);
-               return 0;
-       }
-       return min(result & 0xffff, result >> 16) + 1;
-}
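
Both the requested and returned queue counts in set_queue_count() are 0-based, with the submission-queue count in bits 15:0 and the completion-queue count in bits 31:16; the driver can only use min(SQs, CQs) pairs. A worked example with a hypothetical controller response:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int count = 8;                       /* want 8 I/O queue pairs */
	uint32_t q_count = (count - 1) | ((count - 1) << 16); /* 0x00070007 */

	/* suppose the controller grants only 4 SQs and 6 CQs */
	uint32_t result = (6 - 1) << 16 | (4 - 1);

	uint32_t sqs = (result & 0xffff) + 1, cqs = (result >> 16) + 1;
	int granted = sqs < cqs ? sqs : cqs;

	printf("asked for %#x, usable queue pairs: %d\n",
	       (unsigned)q_count, granted);
	return 0;
}
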
-
-static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
-{
-       u64 szu, size, offset;
-       u32 cmbloc;
-       resource_size_t bar_size;
-       struct pci_dev *pdev = to_pci_dev(dev->dev);
-       void __iomem *cmb;
-       dma_addr_t dma_addr;
-
-       if (!use_cmb_sqes)
-               return NULL;
-
-       dev->cmbsz = readl(&dev->bar->cmbsz);
-       if (!(NVME_CMB_SZ(dev->cmbsz)))
-               return NULL;
-
-       cmbloc = readl(&dev->bar->cmbloc);
-
-       szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
-       size = szu * NVME_CMB_SZ(dev->cmbsz);
-       offset = szu * NVME_CMB_OFST(cmbloc);
-       bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
-
-       if (offset > bar_size)
-               return NULL;
-
-       /*
-        * Controllers may support a CMB size larger than their BAR,
-        * for example, due to being behind a bridge. Reduce the CMB to
-        * the reported size of the BAR.
-        */
-       if (size > bar_size - offset)
-               size = bar_size - offset;
-
-       dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
-       cmb = ioremap_wc(dma_addr, size);
-       if (!cmb)
-               return NULL;
-
-       dev->cmb_dma_addr = dma_addr;
-       dev->cmb_size = size;
-       return cmb;
-}
-
-static inline void nvme_release_cmb(struct nvme_dev *dev)
+static inline void nvme_release_cmb(struct nvme_dev *dev)
 {
        if (dev->cmb) {
                iounmap(dev->cmb);
@@ -2431,11 +1477,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
        int result, i, vecs, nr_io_queues, size;
 
        nr_io_queues = num_possible_cpus();
-       result = set_queue_count(dev, nr_io_queues);
-       if (result <= 0)
+       result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
+       if (result < 0)
                return result;
-       if (result < nr_io_queues)
-               nr_io_queues = result;
+
+       /*
+        * Degraded controllers might return an error when setting the queue
+        * count.  We still want to be able to bring them online and offer
+        * access to the admin queue, as that might be the only way to fix them up.
+        */
+       if (result > 0) {
+               dev_err(dev->dev, "Could not set queue count (%d)\n", result);
+               nr_io_queues = 0;
+               result = 0;
+       }
 
        if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
                result = nvme_cmb_qdepth(dev, nr_io_queues,
@@ -2457,7 +1512,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
                                return -ENOMEM;
                        size = db_bar_size(dev, nr_io_queues);
                } while (1);
-               dev->dbs = ((void __iomem *)dev->bar) + 4096;
+               dev->dbs = dev->bar + 4096;
                adminq->q_db = dev->dbs;
        }
 
@@ -2501,115 +1556,115 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 
        /* Free previously allocated queues that are no longer usable */
        nvme_free_queues(dev, nr_io_queues + 1);
-       nvme_create_io_queues(dev);
-
-       return 0;
+       return nvme_create_io_queues(dev);
 
  free_queues:
        nvme_free_queues(dev, 1);
        return result;
 }
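
The result handling above leans on a driver-wide convention: negative values from nvme_set_queue_count() are Linux errnos (a transport failure that should abort bring-up), while positive values are NVMe completion status codes from a possibly degraded controller, in which case the driver falls back to admin-queue-only operation. A reduced model of that branch:

#include <stdio.h>

/* hypothetical helper: rc < 0 is an errno, rc > 0 an NVMe status code */
static int handle_queue_count(int rc, int *nr_io_queues)
{
	if (rc < 0)
		return rc;          /* transport error: abort bring-up */
	if (rc > 0)
		*nr_io_queues = 0;  /* degraded controller: admin queue only */
	return 0;
}

int main(void)
{
	int n = 8;
	int rc = handle_queue_count(0x2 /* e.g. Invalid Field */, &n);

	printf("rc=%d nr_io_queues=%d\n", rc, n); /* rc=0 nr_io_queues=0 */
	return 0;
}
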
 
-static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+static void nvme_set_irq_hints(struct nvme_dev *dev)
 {
-       struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
-       struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+       struct nvme_queue *nvmeq;
+       int i;
 
-       return nsa->ns_id - nsb->ns_id;
-}
+       for (i = 0; i < dev->online_queues; i++) {
+               nvmeq = dev->queues[i];
 
-static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
-{
-       struct nvme_ns *ns;
+               if (!nvmeq->tags || !(*nvmeq->tags))
+                       continue;
 
-       list_for_each_entry(ns, &dev->namespaces, list) {
-               if (ns->ns_id == nsid)
-                       return ns;
-               if (ns->ns_id > nsid)
-                       break;
+               irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
+                                       blk_mq_tags_cpumask(*nvmeq->tags));
        }
-       return NULL;
 }
 
-static inline bool nvme_io_incapable(struct nvme_dev *dev)
+static void nvme_dev_scan(struct work_struct *work)
 {
-       return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
-                                                       dev->online_queues < 2);
+       struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+
+       if (!dev->tagset.tags)
+               return;
+       nvme_scan_namespaces(&dev->ctrl);
+       nvme_set_irq_hints(dev);
 }
 
-static void nvme_ns_remove(struct nvme_ns *ns)
+static void nvme_del_queue_end(struct request *req, int error)
 {
-       bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+       struct nvme_queue *nvmeq = req->end_io_data;
 
-       if (kill) {
-               blk_set_queue_dying(ns->queue);
-
-               /*
-                * The controller was shut down first if we got here through
-                * device removal. The shutdown may requeue outstanding
-                * requests. These need to be aborted immediately so
-                * del_gendisk doesn't block indefinitely for their completion.
-                */
-               blk_mq_abort_requeue_list(ns->queue);
-       }
-       if (ns->disk->flags & GENHD_FL_UP)
-               del_gendisk(ns->disk);
-       if (kill || !blk_queue_dying(ns->queue)) {
-               blk_mq_abort_requeue_list(ns->queue);
-               blk_cleanup_queue(ns->queue);
-       }
-       list_del_init(&ns->list);
-       kref_put(&ns->kref, nvme_free_ns);
+       blk_mq_free_request(req);
+       complete(&nvmeq->dev->ioq_wait);
 }
 
-static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+static void nvme_del_cq_end(struct request *req, int error)
 {
-       struct nvme_ns *ns, *next;
-       unsigned i;
+       struct nvme_queue *nvmeq = req->end_io_data;
 
-       for (i = 1; i <= nn; i++) {
-               ns = nvme_find_ns(dev, i);
-               if (ns) {
-                       if (revalidate_disk(ns->disk))
-                               nvme_ns_remove(ns);
-               } else
-                       nvme_alloc_ns(dev, i);
-       }
-       list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
-               if (ns->ns_id > nn)
-                       nvme_ns_remove(ns);
+       if (!error) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&nvmeq->q_lock, flags);
+               nvme_process_cq(nvmeq);
+               spin_unlock_irqrestore(&nvmeq->q_lock, flags);
        }
-       list_sort(NULL, &dev->namespaces, ns_cmp);
+
+       nvme_del_queue_end(req, error);
 }
 
-static void nvme_set_irq_hints(struct nvme_dev *dev)
+static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 {
-       struct nvme_queue *nvmeq;
-       int i;
+       struct request_queue *q = nvmeq->dev->ctrl.admin_q;
+       struct request *req;
+       struct nvme_command cmd;
 
-       for (i = 0; i < dev->online_queues; i++) {
-               nvmeq = dev->queues[i];
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.delete_queue.opcode = opcode;
+       cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
 
-               if (!nvmeq->tags || !(*nvmeq->tags))
-                       continue;
+       req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
 
-               irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
-                                       blk_mq_tags_cpumask(*nvmeq->tags));
-       }
+       req->timeout = ADMIN_TIMEOUT;
+       req->end_io_data = nvmeq;
+
+       blk_execute_rq_nowait(q, NULL, req, false,
+                       opcode == nvme_admin_delete_cq ?
+                               nvme_del_cq_end : nvme_del_queue_end);
+       return 0;
 }
 
-static void nvme_dev_scan(struct work_struct *work)
+static void nvme_disable_io_queues(struct nvme_dev *dev)
 {
-       struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
-       struct nvme_id_ctrl *ctrl;
+       int pass;
+       unsigned long timeout;
+       u8 opcode = nvme_admin_delete_sq;
 
-       if (!dev->tagset.tags)
-               return;
-       if (nvme_identify_ctrl(dev, &ctrl))
-               return;
-       nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
-       kfree(ctrl);
-       nvme_set_irq_hints(dev);
+       for (pass = 0; pass < 2; pass++) {
+               int sent = 0, i = dev->queue_count - 1;
+
+               reinit_completion(&dev->ioq_wait);
+ retry:
+               timeout = ADMIN_TIMEOUT;
+               for (; i > 0; i--) {
+                       struct nvme_queue *nvmeq = dev->queues[i];
+
+                       if (!pass)
+                               nvme_suspend_queue(nvmeq);
+                       if (nvme_delete_queue(nvmeq, opcode))
+                               break;
+                       ++sent;
+               }
+               while (sent--) {
+                       timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
+                       if (timeout == 0)
+                               return;
+                       if (i)
+                               goto retry;
+               }
+               opcode = nvme_admin_delete_cq;
+       }
 }
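
The two passes above follow the spec's required ordering: every I/O submission queue must be deleted before its paired completion queue. A userspace stand-in for the loop's shape; unlike the real code, delete_queue() here is hypothetical and synchronous, so the completion and timeout bookkeeping is elided:

#include <stdio.h>

enum { DELETE_SQ, DELETE_CQ };

static int delete_queue(int qid, int op)
{
	printf("delete %cQ %d\n", op == DELETE_SQ ? 'S' : 'C', qid);
	return 0; /* a real controller completes this asynchronously */
}

int main(void)
{
	int nr_queues = 4;

	for (int pass = 0; pass < 2; pass++) {
		int op = pass ? DELETE_CQ : DELETE_SQ;

		for (int i = nr_queues; i > 0; i--)
			if (delete_queue(i, op))
				break;
		/* the driver waits here until every command sent in this
		 * pass completes, sharing one ADMIN_TIMEOUT budget */
	}
	return 0;
}
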
 
 /*
@@ -2620,42 +1675,7 @@ static void nvme_dev_scan(struct work_struct *work)
  */
 static int nvme_dev_add(struct nvme_dev *dev)
 {
-       struct pci_dev *pdev = to_pci_dev(dev->dev);
-       int res;
-       struct nvme_id_ctrl *ctrl;
-       int shift = NVME_CAP_MPSMIN(lo_hi_readq(&dev->bar->cap)) + 12;
-
-       res = nvme_identify_ctrl(dev, &ctrl);
-       if (res) {
-               dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
-               return -EIO;
-       }
-
-       dev->oncs = le16_to_cpup(&ctrl->oncs);
-       dev->abort_limit = ctrl->acl + 1;
-       dev->vwc = ctrl->vwc;
-       memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
-       memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
-       memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
-       if (ctrl->mdts)
-               dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
-       else
-               dev->max_hw_sectors = UINT_MAX;
-       if ((pdev->vendor == PCI_VENDOR_ID_INTEL) &&
-                       (pdev->device == 0x0953) && ctrl->vs[3]) {
-               unsigned int max_hw_sectors;
-
-               dev->stripe_size = 1 << (ctrl->vs[3] + shift);
-               max_hw_sectors = dev->stripe_size >> (shift - 9);
-               if (dev->max_hw_sectors) {
-                       dev->max_hw_sectors = min(max_hw_sectors,
-                                                       dev->max_hw_sectors);
-               } else
-                       dev->max_hw_sectors = max_hw_sectors;
-       }
-       kfree(ctrl);
-
-       if (!dev->tagset.tags) {
+       if (!dev->ctrl.tagset) {
                dev->tagset.ops = &nvme_mq_ops;
                dev->tagset.nr_hw_queues = dev->online_queues - 1;
                dev->tagset.timeout = NVME_IO_TIMEOUT;
@@ -2668,8 +1688,9 @@ static int nvme_dev_add(struct nvme_dev *dev)
 
                if (blk_mq_alloc_tag_set(&dev->tagset))
                        return 0;
+               dev->ctrl.tagset = &dev->tagset;
        }
-       schedule_work(&dev->scan_work);
+       queue_work(nvme_workq, &dev->scan_work);
        return 0;
 }
 
@@ -2699,7 +1720,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
        if (!dev->bar)
                goto disable;
 
-       if (readl(&dev->bar->csts) == -1) {
+       if (readl(dev->bar + NVME_REG_CSTS) == -1) {
                result = -ENODEV;
                goto unmap;
        }
@@ -2714,10 +1735,11 @@ static int nvme_dev_map(struct nvme_dev *dev)
                        goto unmap;
        }
 
-       cap = lo_hi_readq(&dev->bar->cap);
+       cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+
        dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
        dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
-       dev->dbs = ((void __iomem *)dev->bar) + 4096;
+       dev->dbs = dev->bar + 4096;
 
        /*
         * Temporary fix for the Apple controller found in the MacBook8,1 and
@@ -2730,9 +1752,11 @@ static int nvme_dev_map(struct nvme_dev *dev)
                        dev->q_depth);
        }
 
-       if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+       if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
                dev->cmb = nvme_map_cmb(dev);
 
+       pci_enable_pcie_error_reporting(pdev);
+       pci_save_state(pdev);
        return 0;
 
  unmap:
@@ -2760,152 +1784,34 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
                pci_release_regions(pdev);
        }
 
-       if (pci_is_enabled(pdev))
+       if (pci_is_enabled(pdev)) {
+               pci_disable_pcie_error_reporting(pdev);
                pci_disable_device(pdev);
-}
-
-struct nvme_delq_ctx {
-       struct task_struct *waiter;
-       struct kthread_worker *worker;
-       atomic_t refcount;
-};
-
-static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
-{
-       dq->waiter = current;
-       mb();
-
-       for (;;) {
-               set_current_state(TASK_KILLABLE);
-               if (!atomic_read(&dq->refcount))
-                       break;
-               if (!schedule_timeout(ADMIN_TIMEOUT) ||
-                                       fatal_signal_pending(current)) {
-                       /*
-                        * Disable the controller first since we can't trust it
-                        * at this point, but leave the admin queue enabled
-                        * until all queue deletion requests are flushed.
-                        * FIXME: This may take a while if there are more h/w
-                        * queues than admin tags.
-                        */
-                       set_current_state(TASK_RUNNING);
-                       nvme_disable_ctrl(dev, lo_hi_readq(&dev->bar->cap));
-                       nvme_clear_queue(dev->queues[0]);
-                       flush_kthread_worker(dq->worker);
-                       nvme_disable_queue(dev, 0);
-                       return;
-               }
        }
-       set_current_state(TASK_RUNNING);
 }
 
-static void nvme_put_dq(struct nvme_delq_ctx *dq)
+static int nvme_dev_list_add(struct nvme_dev *dev)
 {
-       atomic_dec(&dq->refcount);
-       if (dq->waiter)
-               wake_up_process(dq->waiter);
-}
-
-static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
-{
-       atomic_inc(&dq->refcount);
-       return dq;
-}
-
-static void nvme_del_queue_end(struct nvme_queue *nvmeq)
-{
-       struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
-       nvme_put_dq(dq);
-
-       spin_lock_irq(&nvmeq->q_lock);
-       nvme_process_cq(nvmeq);
-       spin_unlock_irq(&nvmeq->q_lock);
-}
-
-static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
-                                               kthread_work_func_t fn)
-{
-       struct nvme_command c;
-
-       memset(&c, 0, sizeof(c));
-       c.delete_queue.opcode = opcode;
-       c.delete_queue.qid = cpu_to_le16(nvmeq->qid);
-
-       init_kthread_work(&nvmeq->cmdinfo.work, fn);
-       return nvme_submit_admin_async_cmd(nvmeq->dev, &c, &nvmeq->cmdinfo,
-                                                               ADMIN_TIMEOUT);
-}
-
-static void nvme_del_cq_work_handler(struct kthread_work *work)
-{
-       struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-                                                       cmdinfo.work);
-       nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_cq(struct nvme_queue *nvmeq)
-{
-       return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
-                                               nvme_del_cq_work_handler);
-}
-
-static void nvme_del_sq_work_handler(struct kthread_work *work)
-{
-       struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-                                                       cmdinfo.work);
-       int status = nvmeq->cmdinfo.status;
-
-       if (!status)
-               status = nvme_delete_cq(nvmeq);
-       if (status)
-               nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_sq(struct nvme_queue *nvmeq)
-{
-       return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
-                                               nvme_del_sq_work_handler);
-}
-
-static void nvme_del_queue_start(struct kthread_work *work)
-{
-       struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-                                                       cmdinfo.work);
-       if (nvme_delete_sq(nvmeq))
-               nvme_del_queue_end(nvmeq);
-}
+       bool start_thread = false;
 
-static void nvme_disable_io_queues(struct nvme_dev *dev)
-{
-       int i;
-       DEFINE_KTHREAD_WORKER_ONSTACK(worker);
-       struct nvme_delq_ctx dq;
-       struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
-                                       &worker, "nvme%d", dev->instance);
-
-       if (IS_ERR(kworker_task)) {
-               dev_err(dev->dev,
-                       "Failed to create queue del task\n");
-               for (i = dev->queue_count - 1; i > 0; i--)
-                       nvme_disable_queue(dev, i);
-               return;
+       spin_lock(&dev_list_lock);
+       if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
+               start_thread = true;
+               nvme_thread = NULL;
        }
+       list_add(&dev->node, &dev_list);
+       spin_unlock(&dev_list_lock);
 
-       dq.waiter = NULL;
-       atomic_set(&dq.refcount, 0);
-       dq.worker = &worker;
-       for (i = dev->queue_count - 1; i > 0; i--) {
-               struct nvme_queue *nvmeq = dev->queues[i];
+       if (start_thread) {
+               nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
+               wake_up_all(&nvme_kthread_wait);
+       } else
+               wait_event_killable(nvme_kthread_wait, nvme_thread);
 
-               if (nvme_suspend_queue(nvmeq))
-                       continue;
-               nvmeq->cmdinfo.ctx = nvme_get_dq(&dq);
-               nvmeq->cmdinfo.worker = dq.worker;
-               init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start);
-               queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work);
-       }
-       nvme_wait_dq(&dq, dev);
-       kthread_stop(kworker_task);
+       if (IS_ERR_OR_NULL(nvme_thread))
+               return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
+
+       return 0;
 }
 
 /*
@@ -2928,44 +1834,17 @@ static void nvme_dev_list_remove(struct nvme_dev *dev)
                kthread_stop(tmp);
 }
 
-static void nvme_freeze_queues(struct nvme_dev *dev)
-{
-       struct nvme_ns *ns;
-
-       list_for_each_entry(ns, &dev->namespaces, list) {
-               blk_mq_freeze_queue_start(ns->queue);
-
-               spin_lock_irq(ns->queue->queue_lock);
-               queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
-               spin_unlock_irq(ns->queue->queue_lock);
-
-               blk_mq_cancel_requeue_work(ns->queue);
-               blk_mq_stop_hw_queues(ns->queue);
-       }
-}
-
-static void nvme_unfreeze_queues(struct nvme_dev *dev)
-{
-       struct nvme_ns *ns;
-
-       list_for_each_entry(ns, &dev->namespaces, list) {
-               queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
-               blk_mq_unfreeze_queue(ns->queue);
-               blk_mq_start_stopped_hw_queues(ns->queue, true);
-               blk_mq_kick_requeue_list(ns->queue);
-       }
-}
-
-static void nvme_dev_shutdown(struct nvme_dev *dev)
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
        int i;
        u32 csts = -1;
 
        nvme_dev_list_remove(dev);
 
+       mutex_lock(&dev->shutdown_lock);
        if (dev->bar) {
-               nvme_freeze_queues(dev);
-               csts = readl(&dev->bar->csts);
+               nvme_stop_queues(&dev->ctrl);
+               csts = readl(dev->bar + NVME_REG_CSTS);
        }
        if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
                for (i = dev->queue_count - 1; i >= 0; i--) {
@@ -2974,30 +1853,13 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
                }
        } else {
                nvme_disable_io_queues(dev);
-               nvme_shutdown_ctrl(dev);
-               nvme_disable_queue(dev, 0);
+               nvme_disable_admin_queue(dev, shutdown);
        }
        nvme_dev_unmap(dev);
 
        for (i = dev->queue_count - 1; i >= 0; i--)
                nvme_clear_queue(dev->queues[i]);
-}
-
-static void nvme_dev_remove(struct nvme_dev *dev)
-{
-       struct nvme_ns *ns, *next;
-
-       if (nvme_io_incapable(dev)) {
-               /*
-                * If the device is not capable of IO (surprise hot-removal,
-                * for example), we need to quiesce prior to deleting the
-                * namespaces. This will end outstanding requests and prevent
-                * attempts to sync dirty data.
-                */
-               nvme_dev_shutdown(dev);
-       }
-       list_for_each_entry_safe(ns, next, &dev->namespaces, list)
-               nvme_ns_remove(ns);
+       mutex_unlock(&dev->shutdown_lock);
 }
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -3023,119 +1885,36 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
        dma_pool_destroy(dev->prp_small_pool);
 }
 
-static DEFINE_IDA(nvme_instance_ida);
-
-static int nvme_set_instance(struct nvme_dev *dev)
-{
-       int instance, error;
-
-       do {
-               if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
-                       return -ENODEV;
-
-               spin_lock(&dev_list_lock);
-               error = ida_get_new(&nvme_instance_ida, &instance);
-               spin_unlock(&dev_list_lock);
-       } while (error == -EAGAIN);
-
-       if (error)
-               return -ENODEV;
-
-       dev->instance = instance;
-       return 0;
-}
-
-static void nvme_release_instance(struct nvme_dev *dev)
+static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 {
-       spin_lock(&dev_list_lock);
-       ida_remove(&nvme_instance_ida, dev->instance);
-       spin_unlock(&dev_list_lock);
-}
-
-static void nvme_free_dev(struct kref *kref)
-{
-       struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
+       struct nvme_dev *dev = to_nvme_dev(ctrl);
 
        put_device(dev->dev);
-       put_device(dev->device);
-       nvme_release_instance(dev);
        if (dev->tagset.tags)
                blk_mq_free_tag_set(&dev->tagset);
-       if (dev->admin_q)
-               blk_put_queue(dev->admin_q);
+       if (dev->ctrl.admin_q)
+               blk_put_queue(dev->ctrl.admin_q);
        kfree(dev->queues);
        kfree(dev->entry);
        kfree(dev);
 }
 
-static int nvme_dev_open(struct inode *inode, struct file *f)
+static void nvme_reset_work(struct work_struct *work)
 {
-       struct nvme_dev *dev;
-       int instance = iminor(inode);
-       int ret = -ENODEV;
-
-       spin_lock(&dev_list_lock);
-       list_for_each_entry(dev, &dev_list, node) {
-               if (dev->instance == instance) {
-                       if (!dev->admin_q) {
-                               ret = -EWOULDBLOCK;
-                               break;
-                       }
-                       if (!kref_get_unless_zero(&dev->kref))
-                               break;
-                       f->private_data = dev;
-                       ret = 0;
-                       break;
-               }
-       }
-       spin_unlock(&dev_list_lock);
-
-       return ret;
-}
-
-static int nvme_dev_release(struct inode *inode, struct file *f)
-{
-       struct nvme_dev *dev = f->private_data;
-       kref_put(&dev->kref, nvme_free_dev);
-       return 0;
-}
+       struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+       int result;
 
-static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
-       struct nvme_dev *dev = f->private_data;
-       struct nvme_ns *ns;
-
-       switch (cmd) {
-       case NVME_IOCTL_ADMIN_CMD:
-               return nvme_user_cmd(dev, NULL, (void __user *)arg);
-       case NVME_IOCTL_IO_CMD:
-               if (list_empty(&dev->namespaces))
-                       return -ENOTTY;
-               ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
-               return nvme_user_cmd(dev, ns, (void __user *)arg);
-       case NVME_IOCTL_RESET:
-               dev_warn(dev->dev, "resetting controller\n");
-               return nvme_reset(dev);
-       case NVME_IOCTL_SUBSYS_RESET:
-               return nvme_subsys_reset(dev);
-       default:
-               return -ENOTTY;
-       }
-}
+       if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
+               goto out;
 
-static const struct file_operations nvme_dev_fops = {
-       .owner          = THIS_MODULE,
-       .open           = nvme_dev_open,
-       .release        = nvme_dev_release,
-       .unlocked_ioctl = nvme_dev_ioctl,
-       .compat_ioctl   = nvme_dev_ioctl,
-};
+       /*
+        * If we're called to reset a live controller first shut it down before
+        * moving on.
+        */
+       if (dev->bar)
+               nvme_dev_disable(dev, false);
 
-static void nvme_probe_work(struct work_struct *work)
-{
-       struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
-       bool start_thread = false;
-       int result;
+       set_bit(NVME_CTRL_RESETTING, &dev->flags);
 
        result = nvme_dev_map(dev);
        if (result)
@@ -3145,35 +1924,24 @@ static void nvme_probe_work(struct work_struct *work)
        if (result)
                goto unmap;
 
-       spin_lock(&dev_list_lock);
-       if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
-               start_thread = true;
-               nvme_thread = NULL;
-       }
-       list_add(&dev->node, &dev_list);
-       spin_unlock(&dev_list_lock);
-
-       if (start_thread) {
-               nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
-               wake_up_all(&nvme_kthread_wait);
-       } else
-               wait_event_killable(nvme_kthread_wait, nvme_thread);
-
-       if (IS_ERR_OR_NULL(nvme_thread)) {
-               result = nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
-               goto disable;
-       }
-
        nvme_init_queue(dev->queues[0], 0);
        result = nvme_alloc_admin_tags(dev);
        if (result)
                goto disable;
 
+       result = nvme_init_identify(&dev->ctrl);
+       if (result)
+               goto free_tags;
+
        result = nvme_setup_io_queues(dev);
        if (result)
                goto free_tags;
 
-       dev->event_limit = 1;
+       dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
+
+       result = nvme_dev_list_add(dev);
+       if (result)
+               goto remove;
 
        /*
         * Keep the controller around but remove all namespaces if we don't have
@@ -3181,117 +1949,98 @@ static void nvme_probe_work(struct work_struct *work)
         */
        if (dev->online_queues < 2) {
                dev_warn(dev->dev, "IO queues not created\n");
-               nvme_dev_remove(dev);
+               nvme_remove_namespaces(&dev->ctrl);
        } else {
-               nvme_unfreeze_queues(dev);
+               nvme_start_queues(&dev->ctrl);
                nvme_dev_add(dev);
        }
 
+       clear_bit(NVME_CTRL_RESETTING, &dev->flags);
        return;
 
+ remove:
+       nvme_dev_list_remove(dev);
  free_tags:
        nvme_dev_remove_admin(dev);
-       blk_put_queue(dev->admin_q);
-       dev->admin_q = NULL;
+       blk_put_queue(dev->ctrl.admin_q);
+       dev->ctrl.admin_q = NULL;
        dev->queues[0]->tags = NULL;
  disable:
-       nvme_disable_queue(dev, 0);
-       nvme_dev_list_remove(dev);
+       nvme_disable_admin_queue(dev, false);
  unmap:
        nvme_dev_unmap(dev);
  out:
-       if (!work_busy(&dev->reset_work))
-               nvme_dead_ctrl(dev);
+       nvme_remove_dead_ctrl(dev);
 }
 
-static int nvme_remove_dead_ctrl(void *arg)
+static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 {
-       struct nvme_dev *dev = (struct nvme_dev *)arg;
+       struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
        if (pci_get_drvdata(pdev))
                pci_stop_and_remove_bus_device_locked(pdev);
-       kref_put(&dev->kref, nvme_free_dev);
-       return 0;
+       nvme_put_ctrl(&dev->ctrl);
 }
 
-static void nvme_dead_ctrl(struct nvme_dev *dev)
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
 {
-       dev_warn(dev->dev, "Device failed to resume\n");
-       kref_get(&dev->kref);
-       if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
-                                               dev->instance))) {
-               dev_err(dev->dev,
-                       "Failed to start controller remove task\n");
-               kref_put(&dev->kref, nvme_free_dev);
-       }
+       dev_warn(dev->dev, "Removing after probe failure\n");
+       kref_get(&dev->ctrl.kref);
+       if (!schedule_work(&dev->remove_work))
+               nvme_put_ctrl(&dev->ctrl);
 }
 
-static void nvme_reset_work(struct work_struct *ws)
+static int nvme_reset(struct nvme_dev *dev)
 {
-       struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
-       bool in_probe = work_busy(&dev->probe_work);
-
-       nvme_dev_shutdown(dev);
+       if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
+               return -ENODEV;
 
-       /* Synchronize with device probe so that the work will see the failure
-        * status and exit gracefully without trying to schedule another reset */
-       flush_work(&dev->probe_work);
+       if (!queue_work(nvme_workq, &dev->reset_work))
+               return -EBUSY;
 
-       /* Fail this device if a reset occurred during probe to avoid
-        * infinite initialization loops. */
-       if (in_probe) {
-               nvme_dead_ctrl(dev);
-               return;
-       }
-       /* Schedule device resume asynchronously so the reset work is available
-        * to clean up errors that may occur during reinitialization */
-       schedule_work(&dev->probe_work);
+       flush_work(&dev->reset_work);
+       return 0;
 }
 
-static int __nvme_reset(struct nvme_dev *dev)
+static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 {
-       if (work_pending(&dev->reset_work))
-               return -EBUSY;
-       list_del_init(&dev->node);
-       queue_work(nvme_workq, &dev->reset_work);
+       *val = readl(to_nvme_dev(ctrl)->bar + off);
        return 0;
 }
 
-static int nvme_reset(struct nvme_dev *dev)
+static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
 {
-       int ret;
-
-       if (!dev->admin_q || blk_queue_dying(dev->admin_q))
-               return -ENODEV;
-
-       spin_lock(&dev_list_lock);
-       ret = __nvme_reset(dev);
-       spin_unlock(&dev_list_lock);
-
-       if (!ret) {
-               flush_work(&dev->reset_work);
-               flush_work(&dev->probe_work);
-               return 0;
-       }
+       writel(val, to_nvme_dev(ctrl)->bar + off);
+       return 0;
+}
 
-       return ret;
+static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+{
+       *val = readq(to_nvme_dev(ctrl)->bar + off);
+       return 0;
 }
 
-static ssize_t nvme_sysfs_reset(struct device *dev,
-                               struct device_attribute *attr, const char *buf,
-                               size_t count)
+static bool nvme_pci_io_incapable(struct nvme_ctrl *ctrl)
 {
-       struct nvme_dev *ndev = dev_get_drvdata(dev);
-       int ret;
+       struct nvme_dev *dev = to_nvme_dev(ctrl);
 
-       ret = nvme_reset(ndev);
-       if (ret < 0)
-               return ret;
+       return !dev->bar || dev->online_queues < 2;
+}
 
-       return count;
+static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+       return nvme_reset(to_nvme_dev(ctrl));
 }
-static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+       .reg_read32             = nvme_pci_reg_read32,
+       .reg_write32            = nvme_pci_reg_write32,
+       .reg_read64             = nvme_pci_reg_read64,
+       .io_incapable           = nvme_pci_io_incapable,
+       .reset_ctrl             = nvme_pci_reset_ctrl,
+       .free_ctrl              = nvme_pci_free_ctrl,
+};
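
This ops table is the seam introduced by the split into a transport-neutral core: core code reads controller registers only through nvme_ctrl_ops, never through the PCI BAR directly. A minimal sketch of the same indirection (all names here are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

struct ctrl;

struct ctrl_ops {
	int (*reg_read32)(struct ctrl *c, uint32_t off, uint32_t *val);
};

struct ctrl {
	const struct ctrl_ops *ops;
	uint32_t fake_csts;              /* stands in for a BAR register */
};

static int pci_reg_read32(struct ctrl *c, uint32_t off, uint32_t *val)
{
	(void)off;                       /* real code: *val = readl(bar + off) */
	*val = c->fake_csts;
	return 0;
}

static const struct ctrl_ops pci_ops = { .reg_read32 = pci_reg_read32 };

int main(void)
{
	struct ctrl c = { .ops = &pci_ops, .fake_csts = 0x1 };
	uint32_t csts;

	c.ops->reg_read32(&c, 0x1c, &csts); /* 0x1c is the CSTS offset */
	printf("CSTS=%#x\n", (unsigned)csts);
	return 0;
}
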
 
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
@@ -3314,46 +2063,30 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (!dev->queues)
                goto free;
 
-       INIT_LIST_HEAD(&dev->namespaces);
-       INIT_WORK(&dev->reset_work, nvme_reset_work);
        dev->dev = get_device(&pdev->dev);
        pci_set_drvdata(pdev, dev);
-       result = nvme_set_instance(dev);
-       if (result)
-               goto put_pci;
+
+       INIT_LIST_HEAD(&dev->node);
+       INIT_WORK(&dev->scan_work, nvme_dev_scan);
+       INIT_WORK(&dev->reset_work, nvme_reset_work);
+       INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
+       mutex_init(&dev->shutdown_lock);
+       init_completion(&dev->ioq_wait);
 
        result = nvme_setup_prp_pools(dev);
        if (result)
-               goto release;
-
-       kref_init(&dev->kref);
-       dev->device = device_create(nvme_class, &pdev->dev,
-                               MKDEV(nvme_char_major, dev->instance),
-                               dev, "nvme%d", dev->instance);
-       if (IS_ERR(dev->device)) {
-               result = PTR_ERR(dev->device);
-               goto release_pools;
-       }
-       get_device(dev->device);
-       dev_set_drvdata(dev->device, dev);
+               goto put_pci;
 
-       result = device_create_file(dev->device, &dev_attr_reset_controller);
+       result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
+                       id->driver_data);
        if (result)
-               goto put_dev;
+               goto release_pools;
 
-       INIT_LIST_HEAD(&dev->node);
-       INIT_WORK(&dev->scan_work, nvme_dev_scan);
-       INIT_WORK(&dev->probe_work, nvme_probe_work);
-       schedule_work(&dev->probe_work);
+       queue_work(nvme_workq, &dev->reset_work);
        return 0;
 
- put_dev:
-       device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
-       put_device(dev->device);
  release_pools:
        nvme_release_prp_pools(dev);
- release:
-       nvme_release_instance(dev);
  put_pci:
        put_device(dev->dev);
  free:
@@ -3368,54 +2101,41 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
        struct nvme_dev *dev = pci_get_drvdata(pdev);
 
        if (prepare)
-               nvme_dev_shutdown(dev);
+               nvme_dev_disable(dev, false);
        else
-               schedule_work(&dev->probe_work);
+               queue_work(nvme_workq, &dev->reset_work);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
-       nvme_dev_shutdown(dev);
+       nvme_dev_disable(dev, true);
 }
 
 static void nvme_remove(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
 
-       spin_lock(&dev_list_lock);
-       list_del_init(&dev->node);
-       spin_unlock(&dev_list_lock);
-
        pci_set_drvdata(pdev, NULL);
-       flush_work(&dev->probe_work);
-       flush_work(&dev->reset_work);
        flush_work(&dev->scan_work);
-       device_remove_file(dev->device, &dev_attr_reset_controller);
-       nvme_dev_remove(dev);
-       nvme_dev_shutdown(dev);
+       nvme_remove_namespaces(&dev->ctrl);
+       nvme_uninit_ctrl(&dev->ctrl);
+       nvme_dev_disable(dev, true);
+       flush_work(&dev->reset_work);
        nvme_dev_remove_admin(dev);
-       device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
        nvme_free_queues(dev, 0);
        nvme_release_cmb(dev);
        nvme_release_prp_pools(dev);
-       kref_put(&dev->kref, nvme_free_dev);
+       nvme_put_ctrl(&dev->ctrl);
 }
 
-/* These functions are yet to be implemented */
-#define nvme_error_detected NULL
-#define nvme_dump_registers NULL
-#define nvme_link_reset NULL
-#define nvme_slot_reset NULL
-#define nvme_error_resume NULL
-
 #ifdef CONFIG_PM_SLEEP
 static int nvme_suspend(struct device *dev)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
        struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-       nvme_dev_shutdown(ndev);
+       nvme_dev_disable(ndev, true);
        return 0;
 }
 
@@ -3424,17 +2144,53 @@ static int nvme_resume(struct device *dev)
        struct pci_dev *pdev = to_pci_dev(dev);
        struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-       schedule_work(&ndev->probe_work);
+       queue_work(nvme_workq, &ndev->reset_work);
        return 0;
 }
 #endif
 
 static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
 
+static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
+                                               pci_channel_state_t state)
+{
+       struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+       /*
+        * A frozen channel requires a reset. When detected, this method will
+        * shut down the controller to quiesce. The controller will be restarted
+        * after the slot reset through the driver's slot_reset callback.
+        */
+       dev_warn(&pdev->dev, "error detected: state:%d\n", state);
+       switch (state) {
+       case pci_channel_io_normal:
+               return PCI_ERS_RESULT_CAN_RECOVER;
+       case pci_channel_io_frozen:
+               nvme_dev_disable(dev, false);
+               return PCI_ERS_RESULT_NEED_RESET;
+       case pci_channel_io_perm_failure:
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
+{
+       struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+       dev_info(&pdev->dev, "restart after slot reset\n");
+       pci_restore_state(pdev);
+       queue_work(nvme_workq, &dev->reset_work);
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void nvme_error_resume(struct pci_dev *pdev)
+{
+       pci_cleanup_aer_uncorrect_error_status(pdev);
+}
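
For orientation, the recovery contract these three callbacks implement, restated in userspace (state and result names mirror <linux/pci.h>; this is a model, not driver code):

#include <stdio.h>

enum chan_state { IO_NORMAL, IO_FROZEN, IO_PERM_FAILURE };
enum ers_result { CAN_RECOVER, NEED_RESET, DISCONNECT };

/* mirrors nvme_error_detected(): a frozen channel quiesces the
 * controller and requests a slot reset; a dead channel disconnects */
static enum ers_result error_detected(enum chan_state state)
{
	switch (state) {
	case IO_NORMAL:       return CAN_RECOVER;
	case IO_FROZEN:       return NEED_RESET;
	case IO_PERM_FAILURE: return DISCONNECT;
	}
	return NEED_RESET;
}

int main(void)
{
	printf("frozen -> %d (NEED_RESET)\n", error_detected(IO_FROZEN));
	return 0;
}
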
+
 static const struct pci_error_handlers nvme_err_handler = {
        .error_detected = nvme_error_detected,
-       .mmio_enabled   = nvme_dump_registers,
-       .link_reset     = nvme_link_reset,
        .slot_reset     = nvme_slot_reset,
        .resume         = nvme_error_resume,
        .reset_notify   = nvme_reset_notify,
@@ -3444,6 +2200,10 @@ static const struct pci_error_handlers nvme_err_handler = {
 #define PCI_CLASS_STORAGE_EXPRESS      0x010802
 
 static const struct pci_device_id nvme_id_table[] = {
+       { PCI_VDEVICE(INTEL, 0x0953),
+               .driver_data = NVME_QUIRK_STRIPE_SIZE, },
+       { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
+               .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
        { 0, }
@@ -3468,40 +2228,21 @@ static int __init nvme_init(void)
 
        init_waitqueue_head(&nvme_kthread_wait);
 
-       nvme_workq = create_singlethread_workqueue("nvme");
+       nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
        if (!nvme_workq)
                return -ENOMEM;
 
-       result = register_blkdev(nvme_major, "nvme");
+       result = nvme_core_init();
        if (result < 0)
                goto kill_workq;
-       else if (result > 0)
-               nvme_major = result;
-
-       result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
-                                                       &nvme_dev_fops);
-       if (result < 0)
-               goto unregister_blkdev;
-       else if (result > 0)
-               nvme_char_major = result;
-
-       nvme_class = class_create(THIS_MODULE, "nvme");
-       if (IS_ERR(nvme_class)) {
-               result = PTR_ERR(nvme_class);
-               goto unregister_chrdev;
-       }
 
        result = pci_register_driver(&nvme_driver);
        if (result)
-               goto destroy_class;
+               goto core_exit;
        return 0;
 
- destroy_class:
-       class_destroy(nvme_class);
- unregister_chrdev:
-       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
- unregister_blkdev:
-       unregister_blkdev(nvme_major, "nvme");
+ core_exit:
+       nvme_core_exit();
  kill_workq:
        destroy_workqueue(nvme_workq);
        return result;
@@ -3510,10 +2251,8 @@ static int __init nvme_init(void)
 static void __exit nvme_exit(void)
 {
        pci_unregister_driver(&nvme_driver);
-       unregister_blkdev(nvme_major, "nvme");
+       nvme_core_exit();
        destroy_workqueue(nvme_workq);
-       class_destroy(nvme_class);
-       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
        BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
        _nvme_check_size();
 }
index c3d8d3887a313bc8ee914292a83d6c6dcbf41afa..e947e298a737b17a5267071a9102dbc7da72f6cb 100644 (file)
@@ -524,7 +524,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
                                        struct sg_io_hdr *hdr, u8 *inq_response,
                                        int alloc_len)
 {
-       struct nvme_dev *dev = ns->dev;
+       struct nvme_ctrl *ctrl = ns->ctrl;
        struct nvme_id_ns *id_ns;
        int res;
        int nvme_sc;
@@ -532,10 +532,10 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
        u8 resp_data_format = 0x02;
        u8 protect;
        u8 cmdque = 0x01 << 1;
-       u8 fw_offset = sizeof(dev->firmware_rev);
+       u8 fw_offset = sizeof(ctrl->firmware_rev);
 
        /* nvme ns identify - use DPS value for PROTECT field */
-       nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+       nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
                return res;
@@ -553,12 +553,12 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
        inq_response[5] = protect;      /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
        inq_response[7] = cmdque;       /* wbus16=0 | sync=0 | vs=0 */
        strncpy(&inq_response[8], "NVMe    ", 8);
-       strncpy(&inq_response[16], dev->model, 16);
+       strncpy(&inq_response[16], ctrl->model, 16);
 
-       while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
+       while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
                fw_offset--;
        fw_offset -= 4;
-       strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
+       strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
 
        xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
        return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
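
The firmware-revision trimming above squeezes NVMe's 8-byte, space-padded FR field into the 4 bytes this INQUIRY response has room for, preferring the tail of the string. Worked through with a hypothetical revision:

#include <stdio.h>

int main(void)
{
	char fr[8] = { '1', '.', '0', '.', '5', ' ', ' ', ' ' };
	int fw_offset = sizeof(fr);

	while (fw_offset > 4 && fr[fw_offset - 1] == ' ')
		fw_offset--;            /* skip trailing padding: 8 -> 5 */
	fw_offset -= 4;                 /* copy window starts at index 1 */

	printf("%.4s\n", fr + fw_offset); /* prints ".0.5" */
	return 0;
}
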
@@ -588,82 +588,113 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
                                        struct sg_io_hdr *hdr, u8 *inq_response,
                                        int alloc_len)
 {
-       struct nvme_dev *dev = ns->dev;
        int xfer_len;
 
        memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
        inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
        inq_response[3] = INQ_SERIAL_NUMBER_LENGTH;    /* Page Length */
-       strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
+       strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
 
        xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
        return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       u8 *inq_response, int alloc_len)
+static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+               u8 *inq_response, int alloc_len)
 {
-       struct nvme_dev *dev = ns->dev;
-       int res;
-       int nvme_sc;
-       int xfer_len;
-       __be32 tmp_id = cpu_to_be32(ns->ns_id);
+       struct nvme_id_ns *id_ns;
+       int nvme_sc, res;
+       size_t len;
+       void *eui;
 
-       memset(inq_response, 0, alloc_len);
-       inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;    /* Page Code */
-       if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
-               struct nvme_id_ns *id_ns;
-               void *eui;
-               int len;
+       nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
+       res = nvme_trans_status_code(hdr, nvme_sc);
+       if (res)
+               return res;
 
-               nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
-               res = nvme_trans_status_code(hdr, nvme_sc);
-               if (res)
-                       return res;
+       eui = id_ns->eui64;
+       len = sizeof(id_ns->eui64);
 
-               eui = id_ns->eui64;
-               len = sizeof(id_ns->eui64);
-               if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
-                       if (bitmap_empty(eui, len * 8)) {
-                               eui = id_ns->nguid;
-                               len = sizeof(id_ns->nguid);
-                       }
-               }
+       if (ns->ctrl->vs >= NVME_VS(1, 2)) {
                if (bitmap_empty(eui, len * 8)) {
-                       kfree(id_ns);
-                       goto scsi_string;
+                       eui = id_ns->nguid;
+                       len = sizeof(id_ns->nguid);
                }
+       }
 
-               inq_response[3] = 4 + len; /* Page Length */
-               /* Designation Descriptor start */
-               inq_response[4] = 0x01;    /* Proto ID=0h | Code set=1h */
-               inq_response[5] = 0x02;    /* PIV=0b | Asso=00b | Designator Type=2h */
-               inq_response[6] = 0x00;    /* Rsvd */
-               inq_response[7] = len;     /* Designator Length */
-               memcpy(&inq_response[8], eui, len);
-               kfree(id_ns);
-       } else {
- scsi_string:
-               if (alloc_len < 72) {
-                       return nvme_trans_completion(hdr,
-                                       SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               }
-               inq_response[3] = 0x48;    /* Page Length */
-               /* Designation Descriptor start */
-               inq_response[4] = 0x03;    /* Proto ID=0h | Code set=3h */
-               inq_response[5] = 0x08;    /* PIV=0b | Asso=00b | Designator Type=8h */
-               inq_response[6] = 0x00;    /* Rsvd */
-               inq_response[7] = 0x44;    /* Designator Length */
-
-               sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
-               memcpy(&inq_response[12], dev->model, sizeof(dev->model));
-               sprintf(&inq_response[52], "%04x", tmp_id);
-               memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+       if (bitmap_empty(eui, len * 8)) {
+               res = -EOPNOTSUPP;
+               goto out_free_id;
        }
-       xfer_len = alloc_len;
-       return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
+
+       memset(inq_response, 0, alloc_len);
+       inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
+       inq_response[3] = 4 + len; /* Page Length */
+
+       /* Designation Descriptor start */
+       inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
+       inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
+       inq_response[6] = 0x00; /* Rsvd */
+       inq_response[7] = len;  /* Designator Length */
+       memcpy(&inq_response[8], eui, len);
+
+       res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+out_free_id:
+       kfree(id_ns);
+       return res;
+}
+
+static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
+               struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
+{
+       struct nvme_ctrl *ctrl = ns->ctrl;
+       struct nvme_id_ctrl *id_ctrl;
+       int nvme_sc, res;
+
+       if (alloc_len < 72) {
+               return nvme_trans_completion(hdr,
+                               SAM_STAT_CHECK_CONDITION,
+                               ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+       }
+
+       nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
+       res = nvme_trans_status_code(hdr, nvme_sc);
+       if (res)
+               return res;
+
+       memset(inq_response, 0, alloc_len);
+       inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
+       inq_response[3] = 0x48; /* Page Length */
+
+       /* Designation Descriptor start */
+       inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
+       inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
+       inq_response[6] = 0x00; /* Rsvd */
+       inq_response[7] = 0x44; /* Designator Length */
+
+       sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
+       memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
+       sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
+       memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
+
+       res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+       kfree(id_ctrl);
+       return res;
+}
+
+static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+                                       u8 *resp, int alloc_len)
+{
+       int res;
+
+       if (ns->ctrl->vs >= NVME_VS(1, 1)) {
+               res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
+               if (res != -EOPNOTSUPP)
+                       return res;
+       }
+
+       return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
 }
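
The split into nvme_fill_device_id_eui64() and nvme_fill_device_id_scsi_string() uses -EOPNOTSUPP as a sentinel meaning "no usable identifier, try the next descriptor type" rather than a hard failure, which keeps the fallback decision in one place. A reduced sketch of the dispatch, with hypothetical fill functions:

    /* Try-then-fall-back dispatch, as in nvme_trans_device_id_page().
     * fill_eui64()/fill_scsi_string() are stand-ins for the helpers.
     */
    #include <errno.h>

    static int fill_eui64(void)       { return -EOPNOTSUPP; }
    static int fill_scsi_string(void) { return 0; }

    static int fill_device_id(void)
    {
        int res = fill_eui64();       /* preferred: EUI-64/NGUID */

        if (res != -EOPNOTSUPP)       /* success or a real error */
            return res;
        return fill_scsi_string();    /* fallback descriptor */
    }

Any other error from the EUI-64 path still propagates; only the "identifier absent" case falls through.
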
 
 static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
@@ -672,7 +703,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        u8 *inq_response;
        int res;
        int nvme_sc;
-       struct nvme_dev *dev = ns->dev;
+       struct nvme_ctrl *ctrl = ns->ctrl;
        struct nvme_id_ctrl *id_ctrl;
        struct nvme_id_ns *id_ns;
        int xfer_len;
@@ -688,7 +719,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        if (inq_response == NULL)
                return -ENOMEM;
 
-       nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+       nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
                goto out_free_inq;
@@ -704,7 +735,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        app_chk = protect << 1;
        ref_chk = protect;
 
-       nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+       nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
                goto out_free_inq;
@@ -815,7 +846,6 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
        int res;
        int xfer_len;
        u8 *log_response;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_smart_log *smart_log;
        u8 temp_c;
        u16 temp_k;
@@ -824,7 +854,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
        if (log_response == NULL)
                return -ENOMEM;
 
-       res = nvme_get_log_page(dev, &smart_log);
+       res = nvme_get_log_page(ns->ctrl, &smart_log);
        if (res < 0)
                goto out_free_response;
 
@@ -862,7 +892,6 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        int res;
        int xfer_len;
        u8 *log_response;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_smart_log *smart_log;
        u32 feature_resp;
        u8 temp_c_cur, temp_c_thresh;
@@ -872,7 +901,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        if (log_response == NULL)
                return -ENOMEM;
 
-       res = nvme_get_log_page(dev, &smart_log);
+       res = nvme_get_log_page(ns->ctrl, &smart_log);
        if (res < 0)
                goto out_free_response;
 
@@ -886,7 +915,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        kfree(smart_log);
 
        /* Get Features for Temp Threshold */
-       res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
+       res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, 0,
                                                                &feature_resp);
        if (res != NVME_SC_SUCCESS)
                temp_c_thresh = LOG_TEMP_UNKNOWN;
@@ -948,7 +977,6 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
        int res;
        int nvme_sc;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_id_ns *id_ns;
        u8 flbas;
        u32 lba_length;
@@ -958,7 +986,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
                return -EINVAL;
 
-       nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+       nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
                return res;
@@ -1014,14 +1042,13 @@ static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
 {
        int res = 0;
        int nvme_sc;
-       struct nvme_dev *dev = ns->dev;
        u32 feature_resp;
        u8 vwc;
 
        if (len < MODE_PAGE_CACHING_LEN)
                return -EINVAL;
 
-       nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
+       nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, 0,
                                                                &feature_resp);
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
@@ -1207,12 +1234,11 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
        int res;
        int nvme_sc;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_id_ctrl *id_ctrl;
        int lowest_pow_st;      /* max npss = lowest power consumption */
        unsigned ps_desired = 0;
 
-       nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+       nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
                return res;
@@ -1256,7 +1282,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
                                SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
                break;
        }
-       nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
+       nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_POWER_MGMT, ps_desired, 0,
                                    NULL);
        return nvme_trans_status_code(hdr, nvme_sc);
 }
@@ -1280,7 +1306,6 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
                                        u8 buffer_id)
 {
        int nvme_sc;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_command c;
 
        if (hdr->iovec_count > 0) {
@@ -1297,7 +1322,7 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
        c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
        c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
 
-       nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+       nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
                        hdr->dxferp, tot_len, NULL, 0);
        return nvme_trans_status_code(hdr, nvme_sc);
 }
@@ -1364,14 +1389,13 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
        int res = 0;
        int nvme_sc;
-       struct nvme_dev *dev = ns->dev;
        unsigned dword11;
 
        switch (page_code) {
        case MODE_PAGE_CACHING:
                dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
-               nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
-                                           0, NULL);
+               nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
+                                           dword11, 0, NULL);
                res = nvme_trans_status_code(hdr, nvme_sc);
                break;
        case MODE_PAGE_CONTROL:
@@ -1473,7 +1497,6 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 {
        int res = 0;
        int nvme_sc;
-       struct nvme_dev *dev = ns->dev;
        u8 flbas;
 
        /*
@@ -1486,7 +1509,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
        if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
                struct nvme_id_ns *id_ns;
 
-               nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+               nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
                res = nvme_trans_status_code(hdr, nvme_sc);
                if (res)
                        return res;
@@ -1570,7 +1593,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
        int res;
        int nvme_sc;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_id_ns *id_ns;
        u8 i;
        u8 flbas, nlbaf;
@@ -1579,7 +1601,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        struct nvme_command c;
 
        /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
-       nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+       nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
                return res;
@@ -1611,7 +1633,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        c.format.nsid = cpu_to_le32(ns->ns_id);
        c.format.cdw10 = cpu_to_le32(cdw10);
 
-       nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+       nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
        res = nvme_trans_status_code(hdr, nvme_sc);
 
        kfree(id_ns);
@@ -1704,7 +1726,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
                        nvme_sc = NVME_SC_LBA_RANGE;
                        break;
                }
-               nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+               nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
                                next_mapping_addr, unit_len, NULL, 0);
                if (nvme_sc)
                        break;
@@ -2040,7 +2062,6 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        u32 alloc_len;
        u32 resp_size;
        u32 xfer_len;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_id_ns *id_ns;
        u8 *response;
 
@@ -2052,7 +2073,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
                resp_size = READ_CAP_10_RESP_SIZE;
        }
 
-       nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+       nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
                return res;
@@ -2080,7 +2101,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        int nvme_sc;
        u32 alloc_len, xfer_len, resp_size;
        u8 *response;
-       struct nvme_dev *dev = ns->dev;
        struct nvme_id_ctrl *id_ctrl;
        u32 ll_length, lun_id;
        u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
@@ -2094,7 +2114,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        case ALL_LUNS_RETURNED:
        case ALL_WELL_KNOWN_LUNS_RETURNED:
        case RESTRICTED_LUNS_RETURNED:
-               nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+               nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
                res = nvme_trans_status_code(hdr, nvme_sc);
                if (res)
                        return res;
@@ -2295,9 +2315,7 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
                                        struct sg_io_hdr *hdr,
                                        u8 *cmd)
 {
-       struct nvme_dev *dev = ns->dev;
-
-       if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY))
+       if (nvme_ctrl_ready(ns->ctrl))
                return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
                                            NOT_READY, SCSI_ASC_LUN_NOT_READY,
                                            SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
index 6fd4e5a5ef4a495bbd412ee33b931f4fb3a8a24f..9d11d98373128fef3de3406975d8bcc2ca286b9a 100644 (file)
@@ -70,6 +70,9 @@ static ssize_t bin_attr_nvmem_read(struct file *filp, struct kobject *kobj,
        if (pos >= nvmem->size)
                return 0;
 
+       if (count < nvmem->word_size)
+               return -EINVAL;
+
        if (pos + count > nvmem->size)
                count = nvmem->size - pos;
 
@@ -95,6 +98,9 @@ static ssize_t bin_attr_nvmem_write(struct file *filp, struct kobject *kobj,
        if (pos >= nvmem->size)
                return 0;
 
+       if (count < nvmem->word_size)
+               return -EINVAL;
+
        if (pos + count > nvmem->size)
                count = nvmem->size - pos;
 
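
The two hunks above add the same guard to the sysfs read and write paths: a request shorter than the device's word size cannot be expressed as a whole-word access, so it fails with -EINVAL before the usual end-of-device clamp. A userspace-style sketch of the combined bounds logic (hypothetical names):

    #include <errno.h>
    #include <stddef.h>
    #include <sys/types.h>

    /* Reject sub-word requests, then clamp to the device size. */
    static ssize_t clamp_access(size_t pos, size_t count,
                                size_t size, size_t word_size)
    {
        if (pos >= size)
            return 0;               /* access past the end */
        if (count < word_size)
            return -EINVAL;         /* cannot access a partial word */
        if (pos + count > size)
            count = size - pos;     /* truncate at the device end */
        return (ssize_t)count;
    }
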
index afb67e7eeee4a89612d56d0404726d7778c12a53..3829e5fbf8c366bf3ae7fa7149e87e5c0ddcf6f3 100644 (file)
@@ -21,6 +21,7 @@ static struct regmap_config qfprom_regmap_config = {
        .reg_bits = 32,
        .val_bits = 8,
        .reg_stride = 1,
+       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static struct nvmem_config econfig = {
index 706e3ff67f8b02f4403a841c8b92f66f3c0e5678..e7bfc175b8e1e9a0a171a3173191861399d21b1d 100644 (file)
@@ -635,6 +635,13 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np,
                msi_base = be32_to_cpup(msi_map + 2);
                rid_len = be32_to_cpup(msi_map + 3);
 
+               if (rid_base & ~map_mask) {
+                       dev_err(parent_dev,
+                               "Invalid msi-map translation - msi-map-mask (0x%x) ignores rid-base (0x%x)\n",
+                               map_mask, rid_base);
+                       return rid_out;
+               }
+
                msi_controller_node = of_find_node_by_phandle(phandle);
 
                matched = (masked_rid >= rid_base &&
@@ -654,7 +661,7 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np,
        if (!matched)
                return rid_out;
 
-       rid_out = masked_rid + msi_base;
+       rid_out = masked_rid - rid_base + msi_base;
        dev_dbg(dev,
                "msi-map at: %s, using mask %08x, rid-base: %08x, msi-base: %08x, length: %08x, rid: %08x -> %08x\n",
                dev_name(parent_dev), map_mask, rid_base, msi_base,
@@ -679,18 +686,6 @@ u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in)
        return __of_msi_map_rid(dev, &msi_np, rid_in);
 }
 
-static struct irq_domain *__of_get_msi_domain(struct device_node *np,
-                                             enum irq_domain_bus_token token)
-{
-       struct irq_domain *d;
-
-       d = irq_find_matching_host(np, token);
-       if (!d)
-               d = irq_find_host(np);
-
-       return d;
-}
-
 /**
  * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain
  * @dev: device for which the mapping is to be done.
@@ -706,7 +701,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid)
        struct device_node *np = NULL;
 
        __of_msi_map_rid(dev, &np, rid);
-       return __of_get_msi_domain(np, DOMAIN_BUS_PCI_MSI);
+       return irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
 }
 
 /**
@@ -730,7 +725,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
        /* Check for a single msi-parent property */
        msi_np = of_parse_phandle(np, "msi-parent", 0);
        if (msi_np && !of_property_read_bool(msi_np, "#msi-cells")) {
-               d = __of_get_msi_domain(msi_np, token);
+               d = irq_find_matching_host(msi_np, token);
                if (!d)
                        of_node_put(msi_np);
                return d;
@@ -744,7 +739,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
                while (!of_parse_phandle_with_args(np, "msi-parent",
                                                   "#msi-cells",
                                                   index, &args)) {
-                       d = __of_get_msi_domain(args.np, token);
+                       d = irq_find_matching_host(args.np, token);
                        if (d)
                                return d;
 
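
The translation fix is easiest to see with numbers: an msi-map entry is a (rid-base, msi-base, length) window, and a RID inside it must be rebased to the window start, not merely offset by msi-base. The new sanity check also rejects entries whose rid-base has bits outside msi-map-mask (e.g. a mask of 0xff with rid-base 0x100), since masked RIDs could never match such a window. A small worked example in plain C:

    #include <stdio.h>

    int main(void)
    {
        unsigned int map_mask = 0xffff, rid_base = 0x100;
        unsigned int msi_base = 0x200, rid_in = 0x142;
        unsigned int masked_rid = rid_in & map_mask;

        /* old (wrong):  masked_rid + msi_base            = 0x342 */
        /* new (right):  masked_rid - rid_base + msi_base = 0x242 */
        printf("rid 0x%x -> msi rid 0x%x\n",
               rid_in, masked_rid - rid_base + msi_base);
        return 0;
    }
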
index 86829f8064a61eb324a0a9ea28f63f0bf3ceadbd..39c4be41ef83d6cf23302fb9af6270f8857f85de 100644 (file)
@@ -143,11 +143,32 @@ int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
 }
 EXPORT_SYMBOL(of_mdio_parse_addr);
 
+/* The following is a list of PHY compatible strings which appear in
+ * some DTBs. The compatible string is never matched against a PHY
+ * driver, so it is pointless. We only expect devices which are not PHYs
+ * to have a compatible string, so they can be matched to an MDIO
+ * driver.  Encourage users to upgrade their DT blobs to remove these.
+ */
+static const struct of_device_id whitelist_phys[] = {
+       { .compatible = "brcm,40nm-ephy" },
+       { .compatible = "marvell,88E1111", },
+       { .compatible = "marvell,88e1116", },
+       { .compatible = "marvell,88e1118", },
+       { .compatible = "marvell,88e1145", },
+       { .compatible = "marvell,88e1149r", },
+       { .compatible = "marvell,88e1310", },
+       { .compatible = "marvell,88E1510", },
+       { .compatible = "marvell,88E1514", },
+       { .compatible = "moxa,moxart-rtl8201cp", },
+       {}
+};
+
 /*
  * Return true if the child node is for a phy. It must either:
  * o Compatible string of "ethernet-phy-idX.X"
  * o Compatible string of "ethernet-phy-ieee802.3-c45"
  * o Compatible string of "ethernet-phy-ieee802.3-c22"
+ * o In the whitelist above (a warning is issued)
  * o No compatibility string
  *
  * A device which is not a phy is expected to have a compatible string
@@ -166,6 +187,13 @@ static bool of_mdiobus_child_is_phy(struct device_node *child)
        if (of_device_is_compatible(child, "ethernet-phy-ieee802.3-c22"))
                return true;
 
+       if (of_match_node(whitelist_phys, child)) {
+               pr_warn(FW_WARN
+                       "%s: Whitelisted compatible string. Please remove\n",
+                       child->full_name);
+               return true;
+       }
+
        if (!of_find_property(child, "compatible", NULL))
                return true;
 
@@ -256,11 +284,19 @@ static int of_phy_match(struct device *dev, void *phy_np)
 struct phy_device *of_phy_find_device(struct device_node *phy_np)
 {
        struct device *d;
+       struct mdio_device *mdiodev;
+
        if (!phy_np)
                return NULL;
 
        d = bus_find_device(&mdio_bus_type, NULL, phy_np, of_phy_match);
-       return d ? to_phy_device(d) : NULL;
+       if (d) {
+               mdiodev = to_mdio_device(d);
+               if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
+                       return to_phy_device(d);
+       }
+
+       return NULL;
 }
 EXPORT_SYMBOL(of_phy_find_device);
 
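
of_phy_find_device() must now cope with non-PHY devices on the MDIO bus, so the matching struct device is first viewed as an mdio_device and only cast to a phy_device when its flags say it is one. A simplified model of the guarded downcast (stand-in types and flag, not the kernel structures):

    #include <stddef.h>

    #define FLAG_PHY 0x1

    struct mdio_dev { unsigned int flags; };
    struct phy_dev  { struct mdio_dev mdio; /* must be first member */ };

    static struct phy_dev *to_phy(struct mdio_dev *m)
    {
        if (!m || !(m->flags & FLAG_PHY))
            return NULL;                 /* not a PHY: refuse the cast */
        return (struct phy_dev *)m;      /* container-style downcast */
    }
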
index dd92c5edf219453aac0bb98d161fc1136395f1c7..b48ac6300c792d4336487f841a2267a5b883c6f5 100644 (file)
@@ -138,22 +138,22 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        dentry = d_alloc_name(root, name);
        if (!dentry) {
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                return -ENOMEM;
        }
        inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm);
        if (!inode) {
                dput(dentry);
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                return -ENOMEM;
        }
        inode->i_fop = fops;
        inode->i_private = priv;
        d_add(dentry, inode);
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        return 0;
 }
 
@@ -215,22 +215,22 @@ struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name)
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = d_alloc_name(parent, name);
        if (!dentry) {
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                return NULL;
        }
        inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755);
        if (!inode) {
                dput(dentry);
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                return NULL;
        }
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        d_add(dentry, inode);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return dentry;
 }
 
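
inode_lock()/inode_unlock() are accessor wrappers: callers stop naming the i_mutex field directly, so the lock's type and name can change inside struct inode without touching every filesystem. A simplified userspace model of the pattern (not the kernel definition):

    #include <pthread.h>

    struct inode_model { pthread_mutex_t i_lock; };

    static void inode_model_lock(struct inode_model *inode)
    {
        pthread_mutex_lock(&inode->i_lock);    /* implementation detail */
    }

    static void inode_model_unlock(struct inode_model *inode)
    {
        pthread_mutex_unlock(&inode->i_lock);
    }
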
index 75a6054265384d806623caabcc34d98e29a45f82..d1cdd9c992ac018fb0d8adb45780a01c201bac25 100644 (file)
@@ -14,6 +14,7 @@ config PCI_DRA7XX
 config PCI_MVEBU
        bool "Marvell EBU PCIe controller"
        depends on ARCH_MVEBU || ARCH_DOVE
+       depends on ARM
        depends on OF
 
 config PCIE_DW
index 5816bceddb650c24576464913f9b7044d38be585..a576aeeb22da6cec1a01784328e209a66e96e8df 100644 (file)
@@ -64,7 +64,6 @@
 #define OARR_SIZE_CFG                BIT(OARR_SIZE_CFG_SHIFT)
 
 #define MAX_NUM_OB_WINDOWS           2
-#define MAX_NUM_PAXC_PF              4
 
 #define IPROC_PCIE_REG_INVALID 0xffff
 
@@ -170,20 +169,6 @@ static inline void iproc_pcie_ob_write(struct iproc_pcie *pcie,
        writel(val, pcie->base + offset + (window * 8));
 }
 
-static inline bool iproc_pcie_device_is_valid(struct iproc_pcie *pcie,
-                                             unsigned int slot,
-                                             unsigned int fn)
-{
-       if (slot > 0)
-               return false;
-
-       /* PAXC can only support limited number of functions */
-       if (pcie->type == IPROC_PCIE_PAXC && fn >= MAX_NUM_PAXC_PF)
-               return false;
-
-       return true;
-}
-
 /**
  * Note access to the configuration registers is protected at the higher layer
  * by 'pci_lock' in drivers/pci/access.c
@@ -199,11 +184,11 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct pci_bus *bus,
        u32 val;
        u16 offset;
 
-       if (!iproc_pcie_device_is_valid(pcie, slot, fn))
-               return NULL;
-
        /* root complex access */
        if (busno == 0) {
+               if (slot > 0 || fn > 0)
+                       return NULL;
+
                iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_IND_ADDR,
                                     where & CFG_IND_ADDR_MASK);
                offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_IND_DATA);
@@ -213,6 +198,14 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct pci_bus *bus,
                        return (pcie->base + offset);
        }
 
+       /*
+        * PAXC is connected to an internally emulated EP within the SoC.  It
+        * allows only one device.
+        */
+       if (pcie->type == IPROC_PCIE_PAXC)
+               if (slot > 0)
+                       return NULL;
+
        /* EP device access */
        val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
                (slot << CFG_ADDR_DEV_NUM_SHIFT) |
index 5f2fda12e0066d9bdd90bdf08139c706a2f1a2f0..fa49f9143b80631108e10ef78e5e45e285c40cf8 100644 (file)
@@ -953,8 +953,10 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot)
 {
        pci_lock_rescan_remove();
 
-       if (slot->flags & SLOT_IS_GOING_AWAY)
+       if (slot->flags & SLOT_IS_GOING_AWAY) {
+               pci_unlock_rescan_remove();
                return -ENODEV;
+       }
 
        /* configure all functions */
        if (!(slot->flags & SLOT_ENABLED))
index 0bf82a20a0fb479ccfbdb43479bf9b0cf6ecff6a..48d21e0edd568cedc16b1626f78f0a95b22bca1c 100644 (file)
@@ -262,7 +262,6 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev)
        rpc->rpd = dev;
        INIT_WORK(&rpc->dpc_handler, aer_isr);
        mutex_init(&rpc->rpc_mutex);
-       init_waitqueue_head(&rpc->wait_release);
 
        /* Use PCIe bus function to store rpc into PCIe device */
        set_service_data(dev, rpc);
@@ -285,8 +284,7 @@ static void aer_remove(struct pcie_device *dev)
                if (rpc->isr)
                        free_irq(dev->irq, dev);
 
-               wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx);
-
+               flush_work(&rpc->dpc_handler);
                aer_disable_rootport(rpc);
                kfree(rpc);
                set_service_data(dev, NULL);
index 84420b7c9456ecbb0e43a9e8ca539af5b2ee1a20..945c939a86c5c2919335d85a95dbeee34f9c0bff 100644 (file)
@@ -72,7 +72,6 @@ struct aer_rpc {
                                         * recovery on the same
                                         * root port hierarchy
                                         */
-       wait_queue_head_t wait_release;
 };
 
 struct aer_broadcast_data {
index 712392504ed9b9f1a8992079f801cf2c271a0ae6..521e39c1b66d597f8881d9ab02a4e4b41c1b68a0 100644 (file)
@@ -811,8 +811,6 @@ void aer_isr(struct work_struct *work)
        while (get_e_source(rpc, &e_src))
                aer_isr_one_error(p_device, &e_src);
        mutex_unlock(&rpc->rpc_mutex);
-
-       wake_up(&rpc->wait_release);
 }
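
Across the three AER hunks, the hand-rolled drain (a waitqueue woken at the end of aer_isr(), waited on until producer and consumer indices matched) is replaced by flush_work(), which already blocks until the last queued execution of the handler has finished. A hedged kernel-style sketch of the resulting teardown (names are stand-ins):

    #include <linux/workqueue.h>

    struct example_rpc {
        struct work_struct handler;     /* INIT_WORK(..., example_isr) */
    };

    static void example_remove(struct example_rpc *rpc)
    {
        /* No more events can be queued past this point (irq freed). */
        flush_work(&rpc->handler);      /* drain any in-flight handler */
        /* now safe to free rpc */
    }
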
 
 /**
index c777b97207d5378936a8f25a54be603d76691ab9..5f70fee59a947da1daddbeb2253043dc79c794fb 100644 (file)
@@ -53,7 +53,7 @@ struct pcifront_device {
 };
 
 struct pcifront_sd {
-       int domain;
+       struct pci_sysdata sd;
        struct pcifront_device *pdev;
 };
 
@@ -67,7 +67,9 @@ static inline void pcifront_init_sd(struct pcifront_sd *sd,
                                    unsigned int domain, unsigned int bus,
                                    struct pcifront_device *pdev)
 {
-       sd->domain = domain;
+       /* Because we do not expose that information via XenBus. */
+       sd->sd.node = first_online_node;
+       sd->sd.domain = domain;
        sd->pdev = pdev;
 }
 
@@ -468,8 +470,8 @@ static int pcifront_scan_root(struct pcifront_device *pdev,
        dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
                 domain, bus);
 
-       bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
-       sd = kmalloc(sizeof(*sd), GFP_KERNEL);
+       bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL);
+       sd = kzalloc(sizeof(*sd), GFP_KERNEL);
        if (!bus_entry || !sd) {
                err = -ENOMEM;
                goto err_out;
index e7e117d5dbbe466f71d7a836ee3b898a560a1bed..0124d17bd9fe4bbff14eaced2db3a5f6d5e09b1b 100644 (file)
@@ -224,6 +224,7 @@ config PHY_MT65XX_USB3
 
 config PHY_HI6220_USB
        tristate "hi6220 USB PHY support"
+       depends on (ARCH_HISI && ARM64) || COMPILE_TEST
        select GENERIC_PHY
        select MFD_SYSCON
        help
index 8c7f27db6ad352260f8ad8675758ed365b6390a6..e7e574dc667a35d6d2c68ddf9e21744453d11bec 100644 (file)
@@ -275,20 +275,21 @@ EXPORT_SYMBOL_GPL(phy_exit);
 
 int phy_power_on(struct phy *phy)
 {
-       int ret;
+       int ret = 0;
 
        if (!phy)
-               return 0;
+               goto out;
 
        if (phy->pwr) {
                ret = regulator_enable(phy->pwr);
                if (ret)
-                       return ret;
+                       goto out;
        }
 
        ret = phy_pm_runtime_get_sync(phy);
        if (ret < 0 && ret != -ENOTSUPP)
-               return ret;
+               goto err_pm_sync;
+
        ret = 0; /* Override possible ret == -ENOTSUPP */
 
        mutex_lock(&phy->mutex);
@@ -296,19 +297,20 @@ int phy_power_on(struct phy *phy)
                ret = phy->ops->power_on(phy);
                if (ret < 0) {
                        dev_err(&phy->dev, "phy poweron failed --> %d\n", ret);
-                       goto out;
+                       goto err_pwr_on;
                }
        }
        ++phy->power_count;
        mutex_unlock(&phy->mutex);
        return 0;
 
-out:
+err_pwr_on:
        mutex_unlock(&phy->mutex);
        phy_pm_runtime_put_sync(phy);
+err_pm_sync:
        if (phy->pwr)
                regulator_disable(phy->pwr);
-
+out:
        return ret;
 }
 EXPORT_SYMBOL_GPL(phy_power_on);
index 4a3fc6e59f8e1982ecdd4b13b1500456959c0b71..840f3eae428b84292534d324ef856b485d001584 100644 (file)
@@ -715,6 +715,7 @@ static int twl4030_usb_probe(struct platform_device *pdev)
        pm_runtime_use_autosuspend(&pdev->dev);
        pm_runtime_set_autosuspend_delay(&pdev->dev, 2000);
        pm_runtime_enable(&pdev->dev);
+       pm_runtime_get_sync(&pdev->dev);
 
        /* Our job is to use irqs and status from the power module
         * to keep the transceiver disabled when nothing's connected.
@@ -750,6 +751,7 @@ static int twl4030_usb_remove(struct platform_device *pdev)
        struct twl4030_usb *twl = platform_get_drvdata(pdev);
        int val;
 
+       usb_remove_phy(&twl->phy);
        pm_runtime_get_sync(twl->dev);
        cancel_delayed_work(&twl->id_workaround_work);
        device_remove_file(twl->dev, &dev_attr_vbus);
@@ -757,6 +759,13 @@ static int twl4030_usb_remove(struct platform_device *pdev)
        /* set transceiver mode to power on defaults */
        twl4030_usb_set_mode(twl, -1);
 
+       /* idle ulpi before powering off */
+       if (cable_present(twl->linkstat))
+               pm_runtime_put_noidle(twl->dev);
+       pm_runtime_mark_last_busy(twl->dev);
+       pm_runtime_put_sync_suspend(twl->dev);
+       pm_runtime_disable(twl->dev);
+
        /* autogate 60MHz ULPI clock,
         * clear dpll clock request for i2c access,
         * disable 32KHz
@@ -771,11 +780,6 @@ static int twl4030_usb_remove(struct platform_device *pdev)
        /* disable complete OTG block */
        twl4030_usb_clear_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB);
 
-       if (cable_present(twl->linkstat))
-               pm_runtime_put_noidle(twl->dev);
-       pm_runtime_mark_last_busy(twl->dev);
-       pm_runtime_put(twl->dev);
-
        return 0;
 }
 
index 16d48a4ed2253b7760e296deb95de9d76a6d85b4..e96e86d2e745e06e691344cca3fd38922450de58 100644 (file)
@@ -347,6 +347,7 @@ static int mtk_pconf_parse_conf(struct pinctrl_dev *pctldev,
                ret = mtk_pconf_set_pull_select(pctl, pin, true, false, arg);
                break;
        case PIN_CONFIG_INPUT_ENABLE:
+               mtk_pmx_gpio_set_direction(pctldev, NULL, pin, true);
                ret = mtk_pconf_set_ies_smt(pctl, pin, arg, param);
                break;
        case PIN_CONFIG_OUTPUT:
@@ -354,6 +355,7 @@ static int mtk_pconf_parse_conf(struct pinctrl_dev *pctldev,
                ret = mtk_pmx_gpio_set_direction(pctldev, NULL, pin, false);
                break;
        case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+               mtk_pmx_gpio_set_direction(pctldev, NULL, pin, true);
                ret = mtk_pconf_set_ies_smt(pctl, pin, arg, param);
                break;
        case PIN_CONFIG_DRIVE_STRENGTH:
index e4d473811bb366c84cf5ab180529c36411aacd6d..3ef798fac81bdba4926273f74b8c2b62b80102b3 100644 (file)
@@ -666,16 +666,19 @@ int mvebu_pinctrl_probe(struct platform_device *pdev)
                struct mvebu_mpp_ctrl_setting *set = &mode->settings[0];
                struct mvebu_pinctrl_group *grp;
                unsigned num_settings;
+               unsigned supp_settings;
 
-               for (num_settings = 0; ; set++) {
+               for (num_settings = 0, supp_settings = 0; ; set++) {
                        if (!set->name)
                                break;
 
+                       num_settings++;
+
                        /* skip unsupported settings for this variant */
                        if (pctl->variant && !(pctl->variant & set->variant))
                                continue;
 
-                       num_settings++;
+                       supp_settings++;
 
                        /* find gpio/gpo/gpi settings */
                        if (strcmp(set->name, "gpio") == 0)
@@ -688,7 +691,7 @@ int mvebu_pinctrl_probe(struct platform_device *pdev)
                }
 
                /* skip modes with no settings for this variant */
-               if (!num_settings)
+               if (!supp_settings)
                        continue;
 
                grp = mvebu_pinctrl_find_group_by_pid(pctl, mode->pid);
index 085e60106ec2c2f94dd83bfbc2f21fbd8ba17da6..1f7469c9857d6ad14f80a4232aa836ce65d39620 100644 (file)
@@ -191,6 +191,7 @@ static void abx500_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
                dev_err(pct->dev, "%s write failed (%d)\n", __func__, ret);
 }
 
+#ifdef CONFIG_DEBUG_FS
 static int abx500_get_pull_updown(struct abx500_pinctrl *pct, int offset,
                                  enum abx500_gpio_pull_updown *pull_updown)
 {
@@ -226,6 +227,7 @@ out:
 
        return ret;
 }
+#endif
 
 static int abx500_set_pull_updown(struct abx500_pinctrl *pct,
                                  int offset, enum abx500_gpio_pull_updown val)
@@ -468,6 +470,7 @@ out:
        return ret;
 }
 
+#ifdef CONFIG_DEBUG_FS
 static int abx500_get_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip,
                          unsigned gpio)
 {
@@ -553,8 +556,6 @@ out:
        return ret;
 }
 
-#ifdef CONFIG_DEBUG_FS
-
 #include <linux/seq_file.h>
 
 static void abx500_gpio_dbg_show_one(struct seq_file *s,
index d90e205cf809e5f3ec4c9d9ed9ca15bec8288d82..216f227c60092f580df47bc5e5f7c64828419d80 100644 (file)
@@ -426,6 +426,7 @@ int pxa2xx_pinctrl_init(struct platform_device *pdev,
 
        return 0;
 }
+EXPORT_SYMBOL(pxa2xx_pinctrl_init);
 
 int pxa2xx_pinctrl_exit(struct platform_device *pdev)
 {
index f67b1e958589b9f9b4a891ce860bcfeb8909690d..5cc97f85db02d4986b3c9249a1116d952838fde0 100644 (file)
@@ -514,25 +514,35 @@ static const struct pinconf_ops samsung_pinconf_ops = {
        .pin_config_group_set   = samsung_pinconf_group_set,
 };
 
-/* gpiolib gpio_set callback function */
-static void samsung_gpio_set(struct gpio_chip *gc, unsigned offset, int value)
+/*
+ * samsung_gpio_set_value() should be called with "bank->slock" held
+ * to avoid a race condition.
+ */
+static void samsung_gpio_set_value(struct gpio_chip *gc,
+                                         unsigned offset, int value)
 {
        struct samsung_pin_bank *bank = gpiochip_get_data(gc);
        const struct samsung_pin_bank_type *type = bank->type;
-       unsigned long flags;
        void __iomem *reg;
        u32 data;
 
        reg = bank->drvdata->virt_base + bank->pctl_offset;
 
-       spin_lock_irqsave(&bank->slock, flags);
-
        data = readl(reg + type->reg_offset[PINCFG_TYPE_DAT]);
        data &= ~(1 << offset);
        if (value)
                data |= 1 << offset;
        writel(data, reg + type->reg_offset[PINCFG_TYPE_DAT]);
+}
+
+/* gpiolib gpio_set callback function */
+static void samsung_gpio_set(struct gpio_chip *gc, unsigned offset, int value)
+{
+       struct samsung_pin_bank *bank = gpiochip_get_data(gc);
+       unsigned long flags;
 
+       spin_lock_irqsave(&bank->slock, flags);
+       samsung_gpio_set_value(gc, offset, value);
        spin_unlock_irqrestore(&bank->slock, flags);
 }
 
@@ -553,6 +563,8 @@ static int samsung_gpio_get(struct gpio_chip *gc, unsigned offset)
 }
 
 /*
+ * samsung_gpio_set_direction() should be called with "bank->slock" held
+ * to avoid a race condition.
  * The calls to gpio_direction_output() and gpio_direction_input()
  * lead to this function call.
  */
@@ -564,7 +576,6 @@ static int samsung_gpio_set_direction(struct gpio_chip *gc,
        struct samsung_pinctrl_drv_data *drvdata;
        void __iomem *reg;
        u32 data, mask, shift;
-       unsigned long flags;
 
        bank = gpiochip_get_data(gc);
        type = bank->type;
@@ -581,31 +592,42 @@ static int samsung_gpio_set_direction(struct gpio_chip *gc,
                reg += 4;
        }
 
-       spin_lock_irqsave(&bank->slock, flags);
-
        data = readl(reg);
        data &= ~(mask << shift);
        if (!input)
                data |= FUNC_OUTPUT << shift;
        writel(data, reg);
 
-       spin_unlock_irqrestore(&bank->slock, flags);
-
        return 0;
 }
 
 /* gpiolib gpio_direction_input callback function. */
 static int samsung_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
 {
-       return samsung_gpio_set_direction(gc, offset, true);
+       struct samsung_pin_bank *bank = gpiochip_get_data(gc);
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&bank->slock, flags);
+       ret = samsung_gpio_set_direction(gc, offset, true);
+       spin_unlock_irqrestore(&bank->slock, flags);
+       return ret;
 }
 
 /* gpiolib gpio_direction_output callback function. */
 static int samsung_gpio_direction_output(struct gpio_chip *gc, unsigned offset,
                                                        int value)
 {
-       samsung_gpio_set(gc, offset, value);
-       return samsung_gpio_set_direction(gc, offset, false);
+       struct samsung_pin_bank *bank = gpiochip_get_data(gc);
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&bank->slock, flags);
+       samsung_gpio_set_value(gc, offset, value);
+       ret = samsung_gpio_set_direction(gc, offset, false);
+       spin_unlock_irqrestore(&bank->slock, flags);
+
+       return ret;
 }
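
The Samsung pinctrl rework splits each operation into an unlocked helper that assumes bank->slock is held and a thin locked wrapper, so gpio_direction_output() can update the value and the direction under a single lock acquisition. A simplified model of the split, using a pthread mutex in place of the spinlock (illustrative names):

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int value, direction;

    static void set_value_locked(int v)     { value = v; }     /* lock held */
    static void set_direction_locked(int d) { direction = d; } /* lock held */

    static void set_output(int v)
    {
        pthread_mutex_lock(&lock);
        set_value_locked(v);        /* both updates appear atomic    */
        set_direction_locked(1);    /* to readers taking the same lock */
        pthread_mutex_unlock(&lock);
    }
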
 
 /*
index 77d4cf047ceea71dd9bcc50bf0d4e59527d7aac8..11760bbe9d51acd4ebb4d44b82af2c525f68e1dd 100644 (file)
@@ -492,6 +492,7 @@ static const struct sunxi_pinctrl_desc sun8i_h3_pinctrl_data = {
        .pins = sun8i_h3_pins,
        .npins = ARRAY_SIZE(sun8i_h3_pins),
        .irq_banks = 2,
+       .irq_read_needs_mux = true
 };
 
 static int sun8i_h3_pinctrl_probe(struct platform_device *pdev)
index d28db0e793dfb0a3b103bcfc71a9a6a859506ed4..d78ee151c9e4a703bbeb071cda98032cd03dccee 100644 (file)
@@ -899,6 +899,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
                        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3"),
                },
        },
+       {
+               .ident = "Lenovo Yoga 700",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 700"),
+               },
+       },
        {
                .ident = "Lenovo Yoga 900",
                .matches = {
index 20f0ad9bb9f373efedfdf42de3af7da5321f21a6..e20f23e04c24ce8071ea03ea238b5a1a765d70ff 100644 (file)
@@ -41,8 +41,7 @@ static const struct key_entry intel_hid_keymap[] = {
        { KE_KEY, 4, { KEY_HOME } },
        { KE_KEY, 5, { KEY_END } },
        { KE_KEY, 6, { KEY_PAGEUP } },
-       { KE_KEY, 4, { KEY_PAGEDOWN } },
-       { KE_KEY, 4, { KEY_HOME } },
+       { KE_KEY, 7, { KEY_PAGEDOWN } },
        { KE_KEY, 8, { KEY_RFKILL } },
        { KE_KEY, 9, { KEY_POWER } },
        { KE_KEY, 11, { KEY_SLEEP } },
index 02bc5a6343c3fbeeb34b00ef282ba5f31ceff96a..aa454241489c9f541864f34828e19318514d3560 100644 (file)
@@ -49,7 +49,7 @@ struct scu_ipc_data {
 
 static int scu_reg_access(u32 cmd, struct scu_ipc_data  *data)
 {
-       int count = data->count;
+       unsigned int count = data->count;
 
        if (count == 0 || count == 3 || count > 4)
                return -EINVAL;
index 5b31d1548c07b1221114031eefc966e02538af2d..f5134acd6ff05d8a5eac080eed9f4b26a96847f3 100644 (file)
        } \
 }
 
+#ifdef CONFIG_PM_SLEEP
 static u8 suspend_prep_ok;
 static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
 static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
+#endif
 
 struct telemetry_susp_stats {
        u32 shlw_swake_ctr;
index f700723ca5d6b1a8a0c342c568908f9c53aa2435..d28e3ab9479c64e41fe014714c08721fc503faf3 100644 (file)
@@ -342,6 +342,7 @@ static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
 /* Device IDs of parts that have 32KB MCH space */
 static const unsigned int mch_quirk_devices[] = {
        0x0154, /* Ivy Bridge */
+       0x0a04, /* Haswell-ULT */
        0x0c00, /* Haswell */
        0x1604, /* Broadwell */
 };
index 9429e66be09641fd818f8da593d47c45124d3efe..8eafc6f0df88be5fbf26c6725beb656e49464c0b 100644 (file)
@@ -21,6 +21,9 @@
 
 #include <linux/power/bq27xxx_battery.h>
 
+static DEFINE_IDR(battery_id);
+static DEFINE_MUTEX(battery_mutex);
+
 static irqreturn_t bq27xxx_battery_irq_handler_thread(int irq, void *data)
 {
        struct bq27xxx_device_info *di = data;
@@ -70,19 +73,33 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
 {
        struct bq27xxx_device_info *di;
        int ret;
+       char *name;
+       int num;
+
+       /* Get new ID for the new battery device */
+       mutex_lock(&battery_mutex);
+       num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL);
+       mutex_unlock(&battery_mutex);
+       if (num < 0)
+               return num;
+
+       name = devm_kasprintf(&client->dev, GFP_KERNEL, "%s-%d", id->name, num);
+       if (!name)
+               goto err_mem;
 
        di = devm_kzalloc(&client->dev, sizeof(*di), GFP_KERNEL);
        if (!di)
-               return -ENOMEM;
+               goto err_mem;
 
+       di->id = num;
        di->dev = &client->dev;
        di->chip = id->driver_data;
-       di->name = id->name;
+       di->name = name;
        di->bus.read = bq27xxx_battery_i2c_read;
 
        ret = bq27xxx_battery_setup(di);
        if (ret)
-               return ret;
+               goto err_failed;
 
        /* Schedule a polling after about 1 min */
        schedule_delayed_work(&di->work, 60 * HZ);
@@ -103,6 +120,16 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
        }
 
        return 0;
+
+err_mem:
+       ret = -ENOMEM;
+
+err_failed:
+       mutex_lock(&battery_mutex);
+       idr_remove(&battery_id, num);
+       mutex_unlock(&battery_mutex);
+
+       return ret;
 }
 
 static int bq27xxx_battery_i2c_remove(struct i2c_client *client)
@@ -111,6 +138,10 @@ static int bq27xxx_battery_i2c_remove(struct i2c_client *client)
 
        bq27xxx_battery_teardown(di);
 
+       mutex_lock(&battery_mutex);
+       idr_remove(&battery_id, di->id);
+       mutex_unlock(&battery_mutex);
+
        return 0;
 }
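
To support several batteries in one system, each probe now draws a unique instance number from an IDR under a mutex and builds a "name-N" device name; the number is released on every failure path and at remove. A kernel-style sketch of the allocation pair (hypothetical driver names, real IDR API):

    #include <linux/idr.h>
    #include <linux/mutex.h>
    #include <linux/gfp.h>

    static DEFINE_IDR(example_id);
    static DEFINE_MUTEX(example_mutex);

    static int example_get_id(void *owner)
    {
        int num;

        mutex_lock(&example_mutex);
        num = idr_alloc(&example_id, owner, 0, 0, GFP_KERNEL);
        mutex_unlock(&example_mutex);
        return num;         /* >= 0 on success, -errno on failure */
    }

    static void example_put_id(int num)
    {
        mutex_lock(&example_mutex);
        idr_remove(&example_id, num);
        mutex_unlock(&example_mutex);
    }
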
 
index 934c139916c609e3647acd5c6229a781ca348d56..ee4f183ef9eed337b012c4088bfeba5ae0c949a3 100644 (file)
@@ -178,7 +178,6 @@ static int ptp_ixp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
        u64 ns;
-       u32 remainder;
        unsigned long flags;
        struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
        struct ixp46x_ts_regs *regs = ixp_clock->regs;
@@ -189,8 +188,7 @@ static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 
        spin_unlock_irqrestore(&register_lock, flags);
 
-       ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
-       ts->tv_nsec = remainder;
+       *ts = ns_to_timespec64(ns);
        return 0;
 }
 
@@ -202,8 +200,7 @@ static int ptp_ixp_settime(struct ptp_clock_info *ptp,
        struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
        struct ixp46x_ts_regs *regs = ixp_clock->regs;
 
-       ns = ts->tv_sec * 1000000000ULL;
-       ns += ts->tv_nsec;
+       ns = timespec64_to_ns(ts);
 
        spin_lock_irqsave(&register_lock, flags);
 
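
The PTP conversion helpers replace open-coded arithmetic: ns_to_timespec64() splits a nanosecond count into seconds and nanoseconds, and timespec64_to_ns() joins them back, both 64-bit safe. The underlying arithmetic in plain, runnable C:

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    int main(void)
    {
        uint64_t ns = 1456789123456789ULL;
        uint64_t sec = ns / NSEC_PER_SEC;    /* 1456789 s */
        uint64_t nsec = ns % NSEC_PER_SEC;   /* 123456789 ns */

        printf("%llu ns = %llu s + %llu ns\n",
               (unsigned long long)ns, (unsigned long long)sec,
               (unsigned long long)nsec);
        return sec * NSEC_PER_SEC + nsec == ns ? 0 : 1;  /* round trip */
    }
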
index 41605dac83094813441530ae9cacad093a352f71..c78db05e75b1323f9b0adb35aac8cc78316355ab 100644 (file)
@@ -3035,6 +3035,7 @@ static void dasd_setup_queue(struct dasd_block *block)
                max = block->base->discipline->max_blocks << block->s2b_shift;
        }
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, block->request_queue);
+       block->request_queue->limits.max_dev_sectors = max;
        blk_queue_logical_block_size(block->request_queue,
                                     block->bp_block);
        blk_queue_max_hw_sectors(block->request_queue, max);
index 184b1dbeb55463b768eb9aa7215665686d5e4591..286782c60da4e3b197d7d9901b7e17c2e69673ac 100644 (file)
@@ -264,8 +264,10 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device)
                spin_unlock_irqrestore(&lcu->lock, flags);
                cancel_work_sync(&lcu->suc_data.worker);
                spin_lock_irqsave(&lcu->lock, flags);
-               if (device == lcu->suc_data.device)
+               if (device == lcu->suc_data.device) {
+                       dasd_put_device(device);
                        lcu->suc_data.device = NULL;
+               }
        }
        was_pending = 0;
        if (device == lcu->ruac_data.device) {
@@ -273,8 +275,10 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device)
                was_pending = 1;
                cancel_delayed_work_sync(&lcu->ruac_data.dwork);
                spin_lock_irqsave(&lcu->lock, flags);
-               if (device == lcu->ruac_data.device)
+               if (device == lcu->ruac_data.device) {
+                       dasd_put_device(device);
                        lcu->ruac_data.device = NULL;
+               }
        }
        private->lcu = NULL;
        spin_unlock_irqrestore(&lcu->lock, flags);
@@ -549,8 +553,10 @@ static void lcu_update_work(struct work_struct *work)
        if ((rc && (rc != -EOPNOTSUPP)) || (lcu->flags & NEED_UAC_UPDATE)) {
                DBF_DEV_EVENT(DBF_WARNING, device, "could not update"
                            " alias data in lcu (rc = %d), retry later", rc);
-               schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ);
+               if (!schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ))
+                       dasd_put_device(device);
        } else {
+               dasd_put_device(device);
                lcu->ruac_data.device = NULL;
                lcu->flags &= ~UPDATE_PENDING;
        }
@@ -593,8 +599,10 @@ static int _schedule_lcu_update(struct alias_lcu *lcu,
         */
        if (!usedev)
                return -EINVAL;
+       dasd_get_device(usedev);
        lcu->ruac_data.device = usedev;
-       schedule_delayed_work(&lcu->ruac_data.dwork, 0);
+       if (!schedule_delayed_work(&lcu->ruac_data.dwork, 0))
+               dasd_put_device(usedev);
        return 0;
 }
 
@@ -723,7 +731,7 @@ static int reset_summary_unit_check(struct alias_lcu *lcu,
        ASCEBC((char *) &cqr->magic, 4);
        ccw = cqr->cpaddr;
        ccw->cmd_code = DASD_ECKD_CCW_RSCK;
-       ccw->flags = 0;
+       ccw->flags = CCW_FLAG_SLI;
        ccw->count = 16;
        ccw->cda = (__u32)(addr_t) cqr->data;
        ((char *)cqr->data)[0] = reason;
@@ -930,6 +938,7 @@ static void summary_unit_check_handling_work(struct work_struct *work)
        /* 3. read new alias configuration */
        _schedule_lcu_update(lcu, device);
        lcu->suc_data.device = NULL;
+       dasd_put_device(device);
        spin_unlock_irqrestore(&lcu->lock, flags);
 }
 
@@ -989,6 +998,8 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device,
        }
        lcu->suc_data.reason = reason;
        lcu->suc_data.device = device;
+       dasd_get_device(device);
        spin_unlock(&lcu->lock);
-       schedule_work(&lcu->suc_data.worker);
+       if (!schedule_work(&lcu->suc_data.worker))
+               dasd_put_device(device);
 };
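
The dasd_alias fixes all follow one rule: take a device reference before handing the device to deferred work, and drop it either in the worker or immediately when schedule_work()/schedule_delayed_work() returns false because the work was already pending. A hedged sketch of the queueing side (get_ref()/put_ref() stand in for dasd_get_device()/dasd_put_device()):

    #include <linux/workqueue.h>

    void get_ref(void);    /* stand-in for dasd_get_device() */
    void put_ref(void);    /* stand-in for dasd_put_device() */

    static void queue_update(struct work_struct *work)
    {
        get_ref();                    /* reference the worker will own */
        if (!schedule_work(work))     /* false: was already pending */
            put_ref();                /* drop the extra reference */
    }

The worker drops its reference when it finishes, so the count stays balanced whether or not the queueing succeeded.
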
index c692dfebd0bab1c808f460349dff194b028f5c5c..50597f9522fe375ed764aab8842a400e56ef808d 100644 (file)
@@ -139,11 +139,11 @@ static ssize_t chp_measurement_chars_read(struct file *filp,
 
        device = container_of(kobj, struct device, kobj);
        chp = to_channelpath(device);
-       if (!chp->cmg_chars)
+       if (chp->cmg == -1)
                return 0;
 
-       return memory_read_from_buffer(buf, count, &off,
-                               chp->cmg_chars, sizeof(struct cmg_chars));
+       return memory_read_from_buffer(buf, count, &off, &chp->cmg_chars,
+                                      sizeof(chp->cmg_chars));
 }
 
 static struct bin_attribute chp_measurement_chars_attr = {
@@ -416,7 +416,8 @@ static void chp_release(struct device *dev)
  * chp_update_desc - update channel-path description
  * @chp - channel-path
  *
- * Update the channel-path description of the specified channel-path.
+ * Update the channel-path description of the specified channel-path
+ * including channel measurement related information.
  * Return zero on success, non-zero otherwise.
  */
 int chp_update_desc(struct channel_path *chp)
@@ -428,8 +429,10 @@ int chp_update_desc(struct channel_path *chp)
                return rc;
 
        rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+       if (rc)
+               return rc;
 
-       return rc;
+       return chsc_get_channel_measurement_chars(chp);
 }
 
 /**
@@ -466,14 +469,6 @@ int chp_new(struct chp_id chpid)
                ret = -ENODEV;
                goto out_free;
        }
-       /* Get channel-measurement characteristics. */
-       if (css_chsc_characteristics.scmc && css_chsc_characteristics.secm) {
-               ret = chsc_get_channel_measurement_chars(chp);
-               if (ret)
-                       goto out_free;
-       } else {
-               chp->cmg = -1;
-       }
        dev_set_name(&chp->dev, "chp%x.%02x", chpid.cssid, chpid.id);
 
        /* make it known to the system */
index 4efd5b867cc390780a18c2f983a2215ed05dcd72..af0232290dc4c7c183a65e3fe13d6683bb48cf1e 100644 (file)
@@ -48,7 +48,7 @@ struct channel_path {
        /* Channel-measurement related stuff: */
        int cmg;
        int shared;
-       void *cmg_chars;
+       struct cmg_chars cmg_chars;
 };
 
 /* Return channel_path struct for given chpid. */
index a831d18596a5ef592f88ec5af031f639606c8c0b..c424c0c7367e360acd9ccd1ce899dc80bb575265 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 
 #include <asm/cio.h>
@@ -224,8 +225,9 @@ out_unreg:
 
 void chsc_chp_offline(struct chp_id chpid)
 {
-       char dbf_txt[15];
+       struct channel_path *chp = chpid_to_chp(chpid);
        struct chp_link link;
+       char dbf_txt[15];
 
        sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
        CIO_TRACE_EVENT(2, dbf_txt);
@@ -236,6 +238,11 @@ void chsc_chp_offline(struct chp_id chpid)
        link.chpid = chpid;
        /* Wait until previous actions have settled. */
        css_wait_for_slow_path();
+
+       mutex_lock(&chp->lock);
+       chp_update_desc(chp);
+       mutex_unlock(&chp->lock);
+
        for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &link);
 }
 
@@ -690,8 +697,9 @@ static void chsc_process_crw(struct crw *crw0, struct crw *crw1, int overflow)
 
 void chsc_chp_online(struct chp_id chpid)
 {
-       char dbf_txt[15];
+       struct channel_path *chp = chpid_to_chp(chpid);
        struct chp_link link;
+       char dbf_txt[15];
 
        sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
        CIO_TRACE_EVENT(2, dbf_txt);
@@ -701,6 +709,11 @@ void chsc_chp_online(struct chp_id chpid)
                link.chpid = chpid;
                /* Wait until previous actions have settled. */
                css_wait_for_slow_path();
+
+               mutex_lock(&chp->lock);
+               chp_update_desc(chp);
+               mutex_unlock(&chp->lock);
+
                for_each_subchannel_staged(__s390_process_res_acc, NULL,
                                           &link);
                css_schedule_reprobe();
@@ -967,22 +980,19 @@ static void
 chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
                          struct cmg_chars *chars)
 {
-       struct cmg_chars *cmg_chars;
        int i, mask;
 
-       cmg_chars = chp->cmg_chars;
        for (i = 0; i < NR_MEASUREMENT_CHARS; i++) {
                mask = 0x80 >> (i + 3);
                if (cmcv & mask)
-                       cmg_chars->values[i] = chars->values[i];
+                       chp->cmg_chars.values[i] = chars->values[i];
                else
-                       cmg_chars->values[i] = 0;
+                       chp->cmg_chars.values[i] = 0;
        }
 }
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
-       struct cmg_chars *cmg_chars;
        int ccode, ret;
 
        struct {
@@ -1006,10 +1016,11 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
                u32 data[NR_MEASUREMENT_CHARS];
        } __attribute__ ((packed)) *scmc_area;
 
-       chp->cmg_chars = NULL;
-       cmg_chars = kmalloc(sizeof(*cmg_chars), GFP_KERNEL);
-       if (!cmg_chars)
-               return -ENOMEM;
+       chp->shared = -1;
+       chp->cmg = -1;
+
+       if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
+               return 0;
 
        spin_lock_irq(&chsc_page_lock);
        memset(chsc_page, 0, PAGE_SIZE);
@@ -1031,25 +1042,19 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
                              scmc_area->response.code);
                goto out;
        }
-       if (scmc_area->not_valid) {
-               chp->cmg = -1;
-               chp->shared = -1;
+       if (scmc_area->not_valid)
                goto out;
-       }
+
        chp->cmg = scmc_area->cmg;
        chp->shared = scmc_area->shared;
        if (chp->cmg != 2 && chp->cmg != 3) {
                /* No cmg-dependent data. */
                goto out;
        }
-       chp->cmg_chars = cmg_chars;
        chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
                                  (struct cmg_chars *) &scmc_area->data);
 out:
        spin_unlock_irq(&chsc_page_lock);
-       if (!chp->cmg_chars)
-               kfree(cmg_chars);
-
        return ret;
 }
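
Note the shape of the reworked function: the -1 "no data" sentinels are
written first, the facility check and the not_valid case become plain
early returns, and real values are only stored on the full-success path.
A small sketch of that defaults-first style (the facility flag and probe
are stand-ins for css_chsc_characteristics and the CHSC call):

    #include <stdbool.h>
    #include <stdio.h>

    struct chan { int cmg; int shared; };

    static bool have_facility;              /* assumption: probed elsewhere */
    static bool probe_valid(void) { return true; }

    static int get_measurement_chars(struct chan *c)
    {
        c->cmg = -1;                /* sentinels valid on every exit path */
        c->shared = -1;

        if (!have_facility)
            return 0;               /* absence of data is not an error */
        if (!probe_valid())
            return 0;               /* sentinels already in place */

        c->cmg = 2;                 /* real values only on full success */
        c->shared = 0;
        return 0;
    }

    int main(void)
    {
        struct chan c;

        get_measurement_chars(&c);
        printf("cmg=%d shared=%d\n", c.cmg, c.shared);
        return 0;
    }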
 
index 7b23f43c7b080cc81b4de94ed9016cc0d8a91546..de1b6c1d172c8c0866dc0fd8fb709d032bdef820 100644 (file)
@@ -112,9 +112,10 @@ static inline int convert_error(struct zcrypt_device *zdev,
                atomic_set(&zcrypt_rescan_req, 1);
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN;
        case REP82_ERROR_TRANSPORT_FAIL:
        case REP82_ERROR_MACHINE_FAILURE:
@@ -123,16 +124,18 @@ static inline int convert_error(struct zcrypt_device *zdev,
                atomic_set(&zcrypt_rescan_req, 1);
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN;
        default:
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
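
All of these zcrypt messages switch from printing the raw composite qid
to printing just its device part via AP_QID_DEVICE(). The exact shift
and mask belong to the driver's ap_bus.h; the sketch below only
illustrates the pack/unpack idea with made-up field widths:

    #include <stdio.h>

    /* Illustrative only: a qid packs a device index and a queue index;
     * log lines should show the device field, not the raw composite. */
    #define QID_DEVICE(qid)  (((qid) >> 8) & 0x3f)

    int main(void)
    {
        unsigned int qid = (5u << 8) | 3u;      /* device 5, queue 3 */

        printf("raw qid=0x%04x device=%u\n", qid, QID_DEVICE(qid));
        return 0;
    }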
index 74edf2934e7c9fd652c33d1d14bb47b9e2bf44b8..eedfaa2cf715f698db447c1a79e857f170727dc5 100644 (file)
@@ -336,9 +336,10 @@ static int convert_type80(struct zcrypt_device *zdev,
                /* The result is too short, the CEX2A card may not do that.. */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                              zdev->ap_dev->qid, zdev->online, t80h->code);
+                              AP_QID_DEVICE(zdev->ap_dev->qid),
+                              zdev->online, t80h->code);
 
                return -EAGAIN; /* repeat the request on a different device. */
        }
@@ -368,9 +369,9 @@ static int convert_response(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 9a2dd472c1cc9870d37e98c5692c1c9c1e0ebac3..21959719daef94a00bd3f40282a2022077ec1344 100644 (file)
@@ -572,9 +572,9 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
                        return -EINVAL;
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                              zdev->ap_dev->qid, zdev->online,
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
                               msg->hdr.reply_code);
                return -EAGAIN; /* repeat the request on a different device. */
        }
@@ -715,9 +715,9 @@ static int convert_response_ica(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -747,9 +747,9 @@ static int convert_response_xcrb(struct zcrypt_device *zdev,
                xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -773,9 +773,9 @@ static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -800,9 +800,9 @@ static int convert_response_rng(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 2940bd769936cd7f75d2d20adc324914b0df4b84..25aba1613e2157f7a2e468007c202a16015b3f40 100644 (file)
@@ -1045,6 +1045,9 @@ static int tw_chrdev_open(struct inode *inode, struct file *file)
 static const struct file_operations tw_fops = {
        .owner          = THIS_MODULE,
        .unlocked_ioctl = tw_chrdev_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = tw_chrdev_ioctl,
+#endif
        .open           = tw_chrdev_open,
        .release        = NULL,
        .llseek         = noop_llseek,
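
Pointing .compat_ioctl at the native handler is only correct when the
ioctl ABI has no 32/64-bit layout differences; otherwise a translating
compat handler would be needed. A stand-alone sketch of the conditional
function-pointer-table pattern (struct ops is a stand-in, not the
kernel's file_operations):

    #include <stdio.h>

    struct ops {
        long (*unlocked_ioctl)(unsigned int cmd);
    #ifdef CONFIG_COMPAT
        long (*compat_ioctl)(unsigned int cmd);
    #endif
    };

    static long my_ioctl(unsigned int cmd)
    {
        return (long)cmd;   /* no pointer-width fields in the ABI */
    }

    static const struct ops fops = {
        .unlocked_ioctl = my_ioctl,
    #ifdef CONFIG_COMPAT
        .compat_ioctl   = my_ioctl, /* safe to reuse: ABI is compat-clean */
    #endif
    };

    int main(void)
    {
        printf("%ld\n", fops.unlocked_ioctl(42));
        return 0;
    }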
index c1fe0d2f90ca353722cf7e2b16c2bd2c9b47601b..e2f31c93717db6e6a248d58ffaff6b8a2ba2d23f 100644 (file)
@@ -1106,6 +1106,7 @@ config SCSI_IPR
        tristate "IBM Power Linux RAID adapter support"
        depends on PCI && SCSI && ATA
        select FW_LOADER
+       select IRQ_POLL
        ---help---
          This driver supports the IBM Power Linux family RAID adapters.
          This includes IBM pSeries 5712, 5703, 5709, and 570A, as well
@@ -1620,23 +1621,6 @@ config ATARI_SCSI
          ST-DMA, replacing ACSI).  It does NOT support other schemes, like
          in the Hades (without DMA).
 
-config ATARI_SCSI_TOSHIBA_DELAY
-       bool "Long delays for Toshiba CD-ROMs"
-       depends on ATARI_SCSI
-       help
-         This option increases the delay after a SCSI arbitration to
-         accommodate some flaky Toshiba CD-ROM drives. Say Y if you intend to
-         use a Toshiba CD-ROM drive; otherwise, the option is not needed and
-         would impact performance a bit, so say N.
-
-config ATARI_SCSI_RESET_BOOT
-       bool "Reset SCSI-devices at boottime"
-       depends on ATARI_SCSI
-       help
-         Reset the devices on your Atari whenever it boots.  This makes the
-         boot process fractionally longer but may assist recovery from errors
-         that leave the devices with SCSI operations partway completed.
-
 config MAC_SCSI
        tristate "Macintosh NCR5380 SCSI"
        depends on MAC && SCSI=y
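
The two Atari-only build switches are dropped here; later hunks in this
series reintroduce the Toshiba workaround as a runtime host flag
(FLAG_TOSHIBA_DELAY, used in NCR5380_maybe_reset_bus() below). A sketch
of that compile-time-to-runtime conversion; the flag name and the
2500/500 ms values mirror the patch, everything else is illustrative:

    #include <stdio.h>

    #define FLAG_TOSHIBA_DELAY  (1 << 0)    /* was a Kconfig bool */

    static unsigned int host_flags;         /* set per host at probe time */

    static unsigned int reset_delay_ms(void)
    {
        /* One binary covers both cases; the choice moves to runtime. */
        return (host_flags & FLAG_TOSHIBA_DELAY) ? 2500 : 500;
    }

    int main(void)
    {
        host_flags |= FLAG_TOSHIBA_DELAY;
        printf("%u ms\n", reset_delay_ms());
        return 0;
    }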
index a777e5c412df262b3f7fc72b55676d681ade9cb8..d72867257346eebfe885bb189eb7e5a61cd6fbbe 100644 (file)
@@ -1,17 +1,17 @@
-/* 
+/*
  * NCR 5380 generic driver routines.  These should make it *trivial*
- *      to implement 5380 SCSI drivers under Linux with a non-trantor
- *      architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
  *
- *      Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NR53c400 family chips.
  *
  * Copyright 1993, Drew Eckhardt
- *      Visionary Computing 
- *      (Unix and Linux consulting and custom programming)
- *      drew@colorado.edu
- *      +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
  *
- * For more information, please consult 
+ * For more information, please consult
  *
  * NCR 5380 Family
  * SCSI Protocol Controller
  */
 
 /*
- * Revision 1.10 1998/9/2      Alan Cox
- *                             (alan@lxorguk.ukuu.org.uk)
- * Fixed up the timer lockups reported so far. Things still suck. Looking 
- * forward to 2.3 and per device request queues. Then it'll be possible to
- * SMP thread this beast and improve life no end.
- * Revision 1.9  1997/7/27     Ronald van Cuijlenborg
- *                             (ronald.van.cuijlenborg@tip.nl or nutty@dds.nl)
- * (hopefully) fixed and enhanced USLEEP
- * added support for DTC3181E card (for Mustek scanner)
- *
-
- * Revision 1.8                        Ingmar Baumgart
- *                             (ingmar@gonzo.schwaben.de)
- * added support for NCR53C400a card
- *
-
- * Revision 1.7  1996/3/2       Ray Van Tassle (rayvt@comm.mot.com)
- * added proc_info
- * added support needed for DTC 3180/3280
- * fixed a couple of bugs
- *
-
- * Revision 1.5  1994/01/19  09:14:57  drew
- * Fixed udelay() hack that was being used on DATAOUT phases
- * instead of a proper wait for the final handshake.
- *
- * Revision 1.4  1994/01/19  06:44:25  drew
- * *** empty log message ***
- *
- * Revision 1.3  1994/01/19  05:24:40  drew
- * Added support for TCR LAST_BYTE_SENT bit.
- *
- * Revision 1.2  1994/01/15  06:14:11  drew
- * REAL DMA support, bug fixes.
- *
- * Revision 1.1  1994/01/15  06:00:54  drew
- * Initial revision
- *
+ * With contributions from Ray Van Tassle, Ingmar Baumgart,
+ * Ronald van Cuijlenborg, Alan Cox and others.
  */
 
 /*
- * Further development / testing that should be done : 
+ * Further development / testing that should be done :
  * 1.  Cleanup the NCR5380_transfer_dma function and DMA operation complete
- *     code so that everything does the same thing that's done at the 
- *     end of a pseudo-DMA read operation.
+ * code so that everything does the same thing that's done at the
+ * end of a pseudo-DMA read operation.
  *
  * 2.  Fix REAL_DMA (interrupt driven, polled works fine) -
- *     basically, transfer size needs to be reduced by one 
- *     and the last byte read as is done with PSEUDO_DMA.
- * 
- * 4.  Test SCSI-II tagged queueing (I have no devices which support 
- *      tagged queueing)
- *
- * 5.  Test linked command handling code after Eric is ready with 
- *      the high level code.
+ * basically, transfer size needs to be reduced by one
+ * and the last byte read as is done with PSEUDO_DMA.
+ *
+ * 4.  Test SCSI-II tagged queueing (I have no devices which support
+ * tagged queueing)
  */
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x,y) {printk("LINE:%d   Adding %p to %p\n", __LINE__, (void*)(x), (void*)(y)); if ((x)==(y)) udelay(5); }
-#define REMOVE(w,x,y,z) {printk("LINE:%d   Removing: %p->%p  %p->%p \n", __LINE__, (void*)(w), (void*)(x), (void*)(y), (void*)(z)); if ((x)==(y)) udelay(5); }
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
 
 #ifndef notyet
-#undef LINKED
 #undef REAL_DMA
 #endif
 
-#ifdef REAL_DMA_POLL
-#undef READ_OVERRUNS
-#define READ_OVERRUNS
-#endif
-
 #ifdef BOARD_REQUIRES_NO_DELAY
 #define io_recovery_delay(x)
 #else
 /*
  * Design
  *
- * This is a generic 5380 driver.  To use it on a different platform, 
+ * This is a generic 5380 driver.  To use it on a different platform,
  * one simply writes appropriate system specific macros (ie, data
- * transfer - some PC's will use the I/O bus, 68K's must use 
+ * transfer - some PC's will use the I/O bus, 68K's must use
  * memory mapped) and drops this file in their 'C' wrapper.
  *
- * (Note from hch:  unfortunately it was not enough for the different
- * m68k folks and instead of improving this driver they copied it
- * and hacked it up for their needs.  As a consequence they lost
- * most updates to this driver.  Maybe someone will fix all these
- * drivers to use a common core one day..)
- *
- * As far as command queueing, two queues are maintained for 
+ * As far as command queueing, two queues are maintained for
  * each 5380 in the system - commands that haven't been issued yet,
- * and commands that are currently executing.  This means that an 
- * unlimited number of commands may be queued, letting 
- * more commands propagate from the higher driver levels giving higher 
- * throughput.  Note that both I_T_L and I_T_L_Q nexuses are supported, 
- * allowing multiple commands to propagate all the way to a SCSI-II device 
+ * and commands that are currently executing.  This means that an
+ * unlimited number of commands may be queued, letting
+ * more commands propagate from the higher driver levels giving higher
+ * throughput.  Note that both I_T_L and I_T_L_Q nexuses are supported,
+ * allowing multiple commands to propagate all the way to a SCSI-II device
  * while a command is already executing.
  *
  *
- * Issues specific to the NCR5380 : 
- *
- * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead 
- * piece of hardware that requires you to sit in a loop polling for 
- * the REQ signal as long as you are connected.  Some devices are 
- * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect 
- * while doing long seek operations.
- * 
- * The workaround for this is to keep track of devices that have
- * disconnected.  If the device hasn't disconnected, for commands that
- * should disconnect, we do something like 
+ * Issues specific to the NCR5380 :
  *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- * 
- * Some tweaking of N and M needs to be done.  An algorithm based 
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these 
+ * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead
+ * piece of hardware that requires you to sit in a loop polling for
+ * the REQ signal as long as you are connected.  Some devices are
+ * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
+ * while doing long seek operations. [...] These
  * broken devices are the exception rather than the rule and I'd rather
  * spend my time optimizing for the normal case.
  *
  * which is started from a workqueue for each NCR5380 host in the
  * system.  It attempts to establish I_T_L or I_T_L_Q nexuses by
  * removing the commands from the issue queue and calling
- * NCR5380_select() if a nexus is not established. 
+ * NCR5380_select() if a nexus is not established.
  *
  * Once a nexus is established, the NCR5380_information_transfer()
  * phase goes through the various phases as instructed by the target.
  * if the target goes into MSG IN and sends a DISCONNECT message,
  * the command structure is placed into the per instance disconnected
- * queue, and NCR5380_main tries to find more work.  If the target is 
+ * queue, and NCR5380_main tries to find more work.  If the target is
  * idle for too long, the system will try to sleep.
  *
  * If a command has disconnected, eventually an interrupt will trigger,
  * calling NCR5380_intr()  which will in turn call NCR5380_reselect
  * to reestablish a nexus.  This will run main if necessary.
  *
- * On command termination, the done function will be called as 
+ * On command termination, the done function will be called as
  * appropriate.
  *
- * SCSI pointers are maintained in the SCp field of SCSI command 
+ * SCSI pointers are maintained in the SCp field of SCSI command
  * structures, being initialized after the command is connected
  * in NCR5380_select, and set as appropriate in NCR5380_information_transfer.
  * Note that in violation of the standard, an implicit SAVE POINTERS operation
 /*
  * Using this file :
  * This file a skeleton Linux SCSI driver for the NCR 5380 series
- * of chips.  To use it, you write an architecture specific functions 
+ * of chips.  To use it, you write an architecture specific functions
  * and macros and include this file in your driver.
  *
- * These macros control options : 
- * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be 
- *      defined.
- * 
+ * These macros control options :
+ * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be
+ * defined.
+ *
  * AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- *      for commands that return with a CHECK CONDITION status. 
+ * for commands that return with a CHECK CONDITION status.
  *
  * DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- *      transceivers. 
+ * transceivers.
  *
  * DONT_USE_INTR - if defined, never use interrupts, even if we probe or
- *      override-configure an IRQ.
- *
- * LIMIT_TRANSFERSIZE - if defined, limit the pseudo-dma transfers to 512
- *      bytes at a time.  Since interrupts are disabled by default during
- *      these transfers, we might need this to give reasonable interrupt
- *      service time if the transfer size gets too large.
- *
- * LINKED - if defined, linked commands are supported.
+ * override-configure an IRQ.
  *
  * PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
  *
  * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
  *
  * REAL_DMA_POLL - if defined, REAL DMA is used but the driver doesn't
- *      rely on phase mismatch and EOP interrupts to determine end 
- *      of phase.
- *
- * UNSAFE - leave interrupts enabled during pseudo-DMA transfers.  You
- *          only really want to use this if you're having a problem with
- *          dropped characters during high speed communications, and even
- *          then, you're going to be better off twiddling with transfersize
- *          in the high level code.
- *
- * Defaults for these will be provided although the user may want to adjust 
- * these to allocate CPU resources to the SCSI driver or "real" code.
- * 
- * USLEEP_SLEEP - amount of time, in jiffies, to sleep
- *
- * USLEEP_POLL - amount of time, in jiffies, to poll
+ * rely on phase mismatch and EOP interrupts to determine end
+ * of phase.
  *
  * These macros MUST be defined :
- * NCR5380_local_declare() - declare any local variables needed for your
- *      transfer routines.
  *
- * NCR5380_setup(instance) - initialize any local variables needed from a given
- *      instance of the host adapter for NCR5380_{read,write,pread,pwrite}
- * 
  * NCR5380_read(register)  - read from the specified register
  *
- * NCR5380_write(register, value) - write to the specific register 
+ * NCR5380_write(register, value) - write to the specific register
  *
- * NCR5380_implementation_fields  - additional fields needed for this 
- *      specific implementation of the NCR5380
+ * NCR5380_implementation_fields  - additional fields needed for this
+ * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
- * REAL functions : 
+ * REAL functions :
  * NCR5380_REAL_DMA should be defined if real DMA is to be used.
- * Note that the DMA setup functions should return the number of bytes 
- *      that they were able to program the controller for.
+ * Note that the DMA setup functions should return the number of bytes
+ * that they were able to program the controller for.
  *
- * Also note that generic i386/PC versions of these macros are 
- *      available as NCR5380_i386_dma_write_setup,
- *      NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * Also note that generic i386/PC versions of these macros are
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
  *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * NCR5380_pread(instance, dst, count);
  *
  * The generic driver is initialized by calling NCR5380_init(instance),
- * after setting the appropriate host specific fields and ID.  If the 
+ * after setting the appropriate host specific fields and ID.  If the
  * driver wishes to autoprobe for an IRQ line, the NCR5380_probe_irq(instance,
  * possible) function may be used.
  */
 
-static int do_abort(struct Scsi_Host *host);
-static void do_reset(struct Scsi_Host *host);
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
 
-/*
- *     initialize_SCp          -       init the scsi pointer field
- *     @cmd: command block to set up
+/**
+ * initialize_SCp - init the scsi pointer field
+ * @cmd: command block to set up
  *
- *     Set up the internal fields in the SCSI command.
+ * Set up the internal fields in the SCSI command.
  */
 
 static inline void initialize_SCp(struct scsi_cmnd *cmd)
 {
-       /* 
-        * Initialize the Scsi Pointer field so that all of the commands in the 
+       /*
+        * Initialize the Scsi Pointer field so that all of the commands in the
         * various queues are valid.
         */
 
@@ -295,120 +198,123 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.ptr = NULL;
                cmd->SCp.this_residual = 0;
        }
+
+       cmd->SCp.Status = 0;
+       cmd->SCp.Message = 0;
 }
 
 /**
- *     NCR5380_poll_politely   -       wait for NCR5380 status bits
- *     @instance: controller to poll
- *     @reg: 5380 register to poll
- *     @bit: Bitmask to check
- *     @val: Value required to exit
- *
- *     Polls the NCR5380 in a reasonably efficient manner waiting for
- *     an event to occur, after a short quick poll we begin giving the
- *     CPU back in non IRQ contexts
- *
- *     Returns the value of the register or a negative error code.
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred otherwise -ETIMEDOUT.
  */
-static int NCR5380_poll_politely(struct Scsi_Host *instance, int reg, int bit, int val, int t)
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+                                  int reg1, int bit1, int val1,
+                                  int reg2, int bit2, int val2, int wait)
 {
-       NCR5380_local_declare();
-       int n = 500;            /* At about 8uS a cycle for the cpu access */
-       unsigned long end = jiffies + t;
-       int r;
-       
-       NCR5380_setup(instance);
-
-       while( n-- > 0)
-       {
-               r = NCR5380_read(reg);
-               if((r & bit) == val)
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long deadline = jiffies + wait;
+       unsigned long n;
+
+       /* Busy-wait for up to 10 ms */
+       n = min(10000U, jiffies_to_usecs(wait));
+       n *= hostdata->accesses_per_ms;
+       n /= 2000;
+       do {
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
                        return 0;
                cpu_relax();
-       }
-       
-       /* t time yet ? */
-       while(time_before(jiffies, end))
-       {
-               r = NCR5380_read(reg);
-               if((r & bit) == val)
+       } while (n--);
+
+       if (irqs_disabled() || in_interrupt())
+               return -ETIMEDOUT;
+
+       /* Repeatedly sleep for 1 ms until deadline */
+       while (time_is_after_jiffies(deadline)) {
+               schedule_timeout_uninterruptible(1);
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
                        return 0;
-               if(!in_interrupt())
-                       cond_resched();
-               else
-                       cpu_relax();
        }
+
        return -ETIMEDOUT;
 }
 
-static struct {
-       unsigned char value;
-       const char *name;
-} phases[] __maybe_unused = {
-       {PHASE_DATAOUT, "DATAOUT"}, 
-       {PHASE_DATAIN, "DATAIN"}, 
-       {PHASE_CMDOUT, "CMDOUT"}, 
-       {PHASE_STATIN, "STATIN"}, 
-       {PHASE_MSGOUT, "MSGOUT"}, 
-       {PHASE_MSGIN, "MSGIN"}, 
-       {PHASE_UNKNOWN, "UNKNOWN"}
-};
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+                                        int reg, int bit, int val, int wait)
+{
+       return NCR5380_poll_politely2(instance, reg, bit, val,
+                                               reg, bit, val, wait);
+}
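
The polling rewrite has two phases: a short calibrated busy-wait for
fast devices, then 1 ms sleeps until the deadline (with the sleep phase
skipped entirely in irq context). The single-register
NCR5380_poll_politely() simply passes the same register twice. A
user-space analogue of the two-phase loop (a volatile flag stands in for
the chip status register):

    #include <stdbool.h>
    #include <time.h>

    static volatile bool event;     /* stand-in for a chip status bit */

    static double now(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec + ts.tv_nsec / 1e9;
    }

    static int poll_politely(double timeout_s)
    {
        const struct timespec ms = { 0, 1000 * 1000 };  /* 1 ms */
        double deadline = now() + timeout_s;
        int n = 10000;

        while (n--)                 /* phase 1: short busy-wait */
            if (event)
                return 0;

        while (now() < deadline) {  /* phase 2: sleep in 1 ms slices */
            nanosleep(&ms, NULL);
            if (event)
                return 0;
        }
        return -1;                  /* -ETIMEDOUT in the driver */
    }

    int main(void)
    {
        event = true;
        return poll_politely(0.1);
    }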
 
 #if NDEBUG
 static struct {
        unsigned char mask;
        const char *name;
-} signals[] = { 
-       {SR_DBP, "PARITY"}, 
-       {SR_RST, "RST"}, 
-       {SR_BSY, "BSY"}, 
-       {SR_REQ, "REQ"}, 
-       {SR_MSG, "MSG"}, 
-       {SR_CD, "CD"}, 
-       {SR_IO, "IO"}, 
-       {SR_SEL, "SEL"}, 
+} signals[] = {
+       {SR_DBP, "PARITY"},
+       {SR_RST, "RST"},
+       {SR_BSY, "BSY"},
+       {SR_REQ, "REQ"},
+       {SR_MSG, "MSG"},
+       {SR_CD, "CD"},
+       {SR_IO, "IO"},
+       {SR_SEL, "SEL"},
        {0, NULL}
-}, 
+},
 basrs[] = {
-       {BASR_ATN, "ATN"}, 
-       {BASR_ACK, "ACK"}, 
+       {BASR_ATN, "ATN"},
+       {BASR_ACK, "ACK"},
        {0, NULL}
-}, 
-icrs[] = { 
-       {ICR_ASSERT_RST, "ASSERT RST"}, 
-       {ICR_ASSERT_ACK, "ASSERT ACK"}, 
-       {ICR_ASSERT_BSY, "ASSERT BSY"}, 
-       {ICR_ASSERT_SEL, "ASSERT SEL"}, 
-       {ICR_ASSERT_ATN, "ASSERT ATN"}, 
-       {ICR_ASSERT_DATA, "ASSERT DATA"}, 
+},
+icrs[] = {
+       {ICR_ASSERT_RST, "ASSERT RST"},
+       {ICR_ASSERT_ACK, "ASSERT ACK"},
+       {ICR_ASSERT_BSY, "ASSERT BSY"},
+       {ICR_ASSERT_SEL, "ASSERT SEL"},
+       {ICR_ASSERT_ATN, "ASSERT ATN"},
+       {ICR_ASSERT_DATA, "ASSERT DATA"},
        {0, NULL}
-}, 
-mrs[] = { 
-       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"}, 
-       {MR_TARGET, "MODE TARGET"}, 
-       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"}, 
-       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"}, 
-       {MR_MONITOR_BSY, "MODE MONITOR BSY"}, 
-       {MR_DMA_MODE, "MODE DMA"}, 
-       {MR_ARBITRATE, "MODE ARBITRATION"}, 
+},
+mrs[] = {
+       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+       {MR_TARGET, "MODE TARGET"},
+       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+       {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
+       {MR_MONITOR_BSY, "MODE MONITOR BSY"},
+       {MR_DMA_MODE, "MODE DMA"},
+       {MR_ARBITRATE, "MODE ARBITRATION"},
        {0, NULL}
 };
 
 /**
- *     NCR5380_print   -       print scsi bus signals
- *     @instance:      adapter state to dump
- *
- *     Print the SCSI bus signals for debugging purposes
+ * NCR5380_print - print scsi bus signals
+ * @instance: adapter state to dump
  *
- *     Locks: caller holds hostdata lock (not essential)
+ * Print the SCSI bus signals for debugging purposes
  */
 
 static void NCR5380_print(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
        unsigned char status, data, basr, mr, icr, i;
-       NCR5380_setup(instance);
 
        data = NCR5380_read(CURRENT_SCSI_DATA_REG);
        status = NCR5380_read(STATUS_REG);
@@ -435,117 +341,56 @@ static void NCR5380_print(struct Scsi_Host *instance)
        printk("\n");
 }
 
+static struct {
+       unsigned char value;
+       const char *name;
+} phases[] = {
+       {PHASE_DATAOUT, "DATAOUT"},
+       {PHASE_DATAIN, "DATAIN"},
+       {PHASE_CMDOUT, "CMDOUT"},
+       {PHASE_STATIN, "STATIN"},
+       {PHASE_MSGOUT, "MSGOUT"},
+       {PHASE_MSGIN, "MSGIN"},
+       {PHASE_UNKNOWN, "UNKNOWN"}
+};
 
-/* 
- *     NCR5380_print_phase     -       show SCSI phase
- *     @instance: adapter to dump
- *
- *     Print the current SCSI phase for debugging purposes
+/**
+ * NCR5380_print_phase - show SCSI phase
+ * @instance: adapter to dump
  *
- *     Locks: none
+ * Print the current SCSI phase for debugging purposes
  */
 
 static void NCR5380_print_phase(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
        unsigned char status;
        int i;
-       NCR5380_setup(instance);
 
        status = NCR5380_read(STATUS_REG);
        if (!(status & SR_REQ))
-               printk("scsi%d : REQ not asserted, phase unknown.\n", instance->host_no);
+               shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
        else {
-               for (i = 0; (phases[i].value != PHASE_UNKNOWN) && (phases[i].value != (status & PHASE_MASK)); ++i);
-               printk("scsi%d : phase %s\n", instance->host_no, phases[i].name);
+               for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
+                    (phases[i].value != (status & PHASE_MASK)); ++i)
+                       ;
+               shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
        }
 }
 #endif
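
The phases[] table moves below NCR5380_print() and keeps its
sentinel-terminated shape: the scan stops either at a match or at the
PHASE_UNKNOWN entry, which doubles as the fallback name. A compact
sketch of that lookup idiom (the phase codes here are invented; the real
values come from the 5380 status register definitions):

    #include <stdio.h>

    #define PHASE_UNKNOWN 0xff      /* illustrative sentinel */

    static const struct {
        unsigned char value;
        const char *name;
    } phases[] = {
        { 0x00, "DATAOUT" },
        { 0x04, "DATAIN"  },
        { PHASE_UNKNOWN, "UNKNOWN" },   /* sentinel terminates the scan */
    };

    static const char *phase_name(unsigned char v)
    {
        int i;

        for (i = 0; phases[i].value != PHASE_UNKNOWN &&
                    phases[i].value != v; ++i)
            ;
        return phases[i].name;      /* falls back to "UNKNOWN" */
    }

    int main(void)
    {
        printf("%s\n", phase_name(0x04));
        return 0;
    }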
 
-/*
- * These need tweaking, and would probably work best as per-device 
- * flags initialized differently for disk, tape, cd, etc devices.
- * People with broken devices are free to experiment as to what gives
- * the best results for them.
- *
- * USLEEP_SLEEP should be a minimum seek time.
- *
- * USLEEP_POLL should be a maximum rotational latency.
- */
-#ifndef USLEEP_SLEEP
-/* 20 ms (reasonable hard disk speed) */
-#define USLEEP_SLEEP msecs_to_jiffies(20)
-#endif
-/* 300 RPM (floppy speed) */
-#ifndef USLEEP_POLL
-#define USLEEP_POLL msecs_to_jiffies(200)
-#endif
-#ifndef USLEEP_WAITLONG
-/* RvC: (reasonable time to wait on select error) */
-#define USLEEP_WAITLONG USLEEP_SLEEP
-#endif
-
-/* 
- * Function : int should_disconnect (unsigned char cmd)
- *
- * Purpose : decide whether a command would normally disconnect or 
- *      not, since if it won't disconnect we should go to sleep.
- *
- * Input : cmd - opcode of SCSI command
- *
- * Returns : DISCONNECT_LONG if we should disconnect for a really long 
- *      time (ie always, sleep, look for REQ active, sleep), 
- *      DISCONNECT_TIME_TO_DATA if we would only disconnect for a normal
- *      time-to-data delay, DISCONNECT_NONE if this command would return
- *      immediately.
- *
- *      Future sleep algorithms based on time to data can exploit 
- *      something like this so they can differentiate between "normal" 
- *      (ie, read, write, seek) and unusual commands (ie, * format).
- *
- * Note : We don't deal with commands that handle an immediate disconnect,
- *        
- */
 
-static int should_disconnect(unsigned char cmd)
-{
-       switch (cmd) {
-       case READ_6:
-       case WRITE_6:
-       case SEEK_6:
-       case READ_10:
-       case WRITE_10:
-       case SEEK_10:
-               return DISCONNECT_TIME_TO_DATA;
-       case FORMAT_UNIT:
-       case SEARCH_HIGH:
-       case SEARCH_LOW:
-       case SEARCH_EQUAL:
-               return DISCONNECT_LONG;
-       default:
-               return DISCONNECT_NONE;
-       }
-}
-
-static void NCR5380_set_timer(struct NCR5380_hostdata *hostdata, unsigned long timeout)
-{
-       hostdata->time_expires = jiffies + timeout;
-       schedule_delayed_work(&hostdata->coroutine, timeout);
-}
-
-
-static int probe_irq __initdata = 0;
+static int probe_irq __initdata;
 
 /**
- *     probe_intr      -       helper for IRQ autoprobe
- *     @irq: interrupt number
- *     @dev_id: unused
- *     @regs: unused
+ * probe_intr  -       helper for IRQ autoprobe
+ * @irq: interrupt number
+ * @dev_id: unused
+ * @regs: unused
  *
- *     Set a flag to indicate the IRQ in question was received. This is
- *     used by the IRQ probe code.
+ * Set a flag to indicate the IRQ in question was received. This is
+ * used by the IRQ probe code.
  */
+
 static irqreturn_t __init probe_intr(int irq, void *dev_id)
 {
        probe_irq = irq;
@@ -553,24 +398,20 @@ static irqreturn_t __init probe_intr(int irq, void *dev_id)
 }
 
 /**
- *     NCR5380_probe_irq       -       find the IRQ of an NCR5380
- *     @instance: NCR5380 controller
- *     @possible: bitmask of ISA IRQ lines
- *
- *     Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
- *     and then looking to see what interrupt actually turned up.
+ * NCR5380_probe_irq   -       find the IRQ of an NCR5380
+ * @instance: NCR5380 controller
+ * @possible: bitmask of ISA IRQ lines
  *
- *     Locks: none, irqs must be enabled on entry
+ * Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
+ * and then looking to see what interrupt actually turned up.
  */
 
 static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
                                                int possible)
 {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned long timeout;
        int trying_irqs, i, mask;
-       NCR5380_setup(instance);
 
        for (trying_irqs = 0, i = 1, mask = 2; i < 16; ++i, mask <<= 1)
                if ((mask & possible) && (request_irq(i, &probe_intr, 0, "NCR-probe", NULL) == 0))
@@ -581,7 +422,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 
        /*
         * An interrupt is triggered whenever BSY = false, SEL = true
-        * and a bit set in the SELECT_ENABLE_REG is asserted on the 
+        * and a bit set in the SELECT_ENABLE_REG is asserted on the
         * SCSI bus.
         *
         * Note that the bus is only driven when the phase control signals
@@ -596,7 +437,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 
        while (probe_irq == NO_IRQ && time_before(jiffies, timeout))
                schedule_timeout_uninterruptible(1);
-       
+
        NCR5380_write(SELECT_ENABLE_REG, 0);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
@@ -608,12 +449,10 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 }
 
 /**
- *     NCR58380_info - report driver and host information
- *     @instance: relevant scsi host instance
- *
- *     For use as the host template info() handler.
+ * NCR5380_info - report driver and host information
+ * @instance: relevant scsi host instance
  *
- *     Locks: none
+ * For use as the host template info() handler.
  */
 
 static const char *NCR5380_info(struct Scsi_Host *instance)
@@ -633,20 +472,14 @@ static void prepare_info(struct Scsi_Host *instance)
                 "can_queue %d, cmd_per_lun %d, "
                 "sg_tablesize %d, this_id %d, "
                 "flags { %s%s%s}, "
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
-                "USLEEP_POLL %lu, USLEEP_WAITLONG %lu, "
-#endif
                 "options { %s} ",
                 instance->hostt->name, instance->io_port, instance->n_io_port,
                 instance->base, instance->irq,
                 instance->can_queue, instance->cmd_per_lun,
                 instance->sg_tablesize, instance->this_id,
-                hostdata->flags & FLAG_NCR53C400     ? "NCR53C400 "     : "",
-                hostdata->flags & FLAG_DTC3181E      ? "DTC3181E "      : "",
+                hostdata->flags & FLAG_NO_DMA_FIXUP  ? "NO_DMA_FIXUP "  : "",
                 hostdata->flags & FLAG_NO_PSEUDO_DMA ? "NO_PSEUDO_DMA " : "",
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
-                USLEEP_POLL, USLEEP_WAITLONG,
-#endif
+                hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY "  : "",
 #ifdef AUTOPROBE_IRQ
                 "AUTOPROBE_IRQ "
 #endif
@@ -664,47 +497,11 @@ static void prepare_info(struct Scsi_Host *instance)
 #endif
 #ifdef PSEUDO_DMA
                 "PSEUDO_DMA "
-#endif
-#ifdef UNSAFE
-                "UNSAFE "
-#endif
-#ifdef NCR53C400
-                "NCR53C400 "
 #endif
                 "");
 }
 
-/**
- *     NCR5380_print_status    -       dump controller info
- *     @instance: controller to dump
- *
- *     Print commands in the various queues, called from NCR5380_abort 
- *     and NCR5380_debug to aid debugging.
- *
- *     Locks: called functions disable irqs
- */
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
-       NCR5380_dprint(NDEBUG_ANY, instance);
-       NCR5380_dprint_phase(NDEBUG_ANY, instance);
-}
-
 #ifdef PSEUDO_DMA
-/******************************************/
-/*
- * /proc/scsi/[dtc pas16 t128 generic]/[0-ASC_NUM_BOARD_SUPPORTED]
- *
- * *buffer: I/O buffer
- * **start: if inout == FALSE pointer into buffer where user read should start
- * offset: current offset
- * length: length of buffer
- * hostno: Scsi_Host host_no
- * inout: TRUE - user is writing; FALSE - user is reading
- *
- * Return the number of bytes read from or written
- */
-
 static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
        char *buffer, int length)
 {
@@ -714,104 +511,41 @@ static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
        hostdata->spin_max_w = 0;
        return 0;
 }
-#endif
-
-static
-void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m);
-static
-void lprint_command(unsigned char *cmd, struct seq_file *m);
-static
-void lprint_opcode(int opcode, struct seq_file *m);
 
 static int __maybe_unused NCR5380_show_info(struct seq_file *m,
-       struct Scsi_Host *instance)
+                                            struct Scsi_Host *instance)
 {
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-
-       hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
-#ifdef PSEUDO_DMA
        seq_printf(m, "Highwater I/O busy spin counts: write %d, read %d\n",
                hostdata->spin_max_w, hostdata->spin_max_r);
-#endif
-       spin_lock_irq(instance->host_lock);
-       if (!hostdata->connected)
-               seq_printf(m, "scsi%d: no currently connected command\n", instance->host_no);
-       else
-               lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
-       seq_printf(m, "scsi%d: issue_queue\n", instance->host_no);
-       for (ptr = (struct scsi_cmnd *) hostdata->issue_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
-               lprint_Scsi_Cmnd(ptr, m);
-
-       seq_printf(m, "scsi%d: disconnected_queue\n", instance->host_no);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
-               lprint_Scsi_Cmnd(ptr, m);
-       spin_unlock_irq(instance->host_lock);
        return 0;
 }
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
-       seq_printf(m, "scsi%d : destination target %d, lun %llu\n", cmd->device->host->host_no, cmd->device->id, cmd->device->lun);
-       seq_puts(m, "        command = ");
-       lprint_command(cmd->cmnd, m);
-}
-
-static void lprint_command(unsigned char *command, struct seq_file *m)
-{
-       int i, s;
-       lprint_opcode(command[0], m);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               seq_printf(m, "%02x ", command[i]);
-       seq_putc(m, '\n');
-}
-
-static void lprint_opcode(int opcode, struct seq_file *m)
-{
-       seq_printf(m, "%2d (0x%02x)", opcode, opcode);
-}
-
+#endif
 
 /**
- *     NCR5380_init    -       initialise an NCR5380
- *     @instance: adapter to configure
- *     @flags: control flags
+ * NCR5380_init - initialise an NCR5380
+ * @instance: adapter to configure
+ * @flags: control flags
  *
- *     Initializes *instance and corresponding 5380 chip,
- *      with flags OR'd into the initial flags value.
+ * Initializes *instance and corresponding 5380 chip,
+ * with flags OR'd into the initial flags value.
  *
- *     Notes : I assume that the host, hostno, and id bits have been
- *      set correctly.  I don't care about the irq and other fields. 
+ * Notes : I assume that the host, hostno, and id bits have been
+ * set correctly. I don't care about the irq and other fields.
  *
- *     Returns 0 for success
- *
- *     Locks: interrupts must be enabled when we are called 
+ * Returns 0 for success
  */
 
 static int NCR5380_init(struct Scsi_Host *instance, int flags)
 {
-       NCR5380_local_declare();
-       int i, pass;
-       unsigned long timeout;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
-       if(in_interrupt())
-               printk(KERN_ERR "NCR5380_init called with interrupts off!\n");
-       /* 
-        * On NCR53C400 boards, NCR5380 registers are mapped 8 past 
-        * the base address.
-        */
-
-#ifdef NCR53C400
-       if (flags & FLAG_NCR53C400)
-               instance->NCR5380_instance_name += NCR53C400_address_adjust;
-#endif
-
-       NCR5380_setup(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int i;
+       unsigned long deadline;
 
-       hostdata->aborted = 0;
+       hostdata->host = instance;
        hostdata->id_mask = 1 << instance->this_id;
+       hostdata->id_higher_mask = 0;
        for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
                if (i > hostdata->id_mask)
                        hostdata->id_higher_mask |= i;
@@ -820,21 +554,21 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
 #ifdef REAL_DMA
        hostdata->dmalen = 0;
 #endif
-       hostdata->targets_present = 0;
+       spin_lock_init(&hostdata->lock);
        hostdata->connected = NULL;
-       hostdata->issue_queue = NULL;
-       hostdata->disconnected_queue = NULL;
-       
-       INIT_DELAYED_WORK(&hostdata->coroutine, NCR5380_main);
-       
-       /* The CHECK code seems to break the 53C400. Will check it later maybe */
-       if (flags & FLAG_NCR53C400)
-               hostdata->flags = FLAG_HAS_LAST_BYTE_SENT | flags;
-       else
-               hostdata->flags = FLAG_CHECK_LAST_BYTE_SENT | flags;
+       hostdata->sensing = NULL;
+       INIT_LIST_HEAD(&hostdata->autosense);
+       INIT_LIST_HEAD(&hostdata->unissued);
+       INIT_LIST_HEAD(&hostdata->disconnected);
 
-       hostdata->host = instance;
-       hostdata->time_expires = 0;
+       hostdata->flags = flags;
+
+       INIT_WORK(&hostdata->main_task, NCR5380_main);
+       hostdata->work_q = alloc_workqueue("ncr5380_%d",
+                               WQ_UNBOUND | WQ_MEM_RECLAIM,
+                               1, instance->host_no);
+       if (!hostdata->work_q)
+               return -ENOMEM;
 
        prepare_info(instance);
 
@@ -843,43 +577,69 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
-#ifdef NCR53C400
-       if (hostdata->flags & FLAG_NCR53C400) {
-               NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
-       }
-#endif
+       /* Calibrate register polling loop */
+       i = 0;
+       deadline = jiffies + 1;
+       do {
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       deadline += msecs_to_jiffies(256);
+       do {
+               NCR5380_read(STATUS_REG);
+               ++i;
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       hostdata->accesses_per_ms = i / 256;
 
-       /* 
-        * Detect and correct bus wedge problems.
-        *
-        * If the system crashed, it may have crashed in a state 
-        * where a SCSI command was still executing, and the 
-        * SCSI bus is not in a BUS FREE STATE.
-        *
-        * If this is the case, we'll try to abort the currently
-        * established nexus which we know nothing about, and that
-        * failing, do a hard reset of the SCSI bus 
-        */
+       return 0;
+}
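
The calibration loop added to NCR5380_init() counts how many register
reads fit into 256 ms, so the busy-wait budget in the polling code can
be expressed in wall-clock terms (accesses_per_ms). A rough user-space
analogue of the measurement (a volatile dummy replaces NCR5380_read(),
and reading the clock each iteration adds skew the jiffies-based
original does not have):

    #include <stdio.h>
    #include <time.h>

    static volatile unsigned char fake_reg;

    int main(void)
    {
        struct timespec t0, t1;
        unsigned long i = 0;
        double ms;

        clock_gettime(CLOCK_MONOTONIC, &t0);
        do {
            (void)fake_reg;         /* stands in for a register read */
            ++i;
            clock_gettime(CLOCK_MONOTONIC, &t1);
            ms = (t1.tv_sec - t0.tv_sec) * 1e3 +
                 (t1.tv_nsec - t0.tv_nsec) / 1e6;
        } while (ms < 256.0);

        printf("accesses_per_ms ~ %lu\n", i / 256);
        return 0;
    }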
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int pass;
 
        for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
                switch (pass) {
                case 1:
                case 3:
                case 5:
-                       printk(KERN_INFO "scsi%d: SCSI bus busy, waiting up to five seconds\n", instance->host_no);
-                       timeout = jiffies + 5 * HZ;
-                       NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, 0, 5*HZ);
+                       shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+                       NCR5380_poll_politely(instance,
+                                             STATUS_REG, SR_BSY, 0, 5 * HZ);
                        break;
                case 2:
-                       printk(KERN_WARNING "scsi%d: bus busy, attempting abort\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
                        do_abort(instance);
                        break;
                case 4:
-                       printk(KERN_WARNING "scsi%d: bus busy, attempting reset\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
                        do_reset(instance);
+                       /* Wait after a reset; the SCSI standard calls for
+                        * 250ms, we wait 500ms to be on the safe side.
+                        * But some Toshiba CD-ROMs need ten times that.
+                        */
+                       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+                               msleep(2500);
+                       else
+                               msleep(500);
                        break;
                case 6:
-                       printk(KERN_ERR "scsi%d: bus locked solid or invalid override\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus locked solid\n");
                        return -ENXIO;
                }
        }
@@ -887,450 +647,513 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
 }
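
The recovery logic escalates over six numbered passes: wait politely on
the odd passes, abort the unknown nexus on pass 2, reset the bus on pass
4, and give up on pass 6. A stripped-down model of that
switch-on-pass-number escalation (the stub remedies just simulate a bus
that only a reset can free):

    #include <stdio.h>

    static int bus_busy = 1;

    static void wait_bus(void)  { }               /* mildest remedy */
    static void abort_bus(void) { }               /* abort the nexus */
    static void reset_bus(void) { bus_busy = 0; } /* reset finally works */

    static int maybe_reset(void)
    {
        int pass;

        for (pass = 1; bus_busy && pass <= 6; ++pass) {
            switch (pass) {
            case 1: case 3: case 5:
                wait_bus();
                break;
            case 2:
                abort_bus();
                break;
            case 4:
                reset_bus();
                break;
            case 6:
                return -1;          /* bus locked solid */
            }
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", maybe_reset());
        return 0;
    }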
 
 /**
- *     NCR5380_exit    -       remove an NCR5380
- *     @instance: adapter to remove
+ * NCR5380_exit - remove an NCR5380
+ * @instance: adapter to remove
+ *
+ * Assumes that no more work can be queued (e.g. by NCR5380_intr).
  */
 
 static void NCR5380_exit(struct Scsi_Host *instance)
 {
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
-       cancel_delayed_work_sync(&hostdata->coroutine);
+       cancel_work_sync(&hostdata->main_task);
+       destroy_workqueue(hostdata->work_q);
 }
 
 /**
- *     NCR5380_queue_command           -       queue a command
- *     @cmd: SCSI command
- *     @done: completion handler
- *
- *      cmd is added to the per instance issue_queue, with minor 
- *      twiddling done to the host specific fields of cmd.  If the 
- *      main coroutine is not running, it is restarted.
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+                         struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+       dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+       if (hostdata->sensing == cmd) {
+               /* Autosense processing ends here */
+               if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+                       set_host_byte(cmd, DID_ERROR);
+               } else
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               hostdata->sensing = NULL;
+       }
+
+       hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+
+       cmd->scsi_done(cmd);
+}
+
+/**
+ * NCR5380_queue_command - queue a command
+ * @instance: the relevant SCSI adapter
+ * @cmd: SCSI command
  *
- *     Locks: host lock taken by caller
+ * cmd is added to the per-instance issue queue, with minor
+ * twiddling done to the host specific fields of cmd.  If the
+ * main coroutine is not running, it is restarted.
  */
 
-static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd, void (*done) (struct scsi_cmnd *))
+static int NCR5380_queue_command(struct Scsi_Host *instance,
+                                 struct scsi_cmnd *cmd)
 {
-       struct Scsi_Host *instance = cmd->device->host;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       struct scsi_cmnd *tmp;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+       unsigned long flags;
 
 #if (NDEBUG & NDEBUG_NO_WRITE)
        switch (cmd->cmnd[0]) {
        case WRITE_6:
        case WRITE_10:
-               printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n", instance->host_no);
+               shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
                cmd->result = (DID_ERROR << 16);
-               done(cmd);
+               cmd->scsi_done(cmd);
                return 0;
        }
-#endif                         /* (NDEBUG & NDEBUG_NO_WRITE) */
+#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
 
-       /* 
-        * We use the host_scribble field as a pointer to the next command  
-        * in a queue 
-        */
-
-       cmd->host_scribble = NULL;
-       cmd->scsi_done = done;
        cmd->result = 0;
 
-       /* 
-        * Insert the cmd into the issue queue. Note that REQUEST SENSE 
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+       /*
+        * Insert the cmd into the issue queue. Note that REQUEST SENSE
         * commands are added to the head of the queue since any command will
-        * clear the contingent allegiance condition that exists and the 
+        * clear the contingent allegiance condition that exists and the
         * sense data is only guaranteed to be valid while the condition exists.
         */
 
-       if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
-               LIST(cmd, hostdata->issue_queue);
-               cmd->host_scribble = (unsigned char *) hostdata->issue_queue;
-               hostdata->issue_queue = cmd;
-       } else {
-               for (tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp->host_scribble; tmp = (struct scsi_cmnd *) tmp->host_scribble);
-               LIST(cmd, tmp);
-               tmp->host_scribble = (unsigned char *) cmd;
-       }
-       dprintk(NDEBUG_QUEUES, "scsi%d : command added to %s of queue\n", instance->host_no, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+       if (cmd->cmnd[0] == REQUEST_SENSE)
+               list_add(&ncmd->list, &hostdata->unissued);
+       else
+               list_add_tail(&ncmd->list, &hostdata->unissued);
+
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
+       dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+                cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
 
-       /* Run the coroutine if it isn't already running. */
        /* Kick off command processing */
-       schedule_delayed_work(&hostdata->coroutine, 0);
+       queue_work(hostdata->work_q, &hostdata->main_task);
        return 0;
 }
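
Queueing now uses list_add() for REQUEST SENSE (head, so the sense data
is fetched before anything can clear the contingent allegiance
condition) and list_add_tail() for everything else. The same
head-vs-tail policy on a hand-rolled circular doubly linked list
(intrusive, like the kernel's list_head; the node layout is
illustrative):

    #include <stdio.h>

    struct node { int is_sense; struct node *prev, *next; };

    static struct node queue = { 0, &queue, &queue };  /* empty list head */

    static void enqueue(struct node *n)
    {
        /* Sense commands insert after the head; others after the last
         * node - i.e. list_add() vs list_add_tail(). */
        struct node *at = n->is_sense ? &queue : queue.prev;

        n->prev = at;
        n->next = at->next;
        at->next->prev = n;
        at->next = n;
    }

    int main(void)
    {
        struct node a = { 0, 0, 0 }, b = { 1, 0, 0 };
        struct node *p;

        enqueue(&a);    /* ordinary command: tail */
        enqueue(&b);    /* REQUEST SENSE: head */
        for (p = queue.next; p != &queue; p = p->next)
            printf("is_sense=%d\n", p->is_sense);  /* 1 then 0 */
        return 0;
    }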
 
-static DEF_SCSI_QCMD(NCR5380_queue_command)
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *cmd;
+
+       if (list_empty(&hostdata->autosense)) {
+               list_for_each_entry(ncmd, &hostdata->unissued, list) {
+                       cmd = NCR5380_to_scmd(ncmd);
+                       dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+                                cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+                       if (!(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))) {
+                               list_del(&ncmd->list);
+                               dsprintk(NDEBUG_QUEUES, instance,
+                                        "dequeue: removed %p from issue queue\n", cmd);
+                               return cmd;
+                       }
+               }
+       } else {
+               /* Autosense processing begins here */
+               ncmd = list_first_entry(&hostdata->autosense,
+                                       struct NCR5380_cmd, list);
+               list_del(&ncmd->list);
+               cmd = NCR5380_to_scmd(ncmd);
+               dsprintk(NDEBUG_QUEUES, instance,
+                        "dequeue: removed %p from autosense queue\n", cmd);
+               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+               hostdata->sensing = cmd;
+               return cmd;
+       }
+       return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+       if (hostdata->sensing) {
+               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               list_add(&ncmd->list, &hostdata->autosense);
+               hostdata->sensing = NULL;
+       } else
+               list_add(&ncmd->list, &hostdata->unissued);
+}
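dequeue_next_cmd() applies two rules: the autosense list always wins, and an unissued command is eligible only when no command is outstanding on its target/LUN nexus. The nexus test is a per-target bitmap of LUNs; a small sketch with invented values:

#include <stdio.h>

/* One byte per target; bit n set means LUN n has an outstanding command. */
static unsigned char busy[8];

static int nexus_free(int target, int lun)
{
	return !(busy[target] & (1 << lun));
}

int main(void)
{
	busy[3] = 1 << 0;			/* target 3, LUN 0 is busy */
	printf("%d\n", nexus_free(3, 0));	/* 0: skip this command    */
	printf("%d\n", nexus_free(3, 1));	/* 1: eligible             */
	printf("%d\n", nexus_free(4, 0));	/* 1: eligible             */
	return 0;
}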
 
 /**
- *     NCR5380_main    -       NCR state machines
- *
- *     NCR5380_main is a coroutine that runs as long as more work can 
- *      be done on the NCR5380 host adapters in a system.  Both 
- *      NCR5380_queue_command() and NCR5380_intr() will try to start it 
- *      in case it is not running.
- * 
- *     Locks: called as its own thread with no locks held. Takes the
- *     host lock and called routines may take the isa dma lock.
+ * NCR5380_main - NCR state machines
+ *
+ * NCR5380_main is a coroutine that runs as long as more work can
+ * be done on the NCR5380 host adapters in a system.  Both
+ * NCR5380_queue_command() and NCR5380_intr() will try to start it
+ * in case it is not running.
  */
 
 static void NCR5380_main(struct work_struct *work)
 {
        struct NCR5380_hostdata *hostdata =
-               container_of(work, struct NCR5380_hostdata, coroutine.work);
+               container_of(work, struct NCR5380_hostdata, main_task);
        struct Scsi_Host *instance = hostdata->host;
-       struct scsi_cmnd *tmp, *prev;
+       struct scsi_cmnd *cmd;
        int done;
-       
-       spin_lock_irq(instance->host_lock);
+
        do {
-               /* Lock held here */
                done = 1;
-               if (!hostdata->connected && !hostdata->selecting) {
-                       dprintk(NDEBUG_MAIN, "scsi%d : not connected\n", instance->host_no);
-                       /*
-                        * Search through the issue_queue for a command destined
-                        * for a target that's not busy.
-                        */
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                       {
-                               if (prev != tmp)
-                                   dprintk(NDEBUG_LISTS, "MAIN tmp=%p   target=%d   busy=%d lun=%llu\n", tmp, tmp->device->id, hostdata->busy[tmp->device->id], tmp->device->lun);
-                               /*  When we find one, remove it from the issue queue. */
-                               if (!(hostdata->busy[tmp->device->id] &
-                                     (1 << (u8)(tmp->device->lun & 0xff)))) {
-                                       if (prev) {
-                                               REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
-                                               prev->host_scribble = tmp->host_scribble;
-                                       } else {
-                                               REMOVE(-1, hostdata->issue_queue, tmp, tmp->host_scribble);
-                                               hostdata->issue_queue = (struct scsi_cmnd *) tmp->host_scribble;
-                                       }
-                                       tmp->host_scribble = NULL;
 
-                                       /* 
-                                        * Attempt to establish an I_T_L nexus here. 
-                                        * On success, instance->hostdata->connected is set.
-                                        * On failure, we must add the command back to the
-                                        *   issue queue so we can keep trying. 
-                                        */
-                                       dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main() : command for target %d lun %llu removed from issue_queue\n", instance->host_no, tmp->device->id, tmp->device->lun);
-       
-                                       /*
-                                        * A successful selection is defined as one that 
-                                        * leaves us with the command connected and 
-                                        * in hostdata->connected, OR has terminated the
-                                        * command.
-                                        *
-                                        * With successful commands, we fall through
-                                        * and see if we can do an information transfer,
-                                        * with failures we will restart.
-                                        */
-                                       hostdata->selecting = NULL;
-                                       /* RvC: have to preset this to indicate a new command is being performed */
+               spin_lock_irq(&hostdata->lock);
+               while (!hostdata->connected &&
+                      (cmd = dequeue_next_cmd(instance))) {
 
-                                       /*
-                                        * REQUEST SENSE commands are issued without tagged
-                                        * queueing, even on SCSI-II devices because the
-                                        * contingent allegiance condition exists for the
-                                        * entire unit.
-                                        */
+                       dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
 
-                                       if (!NCR5380_select(instance, tmp)) {
-                                               break;
-                                       } else {
-                                               LIST(tmp, hostdata->issue_queue);
-                                               tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
-                                               hostdata->issue_queue = tmp;
-                                               done = 0;
-                                               dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main(): select() failed, returned to issue_queue\n", instance->host_no);
-                                       }
-                                       /* lock held here still */
-                               }       /* if target/lun is not busy */
-                       }       /* for */
-                       /* exited locked */
-               }       /* if (!hostdata->connected) */
-               if (hostdata->selecting) {
-                       tmp = (struct scsi_cmnd *) hostdata->selecting;
-                       /* Selection will drop and retake the lock */
-                       if (!NCR5380_select(instance, tmp)) {
-                               /* Ok ?? */
+                       /*
+                        * Attempt to establish an I_T_L nexus here.
+                        * On success, instance->hostdata->connected is set.
+                        * On failure, we must add the command back to the
+                        * issue queue so we can keep trying.
+                        */
+                       /*
+                        * REQUEST SENSE commands are issued without tagged
+                        * queueing, even on SCSI-II devices because the
+                        * contingent allegiance condition exists for the
+                        * entire unit.
+                        */
+
+                       cmd = NCR5380_select(instance, cmd);
+                       if (!cmd) {
+                               dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
                        } else {
-                               /* RvC: device failed, so we wait a long time
-                                  this is needed for Mustek scanners, that
-                                  do not respond to commands immediately
-                                  after a scan */
-                               printk(KERN_DEBUG "scsi%d: device %d did not respond in time\n", instance->host_no, tmp->device->id);
-                               LIST(tmp, hostdata->issue_queue);
-                               tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
-                               hostdata->issue_queue = tmp;
-                               NCR5380_set_timer(hostdata, USLEEP_WAITLONG);
+                               dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+                                        "main: select failed, returning %p to queue\n", cmd);
+                               requeue_cmd(instance, cmd);
                        }
-               }       /* if hostdata->selecting */
+               }
                if (hostdata->connected
 #ifdef REAL_DMA
                    && !hostdata->dmalen
 #endif
-                   && (!hostdata->time_expires || time_before_eq(hostdata->time_expires, jiffies))
                    ) {
-                       dprintk(NDEBUG_MAIN, "scsi%d : main() : performing information transfer\n", instance->host_no);
+                       dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
                        NCR5380_information_transfer(instance);
-                       dprintk(NDEBUG_MAIN, "scsi%d : main() : done set false\n", instance->host_no);
                        done = 0;
-               } else
-                       break;
+               }
+               spin_unlock_irq(&hostdata->lock);
+               if (!done)
+                       cond_resched();
        } while (!done);
-       
-       spin_unlock_irq(instance->host_lock);
 }
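With the coroutine gone, NCR5380_main() is an ordinary work item: under the lock it dequeues and selects until a nexus is established, runs the information transfer, and repeats until a pass makes no progress. A condensed sketch of that control flow; the stubs and their behaviour are invented for illustration:

#include <stdio.h>

/* Invented stubs standing in for driver state and calls. */
static int connected;
static int pending = 2;		/* commands left on the issue queue */

static int dequeue_next(void)
{
	if (!pending)
		return 0;
	pending--;
	return 1;
}

static int select_target(void)
{
	connected = 1;		/* pretend selection always succeeds */
	return 0;
}

static void information_transfer(void)
{
	connected = 0;		/* command ran to completion */
}

static void main_task(void)
{
	int done;

	do {
		done = 1;
		/* spin_lock_irq(&hostdata->lock); */
		while (!connected && dequeue_next()) {
			if (select_target() != 0) {
				/* requeue_cmd() and retry on a later pass */
			}
		}
		if (connected) {
			information_transfer();
			done = 0;	/* made progress, go around again */
		}
		/* spin_unlock_irq(&hostdata->lock); cond_resched(); */
	} while (!done);
}

int main(void)
{
	main_task();
	printf("queue drained, %d pending\n", pending);
	return 0;
}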
 
 #ifndef DONT_USE_INTR
 
 /**
- *     NCR5380_intr    -       generic NCR5380 irq handler
- *     @irq: interrupt number
- *     @dev_id: device info
- *
- *     Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
- *      from the disconnected queue, and restarting NCR5380_main() 
- *      as required.
- *
- *     Locks: takes the needed instance locks
+ * NCR5380_intr - generic NCR5380 irq handler
+ * @irq: interrupt number
+ * @dev_id: device info
+ *
+ * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
+ * from the disconnected queue, and restarting NCR5380_main()
+ * as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because
+ * the Busy Monitor interrupt is enabled together with DMA Mode.
  */
 
-static irqreturn_t NCR5380_intr(int dummy, void *dev_id)
+static irqreturn_t NCR5380_intr(int irq, void *dev_id)
 {
-       NCR5380_local_declare();
        struct Scsi_Host *instance = dev_id;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       int done;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int handled = 0;
        unsigned char basr;
        unsigned long flags;
 
-       dprintk(NDEBUG_INTR, "scsi : NCR5380 irq %d triggered\n",
-               instance->irq);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+       basr = NCR5380_read(BUS_AND_STATUS_REG);
+       if (basr & BASR_IRQ) {
+               unsigned char mr = NCR5380_read(MODE_REG);
+               unsigned char sr = NCR5380_read(STATUS_REG);
+
+               dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+                        irq, basr, sr, mr);
 
-       do {
-               done = 1;
-               spin_lock_irqsave(instance->host_lock, flags);
-               /* Look for pending interrupts */
-               NCR5380_setup(instance);
-               basr = NCR5380_read(BUS_AND_STATUS_REG);
-               /* XXX dispatch to appropriate routine if found and done=0 */
-               if (basr & BASR_IRQ) {
-                       NCR5380_dprint(NDEBUG_INTR, instance);
-                       if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
-                               done = 0;
-                               dprintk(NDEBUG_INTR, "scsi%d : SEL interrupt\n", instance->host_no);
-                               NCR5380_reselect(instance);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else if (basr & BASR_PARITY_ERROR) {
-                               dprintk(NDEBUG_INTR, "scsi%d : PARITY interrupt\n", instance->host_no);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-                               dprintk(NDEBUG_INTR, "scsi%d : RESET interrupt\n", instance->host_no);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else {
 #if defined(REAL_DMA)
-                               /*
-                                * We should only get PHASE MISMATCH and EOP interrupts
-                                * if we have DMA enabled, so do a sanity check based on
-                                * the current setting of the MODE register.
-                                */
+               if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+                       /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+                        * We ack IRQ after clearing Mode Register. Workarounds
+                        * for End of DMA errata need to happen in DMA Mode.
+                        */
 
-                               if ((NCR5380_read(MODE_REG) & MR_DMA) && ((basr & BASR_END_DMA_TRANSFER) || !(basr & BASR_PHASE_MATCH))) {
-                                       int transferred;
+                       int transferred;
+
+                       dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
 
-                                       if (!hostdata->connected)
-                                               panic("scsi%d : received end of DMA interrupt with no connected cmd\n", instance->hostno);
 
-                                       transferred = (hostdata->dmalen - NCR5380_dma_residual(instance));
-                                       hostdata->connected->SCp.this_residual -= transferred;
-                                       hostdata->connected->SCp.ptr += transferred;
-                                       hostdata->dmalen = 0;
+                       if (!hostdata->connected)
+                               panic("scsi%d : DMA interrupt with no connected cmd\n",
+                                     instance->host_no);
 
-                                       (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                                                       
-                                       /* FIXME: we need to poll briefly then defer a workqueue task ! */
-                                       NCR5380_poll_politely(instance, BUS_AND_STATUS_REG, BASR_ACK, 0, 2*HZ);
+                       transferred = hostdata->dmalen - NCR5380_dma_residual(instance);
+                       hostdata->connected->SCp.this_residual -= transferred;
+                       hostdata->connected->SCp.ptr += transferred;
+                       hostdata->dmalen = 0;
 
-                                       NCR5380_write(MODE_REG, MR_BASE);
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                               }
-#else
-                               dprintk(NDEBUG_INTR, "scsi : unknown interrupt, BASR 0x%X, MR 0x%X, SR 0x%x\n", basr, NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-#endif
+                       /* FIXME: we need to poll briefly then defer a workqueue task ! */
+                       NCR5380_poll_politely(instance, BUS_AND_STATUS_REG, BASR_ACK, 0, 2 * HZ);
+
+                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+                       NCR5380_write(MODE_REG, MR_BASE);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+               } else
+#endif /* REAL_DMA */
+               if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+                   (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+                       /* Probably reselected */
+                       NCR5380_write(SELECT_ENABLE_REG, 0);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+                       if (!hostdata->connected) {
+                               NCR5380_reselect(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
                        }
-               }       /* if BASR_IRQ */
-               spin_unlock_irqrestore(instance->host_lock, flags);
-               if(!done)
-                       schedule_delayed_work(&hostdata->coroutine, 0);
-       } while (!done);
-       return IRQ_HANDLED;
+                       if (!hostdata->connected)
+                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               } else {
+                       /* Probably Bus Reset */
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
+               }
+               handled = 1;
+       } else {
+               shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
+       }
+
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
+       return IRQ_RETVAL(handled);
 }
 
-#endif 
+#endif
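Only three of the six interrupt conditions are latched, so the handler classifies causes by priority: DMA-related first (Mode Register still has DMA Mode or Busy Monitor set), then reselection (our ID on the bus together with SEL and IO), then unknown. A userspace sketch of that decision tree; the mask values follow my reading of the DP5380 register layout and should be treated as illustrative:

#include <stdio.h>

/* DP5380 register bits (per the datasheet; illustrative here). */
#define BASR_IRQ	0x10
#define MR_DMA_MODE	0x02
#define MR_MONITOR_BSY	0x04
#define SR_RST		0x80
#define SR_BSY		0x40
#define SR_SEL		0x02
#define SR_IO		0x04

static const char *classify(unsigned char basr, unsigned char mr,
			    unsigned char sr, unsigned char cur_data,
			    unsigned char id_mask)
{
	if (!(basr & BASR_IRQ))
		return "spurious (no IRQ bit)";
	if (mr & (MR_DMA_MODE | MR_MONITOR_BSY))
		return "DMA done, phase mismatch or loss of BSY";
	if ((cur_data & id_mask) &&
	    (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO))
		return "reselection";
	return "unknown (probably bus reset)";
}

int main(void)
{
	/* Target 2 reselects initiator 7: both IDs on the data bus. */
	printf("%s\n", classify(BASR_IRQ, 0, SR_SEL | SR_IO,
				(1 << 2) | (1 << 7), 1 << 7));
	return 0;
}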
 
-/* 
+/*
  * Function : int NCR5380_select(struct Scsi_Host *instance,
- *                               struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
  *
  * Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- *      including ARBITRATION, SELECTION, and initial message out for 
- *      IDENTIFY and queue messages. 
- *
- * Inputs : instance - instantiation of the 5380 driver on which this 
- *      target lives, cmd - SCSI command to execute.
- * 
- * Returns : -1 if selection could not execute for some reason,
- *      0 if selection succeeded or failed because the target 
- *      did not respond.
- *
- * Side effects : 
- *      If bus busy, arbitration failed, etc, NCR5380_select() will exit 
- *              with registers as they should have been on entry - ie
- *              SELECT_ENABLE will be set appropriately, the NCR5380
- *              will cease to drive any SCSI bus signals.
- *
- *      If successful : I_T_L or I_T_L_Q nexus will be established, 
- *              instance->connected will be set to cmd.  
- *              SELECT interrupt will be disabled.
- *
- *      If failed (no target) : cmd->scsi_done() will be called, and the 
- *              cmd->result host byte set to DID_BAD_TARGET.
- *
- *     Locks: caller holds hostdata lock in IRQ mode
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
+ *
+ * Inputs : instance - instantiation of the 5380 driver on which this
+ * target lives, cmd - SCSI command to execute.
+ *
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
+ *
+ * Side effects :
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
+ *
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * instance->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
+ *
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
  */
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+                                        struct scsi_cmnd *cmd)
 {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char tmp[3], phase;
        unsigned char *data;
        int len;
-       unsigned long timeout;
-       unsigned char value;
        int err;
-       NCR5380_setup(instance);
-
-       if (hostdata->selecting)
-               goto part2;
-
-       hostdata->restart_select = 0;
 
        NCR5380_dprint(NDEBUG_ARBITRATION, instance);
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id);
+       dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+                instance->this_id);
+
+       /*
+        * Arbitration and selection phases are slow and involve dropping the
+        * lock, so we have to watch out for EH. An exception handler may
+        * change 'selecting' to NULL. This function will then return NULL
+        * so that the caller will forget about 'cmd'. (During information
+        * transfer phases, EH may change 'connected' to NULL.)
+        */
+       hostdata->selecting = cmd;
 
-       /* 
-        * Set the phase bits to 0, otherwise the NCR5380 won't drive the 
+       /*
+        * Set the phase bits to 0, otherwise the NCR5380 won't drive the
         * data bus during SELECTION.
         */
 
        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-       /* 
+       /*
         * Start arbitration.
         */
 
        NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
        NCR5380_write(MODE_REG, MR_ARBITRATE);
 
+       /* The chip now waits for BUS FREE phase. Then after the 800 ns
+        * Bus Free Delay, arbitration will begin.
+        */
 
-       /* We can be relaxed here, interrupts are on, we are
-          in workqueue context, the birds are singing in the trees */
-       spin_unlock_irq(instance->host_lock);
-       err = NCR5380_poll_politely(instance, INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS, ICR_ARBITRATION_PROGRESS, 5*HZ);
-       spin_lock_irq(instance->host_lock);
+       spin_unlock_irq(&hostdata->lock);
+       err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+                       INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+                                              ICR_ARBITRATION_PROGRESS, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+               /* Reselection interrupt */
+               goto out;
+       }
        if (err < 0) {
-               printk(KERN_DEBUG "scsi: arbitration timeout at %d\n", __LINE__);
                NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               goto failed;
+               shost_printk(KERN_ERR, instance,
+                            "select: arbitration timeout\n");
+               goto out;
        }
+       spin_unlock_irq(&hostdata->lock);
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : arbitration complete\n", instance->host_no);
-
-       /* 
-        * The arbitration delay is 2.2us, but this is a minimum and there is 
-        * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
-        * the integral nature of udelay().
-        *
-        */
-
+       /* The SCSI-2 arbitration delay is 2.4 us */
        udelay(3);
 
        /* Check for lost arbitration */
-       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) || (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) || (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
-               NCR5380_write(MODE_REG, MR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting MR_ARBITRATE\n", instance->host_no);
-               goto failed;
-       }
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_SEL);
-
-       if (!(hostdata->flags & FLAG_DTC3181E) &&
-           /* RvC: DTC3181E has some trouble with this
-            *      so we simply removed it. Seems to work with
-            *      only Mustek scanner attached
-            */
+       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
+           (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
            (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
                NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting ICR_ASSERT_SEL\n", instance->host_no);
-               goto failed;
+               dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+               spin_lock_irq(&hostdata->lock);
+               goto out;
        }
-       /* 
-        * Again, bus clear + bus settle time is 1.2us, however, this is 
+
+       /* After/during arbitration, BSY should be asserted.
+        * IBM DPES-31080 Version S31Q works now
+        * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+        */
+       NCR5380_write(INITIATOR_COMMAND_REG,
+                     ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
+
+       /*
+        * Again, bus clear + bus settle time is 1.2us, however, this is
         * a minimum so we'll udelay ceil(1.2)
         */
 
-       udelay(2);
+       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+               udelay(15);
+       else
+               udelay(2);
+
+       spin_lock_irq(&hostdata->lock);
+
+       /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+               goto out;
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : won arbitration\n", instance->host_no);
+       if (!hostdata->selecting) {
+               NCR5380_write(MODE_REG, MR_BASE);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               goto out;
+       }
 
-       /* 
-        * Now that we have won arbitration, start Selection process, asserting 
+       dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
+
+       /*
+        * Now that we have won arbitration, start Selection process, asserting
         * the host and target ID's on the SCSI bus.
         */
 
-       NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << scmd_id(cmd))));
+       NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
 
-       /* 
+       /*
         * Raise ATN while SEL is true before BSY goes false from arbitration,
         * since this is the only way to guarantee that we'll get a MESSAGE OUT
         * phase immediately after selection.
         */
 
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
        NCR5380_write(MODE_REG, MR_BASE);
 
-       /* 
+       /*
         * Reselect interrupts must be turned off prior to the dropping of BSY,
         * otherwise we will trigger an interrupt.
         */
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       spin_unlock_irq(&hostdata->lock);
+
        /*
-        * The initiator shall then wait at least two deskew delays and release 
+        * The initiator shall then wait at least two deskew delays and release
         * the BSY signal.
         */
-       udelay(1);              /* wingel -- wait two bus deskew delay >2*45ns */
+       udelay(1);        /* wingel -- wait two bus deskew delay >2*45ns */
 
        /* Reset BSY */
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+                     ICR_ASSERT_ATN | ICR_ASSERT_SEL);
 
-       /* 
+       /*
         * Something weird happens when we cease to drive BSY - looks
-        * like the board/chip is letting us do another read before the 
+        * like the board/chip is letting us do another read before the
         * appropriate propagation delay has expired, and we're confusing
         * a BSY signal from ourselves as the target's response to SELECTION.
         *
         * A small delay (the 'C++' frontend breaks the pipeline with an
         * unnecessary jump, making it work on my 386-33/Trantor T128, the
-        * tighter 'C' code breaks and requires this) solves the problem - 
-        * the 1 us delay is arbitrary, and only used because this delay will 
-        * be the same on other platforms and since it works here, it should 
+        * tighter 'C' code breaks and requires this) solves the problem -
+        * the 1 us delay is arbitrary, and only used because this delay will
+        * be the same on other platforms and since it works here, it should
         * work there.
         *
         * wingel suggests that this could be due to failing to wait
@@ -1339,50 +1162,43 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        udelay(1);
 
-       dprintk(NDEBUG_SELECTION, "scsi%d : selecting target %d\n", instance->host_no, scmd_id(cmd));
+       dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
 
-       /* 
-        * The SCSI specification calls for a 250 ms timeout for the actual 
+       /*
+        * The SCSI specification calls for a 250 ms timeout for the actual
         * selection.
         */
 
-       timeout = jiffies + msecs_to_jiffies(250);
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+                                   msecs_to_jiffies(250));
 
-       /* 
-        * XXX very interesting - we're seeing a bounce where the BSY we 
-        * asserted is being reflected / still asserted (propagation delay?)
-        * and it's detecting as true.  Sigh.
-        */
-
-       hostdata->select_time = 0;      /* we count the clock ticks at which we polled */
-       hostdata->selecting = cmd;
-
-part2:
-       /* RvC: here we enter after a sleeping period, or immediately after
-          execution of part 1
-          we poll only once ech clock tick */
-       value = NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO);
-
-       if (!value && (hostdata->select_time < HZ/4)) {
-               /* RvC: we still must wait for a device response */
-               hostdata->select_time++;        /* after 25 ticks the device has failed */
-               NCR5380_set_timer(hostdata, 1);
-               return 0;       /* RvC: we return here with hostdata->selecting set,
-                                  to go to sleep */
-       }
-
-       hostdata->selecting = NULL;/* clear this pointer, because we passed the
-                                          waiting period */
        if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+               spin_lock_irq(&hostdata->lock);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_reselect(instance);
-               printk("scsi%d : reselection after won arbitration?\n", instance->host_no);
+               if (!hostdata->connected)
+                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+               goto out;
+       }
+
+       if (err < 0) {
+               spin_lock_irq(&hostdata->lock);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return -1;
+               /* Can't touch cmd if it has been reclaimed by the scsi ML */
+               if (hostdata->selecting) {
+                       cmd->result = DID_BAD_TARGET << 16;
+                       complete_cmd(instance, cmd);
+                       dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+                       cmd = NULL;
+               }
+               goto out;
        }
-       /* 
-        * No less than two deskew delays after the initiator detects the 
-        * BSY signal is true, it shall release the SEL signal and may 
+
+       /*
+        * No less than two deskew delays after the initiator detects the
+        * BSY signal is true, it shall release the SEL signal and may
         * change the DATA BUS.                                     -wingel
         */
 
@@ -1390,53 +1206,38 @@ part2:
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
-       if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               if (hostdata->targets_present & (1 << scmd_id(cmd))) {
-                       printk(KERN_DEBUG "scsi%d : weirdness\n", instance->host_no);
-                       if (hostdata->restart_select)
-                               printk(KERN_DEBUG "\trestart select\n");
-                       NCR5380_dprint(NDEBUG_SELECTION, instance);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
-               cmd->result = DID_BAD_TARGET << 16;
-               cmd->scsi_done(cmd);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n", instance->host_no);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return 0;
-       }
-       hostdata->targets_present |= (1 << scmd_id(cmd));
-
        /*
-        * Since we followed the SCSI spec, and raised ATN while SEL 
+        * Since we followed the SCSI spec, and raised ATN while SEL
         * was true but before BSY was false during selection, the information
         * transfer phase should be a MESSAGE OUT phase so that we can send the
         * IDENTIFY message.
-        * 
+        *
         * If SCSI-II tagged queuing is enabled, we also send a SIMPLE_QUEUE_TAG
         * message (2 bytes) with a tag ID that we increment with every command
         * until it wraps back to 0.
         *
         * XXX - it turns out that there are some broken SCSI-II devices,
-        *       which claim to support tagged queuing but fail when more than
-        *       some number of commands are issued at once.
+        * which claim to support tagged queuing but fail when more than
+        * some number of commands are issued at once.
         */
 
        /* Wait for start of REQ/ACK handshake */
 
-       spin_unlock_irq(instance->host_lock);
        err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
-       spin_lock_irq(instance->host_lock);
-       
-       if(err) {
-               printk(KERN_ERR "scsi%d: timeout at NCR5380.c:%d\n", instance->host_no, __LINE__);
+       spin_lock_irq(&hostdata->lock);
+       if (err < 0) {
+               shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               goto failed;
+               goto out;
+       }
+       if (!hostdata->selecting) {
+               do_abort(instance);
+               goto out;
        }
 
-       dprintk(NDEBUG_SELECTION, "scsi%d : target %d selected, going into MESSAGE OUT phase.\n", instance->host_no, cmd->device->id);
+       dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+                scmd_id(cmd));
        tmp[0] = IDENTIFY(((instance->irq == NO_IRQ) ? 0 : 1), cmd->device->lun);
 
        len = 1;
@@ -1446,104 +1247,82 @@ part2:
        data = tmp;
        phase = PHASE_MSGOUT;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
-       dprintk(NDEBUG_SELECTION, "scsi%d : nexus established.\n", instance->host_no);
+       dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
        /* XXX need to handle errors here */
+
        hostdata->connected = cmd;
-       hostdata->busy[cmd->device->id] |= (1 << (cmd->device->lun & 0xFF));
+       hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
 
        initialize_SCp(cmd);
 
-       return 0;
-
-       /* Selection failed */
-failed:
-       return -1;
+       cmd = NULL;
 
+out:
+       if (!hostdata->selecting)
+               return NULL;
+       hostdata->selecting = NULL;
+       return cmd;
 }
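Selection leans on NCR5380_poll_politely(), which samples a register until (reg & mask) == val or a timeout expires, sleeping between samples instead of monopolizing the CPU. A rough userspace analogue of that contract; the register stub and the 1 ms backoff are invented:

#include <stdio.h>
#include <time.h>

static unsigned long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000UL + ts.tv_nsec / 1000000UL;
}

/* Stand-in for a chip register where SR_BSY appears after a while. */
static unsigned char read_reg(void)
{
	static int calls;

	return ++calls > 50 ? 0x40 : 0;		/* 0x40 = SR_BSY */
}

/* Poll until (reg & mask) == val; return 0 on success, -1 on timeout. */
static int poll_politely(unsigned char mask, unsigned char val,
			 unsigned long timeout_ms)
{
	unsigned long deadline = now_ms() + timeout_ms;

	do {
		if ((read_reg() & mask) == val)
			return 0;
		/* The driver spins briefly then sleeps; here we just
		 * pause about 1 ms between samples. */
		nanosleep(&(struct timespec){ 0, 1000000 }, NULL);
	} while (now_ms() < deadline);
	return -1;
}

int main(void)
{
	printf("select: %s\n",
	       poll_politely(0x40, 0x40, 250) ? "timeout" : "BSY asserted");
	return 0;
}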
 
-/* 
- * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance, 
- *      unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using polled I/O
  *
- * Inputs : instance - instance of driver, *phase - pointer to 
- *      what phase is expected, *count - pointer to number of 
- *      bytes to transfer, **data - pointer to data pointer.
- * 
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
+ *
  * Returns : -1 when different phase is entered without transferring
- *      maximum number of bytes, 0 if all bytes or transferred or exit
- *      is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
- *      Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
  *
  * XXX Note : handling for bus free may be useful.
  */
 
 /*
- * Note : this code is not as quick as it could be, however it 
+ * Note : this code is not as quick as it could be, however it
  * IS 100% reliable, and for the actual data transfer where speed
  * counts, we will always do a pseudo DMA or DMA transfer.
  */
 
-static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
-       NCR5380_local_declare();
+static int NCR5380_transfer_pio(struct Scsi_Host *instance,
+                               unsigned char *phase, int *count,
+                               unsigned char **data)
+{
        unsigned char p = *phase, tmp;
        int c = *count;
        unsigned char *d = *data;
-       /*
-        *      RvC: some administrative data to process polling time
-        */
-       int break_allowed = 0;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       NCR5380_setup(instance);
-
-       if (!(p & SR_IO))
-               dprintk(NDEBUG_PIO, "scsi%d : pio write %d bytes\n", instance->host_no, c);
-       else
-               dprintk(NDEBUG_PIO, "scsi%d : pio read %d bytes\n", instance->host_no, c);
 
-       /* 
-        * The NCR5380 chip will only drive the SCSI bus when the 
+       /*
+        * The NCR5380 chip will only drive the SCSI bus when the
         * phase specified in the appropriate bits of the TARGET COMMAND
         * REGISTER match the STATUS REGISTER
         */
 
-        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
-       /* RvC: don't know if this is necessary, but other SCSI I/O is short
-        *      so breaks are not necessary there
-        */
-       if ((p == PHASE_DATAIN) || (p == PHASE_DATAOUT)) {
-               break_allowed = 1;
-       }
-       do {
-               /* 
-                * Wait for assertion of REQ, after which the phase bits will be 
-                * valid 
-                */
+       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
 
-               /* RvC: we simply poll once, after that we stop temporarily
-                *      and let the device buffer fill up
-                *      if breaking is not allowed, we keep polling as long as needed
+       do {
+               /*
+                * Wait for assertion of REQ, after which the phase bits will be
+                * valid
                 */
 
-               /* FIXME */
-               while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ) && !break_allowed);
-               if (!(tmp & SR_REQ)) {
-                       /* timeout condition */
-                       NCR5380_set_timer(hostdata, USLEEP_SLEEP);
+               if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
                        break;
-               }
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d : REQ detected\n", instance->host_no);
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
 
                /* Check for phase mismatch */
-               if ((tmp & PHASE_MASK) != p) {
-                       dprintk(NDEBUG_HANDSHAKE, "scsi%d : phase mismatch\n", instance->host_no);
-                       NCR5380_dprint_phase(NDEBUG_HANDSHAKE, instance);
+               if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+                       dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
+                       NCR5380_dprint_phase(NDEBUG_PIO, instance);
                        break;
                }
+
                /* Do actual transfer from SCSI bus to / from memory */
                if (!(p & SR_IO))
                        NCR5380_write(OUTPUT_DATA_REG, *d);
@@ -1552,7 +1331,7 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
 
                ++d;
 
-               /* 
+               /*
                 * The SCSI standard suggests that in MSGOUT phase, the initiator
                 * should drop ATN on the last byte of the message phase
                 * after REQ has been asserted for the handshake but before
@@ -1563,29 +1342,34 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
                        if (!((p & SR_MSG) && c > 1)) {
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
                                NCR5380_dprint(NDEBUG_PIO, instance);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
                        } else {
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
                                NCR5380_dprint(NDEBUG_PIO, instance);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
                        }
                } else {
                        NCR5380_dprint(NDEBUG_PIO, instance);
                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
                }
 
-               /* FIXME - if this fails bus reset ?? */
-               NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 5*HZ);
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d : req false, handshake complete\n", instance->host_no);
+               if (NCR5380_poll_politely(instance,
+                                         STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+                       break;
+
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
 
 /*
- * We have several special cases to consider during REQ/ACK handshaking : 
- * 1.  We were in MSGOUT phase, and we are on the last byte of the 
- *      message.  ATN must be dropped as ACK is dropped.
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1.  We were in MSGOUT phase, and we are on the last byte of the
+ * message.  ATN must be dropped as ACK is dropped.
  *
- * 2.  We are in a MSGIN phase, and we are on the last byte of the  
- *      message.  We must exit with ACK asserted, so that the calling
- *      code may raise ATN before dropping ACK to reject the message.
+ * 2.  We are in a MSGIN phase, and we are on the last byte of the
+ * message.  We must exit with ACK asserted, so that the calling
+ * code may raise ATN before dropping ACK to reject the message.
  *
  * 3.  ACK and ATN are clear and the target may proceed as normal.
  */
@@ -1597,12 +1381,16 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
                }
        } while (--c);
 
-       dprintk(NDEBUG_PIO, "scsi%d : residual %d\n", instance->host_no, c);
+       dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
 
        *count = c;
        *data = d;
        tmp = NCR5380_read(STATUS_REG);
-       if (tmp & SR_REQ)
+       /* The phase read from the bus is valid if either REQ is (already)
+        * asserted or if ACK hasn't been released yet. The latter applies if
+        * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
+        */
+       if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
                *phase = tmp & PHASE_MASK;
        else
                *phase = PHASE_UNKNOWN;
@@ -1614,79 +1402,80 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
 }
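Each PIO byte is one complete REQ/ACK handshake: wait for REQ, check that the phase still matches, move the byte, assert ACK, wait for REQ to drop, release ACK. A toy model of one initiator-side byte with the bus reduced to plain variables (the simulated target behaviour is, of course, an assumption):

#include <stdio.h>

/* Stub SCSI bus: the target side is simulated inline. */
static int bus_req, bus_ack;
static unsigned char bus_data;

static void target_offers(unsigned char byte)
{
	bus_data = byte;
	bus_req = 1;			/* target asserts REQ            */
}

static int initiator_take_byte(unsigned char *out)
{
	if (!bus_req)			/* driver polls for REQ here     */
		return -1;
	*out = bus_data;		/* latch data while REQ is true  */
	bus_ack = 1;			/* assert ACK                    */
	bus_req = 0;			/* target drops REQ on seeing ACK */
	bus_ack = 0;			/* handshake complete: drop ACK  */
	return 0;
}

int main(void)
{
	unsigned char b;

	target_offers(0x08);		/* e.g. first CDB byte of READ(6) */
	if (initiator_take_byte(&b) == 0)
		printf("got 0x%02x\n", b);
	return 0;
}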
 
 /**
- *     do_reset        -       issue a reset command
- *     @host: adapter to reset
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
  *
- *     Issue a reset sequence to the NCR5380 and try and get the bus
- *     back into sane shape.
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
  *
- *     Locks: caller holds queue lock
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
  */
-static void do_reset(struct Scsi_Host *host) {
-       NCR5380_local_declare();
-       NCR5380_setup(host);
 
-       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+static void do_reset(struct Scsi_Host *instance)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       NCR5380_write(TARGET_COMMAND_REG,
+                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       udelay(25);
+       udelay(50);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       local_irq_restore(flags);
 }
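The RST pulse was widened from 25 us to 50 us; SCSI-2 only requires RST to be held for at least 25 us, so the longer pulse adds margin. The sequence itself is three steps, sketched here with invented register stubs (the ICR bit value follows my reading of the DP5380 layout):

#include <stdio.h>

#define ICR_BASE	0x00
#define ICR_ASSERT_RST	0x80

static void icr_write(unsigned char v)
{
	printf("ICR <- 0x%02x\n", v);
}

static void busy_wait_us(unsigned int us)
{
	(void)us;		/* udelay() stand-in */
}

int main(void)
{
	icr_write(ICR_BASE | ICR_ASSERT_RST);	/* drive RST onto the bus */
	busy_wait_us(50);			/* >= 25 us hold time     */
	icr_write(ICR_BASE);			/* release RST            */
	/* then read RPIR once to clear the latched reset interrupt */
	return 0;
}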
 
-/*
- * Function : do_abort (Scsi_Host *host)
- * 
- * Purpose : abort the currently established nexus.  Should only be 
- *      called from a routine which can drop into a 
- * 
- * Returns : 0 on success, -1 on failure.
- *
- * Locks: queue lock held by caller
- *     FIXME: sort this out and get new_eh running
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
+ *
+ * Returns 0 on success, -1 on failure.
  */
 
-static int do_abort(struct Scsi_Host *host) {
-       NCR5380_local_declare();
+static int do_abort(struct Scsi_Host *instance)
+{
        unsigned char *msgptr, phase, tmp;
        int len;
        int rc;
-       NCR5380_setup(host);
-
 
        /* Request message out phase */
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
-       /* 
-        * Wait for the target to indicate a valid phase by asserting 
-        * REQ.  Once this happens, we'll have either a MSGOUT phase 
-        * and can immediately send the ABORT message, or we'll have some 
+       /*
+        * Wait for the target to indicate a valid phase by asserting
+        * REQ.  Once this happens, we'll have either a MSGOUT phase
+        * and can immediately send the ABORT message, or we'll have some
         * other phase and will have to source/sink data.
-        * 
+        *
         * We really don't care what value was on the bus or what value
         * the target sees, so we just handshake.
         */
 
-       rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, SR_REQ, 60 * HZ);
-       
-       if(rc < 0)
-               return -1;
+       rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+       if (rc < 0)
+               goto timeout;
+
+       tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
-       tmp = (unsigned char)rc;
-       
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-       if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
-               rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, 0, 3*HZ);
+       if (tmp != PHASE_MSGOUT) {
+               NCR5380_write(INITIATOR_COMMAND_REG,
+                             ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+               rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+               if (rc < 0)
+                       goto timeout;
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
-               if(rc == -1)
-                       return -1;
        }
+
        tmp = ABORT;
        msgptr = &tmp;
        len = 1;
        phase = PHASE_MSGOUT;
-       NCR5380_transfer_pio(host, &phase, &len, &msgptr);
+       NCR5380_transfer_pio(instance, &phase, &len, &msgptr);
 
        /*
         * If we got here, and the command completed successfully,
@@ -1694,32 +1483,37 @@ static int do_abort(struct Scsi_Host *host) {
         */
 
        return len ? -1 : 0;
+
+timeout:
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       return -1;
 }
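do_abort() boils down to: assert ATN to request MESSAGE OUT, handshake bytes in whatever phase the target currently presents, and once MESSAGE OUT arrives send the single ABORT (0x06) message byte. A toy model of that negotiation, with a compliant target simulated inline:

#include <stdio.h>

#define MSG_ABORT 0x06

enum phase { PHASE_DATAIN, PHASE_MSGOUT };

static enum phase bus_phase = PHASE_DATAIN;
static int atn;

/* A compliant target switches to MESSAGE OUT at the next byte
 * boundary once it notices ATN. */
static void target_step(void)
{
	if (atn)
		bus_phase = PHASE_MSGOUT;
}

int main(void)
{
	atn = 1;			/* request MESSAGE OUT           */
	while (bus_phase != PHASE_MSGOUT) {
		/* handshake and discard one byte of the current phase */
		target_step();
	}
	printf("sending message 0x%02x\n", MSG_ABORT);
	atn = 0;			/* drop ATN on the last byte     */
	return 0;
}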
 
 #if defined(REAL_DMA) || defined(PSEUDO_DMA) || defined (REAL_DMA_POLL)
-/* 
- * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance, 
- *      unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using either real
- *      or pseudo DMA.
+ * or pseudo DMA.
  *
- * Inputs : instance - instance of driver, *phase - pointer to 
- *      what phase is expected, *count - pointer to number of 
- *      bytes to transfer, **data - pointer to data pointer.
- * 
- * Returns : -1 when different phase is entered without transferring
- *      maximum number of bytes, 0 if all bytes or transferred or exit
- *      is in same phase.
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
- *      Also, *phase, *count, *data are modified in place.
+ * Returns : -1 when different phase is entered without transferring
+ * Returns : -1 when different phase is entered without transferring
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
- *     Locks: io_request lock held by caller
+ * Also, *phase, *count, *data are modified in place.
  */
 
 
-static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
-       NCR5380_local_declare();
+static int NCR5380_transfer_dma(struct Scsi_Host *instance,
+                               unsigned char *phase, int *count,
+                               unsigned char **data)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        register int c = *count;
        register unsigned char p = *phase;
        register unsigned char *d = *data;
@@ -1730,54 +1524,47 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        unsigned char saved_data = 0, overrun = 0, residue;
 #endif
 
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
-       NCR5380_setup(instance);
-
        if ((tmp = (NCR5380_read(STATUS_REG) & PHASE_MASK)) != p) {
                *phase = tmp;
                return -1;
        }
 #if defined(REAL_DMA) || defined(REAL_DMA_POLL)
-#ifdef READ_OVERRUNS
        if (p & SR_IO) {
-               c -= 2;
+               if (!(hostdata->flags & FLAG_NO_DMA_FIXUP))
+                       c -= 2;
        }
-#endif
-       dprintk(NDEBUG_DMA, "scsi%d : initializing DMA channel %d for %s, %d bytes %s %0x\n", instance->host_no, instance->dma_channel, (p & SR_IO) ? "reading" : "writing", c, (p & SR_IO) ? "to" : "from", (unsigned) d);
        hostdata->dma_len = (p & SR_IO) ? NCR5380_dma_read_setup(instance, d, c) : NCR5380_dma_write_setup(instance, d, c);
+
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, *data);
 #endif
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
 
 #ifdef REAL_DMA
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
 #elif defined(REAL_DMA_POLL)
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
 #else
        /*
         * Note : on my sample board, watch-dog timeouts occurred when interrupts
-        * were not disabled for the duration of a single DMA transfer, from 
+        * were not disabled for the duration of a single DMA transfer, from
         * before the setting of DMA mode to after transfer of the last byte.
         */
 
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
-       spin_unlock_irq(instance->host_lock);
-#endif
-       /* KLL May need eop and parity in 53c400 */
-       if (hostdata->flags & FLAG_NCR53C400)
-               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE |
-                               MR_ENABLE_PAR_CHECK | MR_ENABLE_PAR_INTR |
-                               MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+       if (hostdata->flags & FLAG_NO_DMA_FIXUP)
+               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                                       MR_ENABLE_EOP_INTR);
        else
-               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
 #endif                         /* def REAL_DMA */
 
        dprintk(NDEBUG_DMA, "scsi%d : mode reg = 0x%X\n", instance->host_no, NCR5380_read(MODE_REG));
 
-       /* 
-        *      On the PAS16 at least I/O recovery delays are not needed here.
-        *      Everyone else seems to want them.
+       /*
+        * On the PAS16 at least I/O recovery delays are not needed here.
+        * Everyone else seems to want them.
         */
 
        if (p & SR_IO) {
@@ -1797,49 +1584,49 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        } while ((tmp & BASR_PHASE_MATCH) && !(tmp & (BASR_BUSY_ERROR | BASR_END_DMA_TRANSFER)));
 
 /*
-   At this point, either we've completed DMA, or we have a phase mismatch,
-   or we've unexpectedly lost BUSY (which is a real error).
-
-   For write DMAs, we want to wait until the last byte has been
-   transferred out over the bus before we turn off DMA mode.  Alas, there
-   seems to be no terribly good way of doing this on a 5380 under all
-   conditions.  For non-scatter-gather operations, we can wait until REQ
-   and ACK both go false, or until a phase mismatch occurs.  Gather-writes
-   are nastier, since the device will be expecting more data than we
-   are prepared to send it, and REQ will remain asserted.  On a 53C8[01] we
-   could test LAST BIT SENT to assure transfer (I imagine this is precisely
-   why this signal was added to the newer chips) but on the older 538[01]
-   this signal does not exist.  The workaround for this lack is a watchdog;
-   we bail out of the wait-loop after a modest amount of wait-time if
-   the usual exit conditions are not met.  Not a terribly clean or
-   correct solution :-%
-
-   Reads are equally tricky due to a nasty characteristic of the NCR5380.
-   If the chip is in DMA mode for an READ, it will respond to a target's
-   REQ by latching the SCSI data into the INPUT DATA register and asserting
-   ACK, even if it has _already_ been notified by the DMA controller that
-   the current DMA transfer has completed!  If the NCR5380 is then taken
-   out of DMA mode, this already-acknowledged byte is lost.
-
-   This is not a problem for "one DMA transfer per command" reads, because
-   the situation will never arise... either all of the data is DMA'ed
-   properly, or the target switches to MESSAGE IN phase to signal a
-   disconnection (either operation bringing the DMA to a clean halt).
-   However, in order to handle scatter-reads, we must work around the
-   problem.  The chosen fix is to DMA N-2 bytes, then check for the
-   condition before taking the NCR5380 out of DMA mode.  One or two extra
-   bytes are transferred via PIO as necessary to fill out the original
-   request.
+ * At this point, either we've completed DMA, or we have a phase mismatch,
+ * or we've unexpectedly lost BUSY (which is a real error).
+ *
+ * For DMA sends, we want to wait until the last byte has been
+ * transferred out over the bus before we turn off DMA mode.  Alas, there
+ * seems to be no terribly good way of doing this on a 5380 under all
+ * conditions.  For non-scatter-gather operations, we can wait until REQ
+ * and ACK both go false, or until a phase mismatch occurs.  Gather-sends
+ * are nastier, since the device will be expecting more data than we
+ * are prepared to send it, and REQ will remain asserted.  On a 53C8[01] we
+ * could test Last Byte Sent to assure transfer (I imagine this is precisely
+ * why this signal was added to the newer chips) but on the older 538[01]
+ * this signal does not exist.  The workaround for this lack is a watchdog;
+ * we bail out of the wait-loop after a modest amount of wait-time if
+ * the usual exit conditions are not met.  Not a terribly clean or
+ * correct solution :-%
+ *
+ * DMA receive is equally tricky due to a nasty characteristic of the NCR5380.
+ * If the chip is in DMA receive mode, it will respond to a target's
+ * REQ by latching the SCSI data into the INPUT DATA register and asserting
+ * ACK, even if it has _already_ been notified by the DMA controller that
+ * the current DMA transfer has completed!  If the NCR5380 is then taken
+ * out of DMA mode, this already-acknowledged byte is lost. This is
+ * not a problem for "one DMA transfer per READ command", because
+ * the situation will never arise... either all of the data is DMA'ed
+ * properly, or the target switches to MESSAGE IN phase to signal a
+ * disconnection (either operation bringing the DMA to a clean halt).
+ * However, in order to handle scatter-receive, we must work around the
+ * problem.  The chosen fix is to DMA N-2 bytes, then check for the
+ * condition before taking the NCR5380 out of DMA mode.  One or two extra
+ * bytes are transferred via PIO as necessary to fill out the original
+ * request.
  */
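Concretely: with the fixups enabled, a 512-byte scatter-read is programmed as 510 bytes of DMA, and the final one or two bytes are moved by PIO after the overrun check, so the chip is never taken out of DMA mode while holding an acknowledged byte. A condensed sketch of the idea (the actual logic follows below in this function):

	if ((p & SR_IO) && !(hostdata->flags & FLAG_NO_DMA_FIXUPS))
		c -= 2;			/* DMA all but the final two bytes */
	/* ... run the DMA for c bytes, leave DMA mode ... */
	/* ... then move the residue with NCR5380_transfer_pio() ... */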
 
        if (p & SR_IO) {
-#ifdef READ_OVERRUNS
-               udelay(10);
-               if (((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) == (BASR_PHASE_MATCH | BASR_ACK))) {
-                       saved_data = NCR5380_read(INPUT_DATA_REGISTER);
-                       overrun = 1;
+               if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS)) {
+                       udelay(10);
+                       if ((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) ==
+                           (BASR_PHASE_MATCH | BASR_ACK)) {
+                               saved_data = NCR5380_read(INPUT_DATA_REGISTER);
+                               overrun = 1;
+                       }
                }
-#endif
        } else {
                int limit = 100;
                while (((tmp = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_ACK) || (NCR5380_read(STATUS_REG) & SR_REQ)) {
@@ -1850,7 +1637,8 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
                }
        }
 
-       dprintk(NDEBUG_DMA, "scsi%d : polled DMA transfer complete, basr 0x%X, sr 0x%X\n", instance->host_no, tmp, NCR5380_read(STATUS_REG));
+       dsprintk(NDEBUG_DMA, "polled DMA transfer complete, basr 0x%02x, sr 0x%02x\n",
+                tmp, NCR5380_read(STATUS_REG));
 
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
@@ -1861,8 +1649,8 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        *data += c;
        *phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
-#ifdef READ_OVERRUNS
-       if (*phase == p && (p & SR_IO) && residue == 0) {
+       if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS) &&
+           *phase == p && (p & SR_IO) && residue == 0) {
                if (overrun) {
                        dprintk(NDEBUG_DMA, "Got an input overrun, using saved byte\n");
                        **data = saved_data;
@@ -1877,7 +1665,6 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
                NCR5380_transfer_pio(instance, phase, &cnt, data);
                *count -= toPIO - cnt;
        }
-#endif
 
        dprintk(NDEBUG_DMA, "Return with data ptr = 0x%X, count %d, last 0x%X, next 0x%X\n", *data, *count, *(*data + *count - 1), *(*data + *count));
        return 0;
@@ -1886,95 +1673,64 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        return 0;
 #else                          /* defined(REAL_DMA_POLL) */
        if (p & SR_IO) {
-#ifdef DMA_WORKS_RIGHT
-               foo = NCR5380_pread(instance, d, c);
-#else
-               int diff = 1;
-               if (hostdata->flags & FLAG_NCR53C400) {
-                       diff = 0;
-               }
-               if (!(foo = NCR5380_pread(instance, d, c - diff))) {
+               foo = NCR5380_pread(instance, d,
+                       hostdata->flags & FLAG_NO_DMA_FIXUP ? c : c - 1);
+               if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
                        /*
-                        * We can't disable DMA mode after successfully transferring 
+                        * We can't disable DMA mode after successfully transferring
                         * what we plan to be the last byte, since that would open up
-                        * a race condition where if the target asserted REQ before 
+                        * a race condition where if the target asserted REQ before
                         * we got the DMA mode reset, the NCR5380 would have latched
                         * an additional byte into the INPUT DATA register and we'd
                         * have dropped it.
-                        * 
-                        * The workaround was to transfer one fewer bytes than we 
-                        * intended to with the pseudo-DMA read function, wait for 
+                        *
+                        * The workaround was to transfer one fewer bytes than we
+                        * intended to with the pseudo-DMA read function, wait for
                         * the chip to latch the last byte, read it, and then disable
                         * pseudo-DMA mode.
-                        * 
+                        *
                         * After REQ is asserted, the NCR5380 asserts DRQ and ACK.
                         * REQ is deasserted when ACK is asserted, and not reasserted
                         * until ACK goes false.  Since the NCR5380 won't lower ACK
                         * until DACK is asserted, which won't happen unless we twiddle
-                        * the DMA port or we take the NCR5380 out of DMA mode, we 
-                        * can guarantee that we won't handshake another extra 
+                        * the DMA port or we take the NCR5380 out of DMA mode, we
+                        * can guarantee that we won't handshake another extra
                         * byte.
                         */
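The old open-coded busy-wait loops below are replaced with NCR5380_poll_politely(), which is defined elsewhere in this patch. A simplified sketch of what such a helper does (assumed shape, not the patch's exact implementation):

	/* Wait for (reg & bit) == val without hogging the CPU; 0 on success */
	static int poll_politely_sketch(struct Scsi_Host *instance, int reg,
					int bit, int val, int wait)
	{
		unsigned long deadline = jiffies + wait;

		do {
			if ((NCR5380_read(reg) & bit) == val)
				return 0;
			cpu_relax();	/* the real helper may also sleep */
		} while (time_before(jiffies, deadline));

		return -ETIMEDOUT;	/* callers abort or fail the transfer */
	}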
 
-                       if (!(hostdata->flags & FLAG_NCR53C400)) {
-                               while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ));
-                               /* Wait for clean handshake */
-                               while (NCR5380_read(STATUS_REG) & SR_REQ);
-                               d[c - 1] = NCR5380_read(INPUT_DATA_REG);
+                       if (NCR5380_poll_politely(instance, BUS_AND_STATUS_REG,
+                                                 BASR_DRQ, BASR_DRQ, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA read: DRQ timeout\n");
                        }
+                       if (NCR5380_poll_politely(instance, STATUS_REG,
+                                                 SR_REQ, 0, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA read: !REQ timeout\n");
+                       }
+                       d[c - 1] = NCR5380_read(INPUT_DATA_REG);
                }
-#endif
        } else {
-#ifdef DMA_WORKS_RIGHT
                foo = NCR5380_pwrite(instance, d, c);
-#else
-               int timeout;
-               dprintk(NDEBUG_C400_PWRITE, "About to pwrite %d bytes\n", c);
-               if (!(foo = NCR5380_pwrite(instance, d, c))) {
+               if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
                        /*
-                        * Wait for the last byte to be sent.  If REQ is being asserted for 
-                        * the byte we're interested, we'll ACK it and it will go false.  
+                        * Wait for the last byte to be sent.  If REQ is being asserted for
+                        * the byte we're interested in, we'll ACK it and it will go false.
                         */
-                       if (!(hostdata->flags & FLAG_HAS_LAST_BYTE_SENT)) {
-                               timeout = 20000;
-                               while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ) && (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH));
-
-                               if (!timeout)
-                                       dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : timed out on last byte\n", instance->host_no);
-
-                               if (hostdata->flags & FLAG_CHECK_LAST_BYTE_SENT) {
-                                       hostdata->flags &= ~FLAG_CHECK_LAST_BYTE_SENT;
-                                       if (NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT) {
-                                               hostdata->flags |= FLAG_HAS_LAST_BYTE_SENT;
-                                               dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : last byte sent works\n", instance->host_no);
-                                       }
-                               }
-                       } else {
-                               dprintk(NDEBUG_C400_PWRITE, "Waiting for LASTBYTE\n");
-                               while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT));
-                               dprintk(NDEBUG_C400_PWRITE, "Got LASTBYTE\n");
+                       if (NCR5380_poll_politely2(instance,
+                            BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
+                            BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA write: DRQ and phase timeout\n");
                        }
                }
-#endif
        }
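NCR5380_poll_politely2() above is the two-condition variant of the same helper: it returns success as soon as either (reg1 & bit1) == val1 or (reg2 & bit2) == val2 holds, and a negative value on timeout (assumed semantics, inferred from its use here). For the send path that reads as: wait up to HZ jiffies for DRQ to assert, but stop waiting early if the target drops phase match.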
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
-       if ((!(p & SR_IO)) && (hostdata->flags & FLAG_NCR53C400)) {
-               dprintk(NDEBUG_C400_PWRITE, "53C400w: Checking for IRQ\n");
-               if (NCR5380_read(BUS_AND_STATUS_REG) & BASR_IRQ) {
-                       dprintk(NDEBUG_C400_PWRITE, "53C400w:    got it, reading reset interrupt reg\n");
-                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else {
-                       printk("53C400w:    IRQ NOT THERE!\n");
-               }
-       }
+       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        *data = d + c;
        *count = 0;
        *phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
-       spin_lock_irq(instance->host_lock);
-#endif                         /* defined(REAL_DMA_POLL) */
        return foo;
 #endif                         /* def REAL_DMA */
 }
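Throughout this patch, open-coded (struct NCR5380_hostdata *)instance->hostdata casts give way to shost_priv(). A sketch of the standard midlayer pattern this relies on, with the template name assumed for illustration:

	struct Scsi_Host *host;

	/* Reserve hostdata space together with the Scsi_Host itself */
	host = scsi_host_alloc(&driver_template, sizeof(struct NCR5380_hostdata));
	if (host) {
		struct NCR5380_hostdata *hostdata = shost_priv(host);
		/* ... initialize hostdata fields ... */
	}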
@@ -1983,25 +1739,23 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
 /*
  * Function : NCR5380_information_transfer (struct Scsi_Host *instance)
  *
- * Purpose : run through the various SCSI phases and do as the target 
- *      directs us to.  Operates on the currently connected command, 
- *      instance->connected.
+ * Purpose : run through the various SCSI phases and do as the target
+ * directs us to.  Operates on the currently connected command,
+ * instance->connected.
  *
  * Inputs : instance, instance for which we are doing commands
  *
- * Side effects : SCSI things happen, the disconnected queue will be 
- *      modified if a command disconnects, *instance->connected will
- *      change.
+ * Side effects : SCSI things happen, the disconnected queue will be
+ * modified if a command disconnects, *instance->connected will
+ * change.
  *
- * XXX Note : we need to watch for bus free or a reset condition here 
- *      to recover from an unexpected bus free condition.
- *
- * Locks: io_request_lock held by caller in IRQ mode
+ * XXX Note : we need to watch for bus free or a reset condition here
+ * to recover from an unexpected bus free condition.
  */
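For orientation, the reworked function below reduces to a phase-driven state machine (paraphrased, not compilable):

	while ((cmd = hostdata->connected)) {
		if (SR_REQ is asserted) {	/* bus phase is valid */
			switch (phase) {
			case DATAIN/DATAOUT:	PIO or (pseudo-)DMA transfer
			case MSGIN:		act on the target's message
			case MSGOUT:		send queued message, e.g. ABORT
			case CMDOUT:		send the CDB
			case STATIN:		latch the status byte
			}
		} else {
			poll politely for REQ;	/* lock dropped meanwhile */
		}
	}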
 
-static void NCR5380_information_transfer(struct Scsi_Host *instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)instance->hostdata;
+static void NCR5380_information_transfer(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char msgout = NOP;
        int sink = 0;
        int len;
@@ -2010,13 +1764,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
 #endif
        unsigned char *data;
        unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
-       struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
-       /* RvC: we need to set the end of the polling time */
-       unsigned long poll_time = jiffies + USLEEP_POLL;
+       struct scsi_cmnd *cmd;
 
-       NCR5380_setup(instance);
+       while ((cmd = hostdata->connected)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-       while (1) {
                tmp = NCR5380_read(STATUS_REG);
                /* We only have a valid SCSI phase when REQ is asserted */
                if (tmp & SR_REQ) {
@@ -2028,24 +1780,28 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                        if (sink && (phase != PHASE_MSGOUT)) {
                                NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
-                               while (NCR5380_read(STATUS_REG) & SR_REQ);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
+                                             ICR_ASSERT_ACK);
+                               while (NCR5380_read(STATUS_REG) & SR_REQ)
+                                       ;
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_ATN);
                                sink = 0;
                                continue;
                        }
+
                        switch (phase) {
-                       case PHASE_DATAIN:
                        case PHASE_DATAOUT:
 #if (NDEBUG & NDEBUG_NO_DATAOUT)
-                               printk("scsi%d : NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n", instance->host_no);
+                               shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
                                sink = 1;
                                do_abort(instance);
                                cmd->result = DID_ERROR << 16;
-                               cmd->scsi_done(cmd);
+                               complete_cmd(instance, cmd);
                                return;
 #endif
-                               /* 
+                       case PHASE_DATAIN:
+                               /*
                                 * If there is no room left in the current buffer in the
                                 * scatter-gather list, move onto the next one.
                                 */
@@ -2055,10 +1811,13 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        --cmd->SCp.buffers_residual;
                                        cmd->SCp.this_residual = cmd->SCp.buffer->length;
                                        cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
-                                       dprintk(NDEBUG_INFORMATION, "scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual);
+                                       dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+                                                cmd->SCp.this_residual,
+                                                cmd->SCp.buffers_residual);
                                }
+
                                /*
-                                * The preferred transfer method is going to be 
+                                * The preferred transfer method is going to be
                                 * PSEUDO-DMA for systems that are strictly PIO,
                                 * since we can let the hardware do the handshaking.
                                 *
@@ -2068,50 +1827,39 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                 */
 
 #if defined(PSEUDO_DMA) || defined(REAL_DMA_POLL)
-                               /* KLL
-                                * PSEUDO_DMA is defined here. If this is the g_NCR5380
-                                * driver then it will always be defined, so the
-                                * FLAG_NO_PSEUDO_DMA is used to inhibit PDMA in the base
-                                * NCR5380 case.  I think this is a fairly clean solution.
-                                * We supplement these 2 if's with the flag.
-                                */
-#ifdef NCR5380_dma_xfer_len
-                               if (!cmd->device->borken && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && (transfersize = NCR5380_dma_xfer_len(instance, cmd)) != 0) {
-#else
-                               transfersize = cmd->transfersize;
-
-#ifdef LIMIT_TRANSFERSIZE      /* If we have problems with interrupt service */
-                               if (transfersize > 512)
-                                       transfersize = 512;
-#endif                         /* LIMIT_TRANSFERSIZE */
-
-                               if (!cmd->device->borken && transfersize && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && cmd->SCp.this_residual && !(cmd->SCp.this_residual % transfersize)) {
-                                       /* Limit transfers to 32K, for xx400 & xx406
-                                        * pseudoDMA that transfers in 128 bytes blocks. */
-                                       if (transfersize > 32 * 1024)
-                                               transfersize = 32 * 1024;
-#endif
+                               transfersize = 0;
+                               if (!cmd->device->borken &&
+                                   !(hostdata->flags & FLAG_NO_PSEUDO_DMA))
+                                       transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+                               if (transfersize) {
                                        len = transfersize;
-                                       if (NCR5380_transfer_dma(instance, &phase, &len, (unsigned char **) &cmd->SCp.ptr)) {
+                                       if (NCR5380_transfer_dma(instance, &phase,
+                                           &len, (unsigned char **)&cmd->SCp.ptr)) {
                                                /*
-                                                * If the watchdog timer fires, all future accesses to this
-                                                * device will use the polled-IO.
+                                                * If the watchdog timer fires, all future
+                                                * accesses to this device will use the
+                                                * polled-IO.
                                                 */
                                                scmd_printk(KERN_INFO, cmd,
-                                                           "switching to slow handshake\n");
+                                                       "switching to slow handshake\n");
                                                cmd->device->borken = 1;
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
                                                sink = 1;
                                                do_abort(instance);
                                                cmd->result = DID_ERROR << 16;
-                                               cmd->scsi_done(cmd);
+                                               complete_cmd(instance, cmd);
                                                /* XXX - need to source or sink data here, as appropriate */
                                        } else
                                                cmd->SCp.this_residual -= transfersize - len;
                                } else
 #endif                         /* defined(PSEUDO_DMA) || defined(REAL_DMA_POLL) */
-                                       NCR5380_transfer_pio(instance, &phase, (int *) &cmd->SCp.this_residual, (unsigned char **)
-                                                            &cmd->SCp.ptr);
+                               {
+                                       spin_unlock_irq(&hostdata->lock);
+                                       NCR5380_transfer_pio(instance, &phase,
+                                                            (int *)&cmd->SCp.this_residual,
+                                                            (unsigned char **)&cmd->SCp.ptr);
+                                       spin_lock_irq(&hostdata->lock);
+                               }
                                break;
                        case PHASE_MSGIN:
                                len = 1;
@@ -2120,101 +1868,42 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                cmd->SCp.Message = tmp;
 
                                switch (tmp) {
-                                       /*
-                                        * Linking lets us reduce the time required to get the 
-                                        * next command out to the device, hopefully this will
-                                        * mean we don't waste another revolution due to the delays
-                                        * required by ARBITRATION and another SELECTION.
-                                        *
-                                        * In the current implementation proposal, low level drivers
-                                        * merely have to start the next command, pointed to by 
-                                        * next_link, done() is called as with unlinked commands.
-                                        */
-#ifdef LINKED
-                               case LINKED_CMD_COMPLETE:
-                               case LINKED_FLG_CMD_COMPLETE:
-                                       /* Accept message by clearing ACK */
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked command complete.\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       /* 
-                                        * Sanity check : A linked command should only terminate with
-                                        * one of these messages if there are more linked commands
-                                        * available.
-                                        */
-                                       if (!cmd->next_link) {
-                                           printk("scsi%d : target %d lun %llu linked command complete, no next_link\n" instance->host_no, cmd->device->id, cmd->device->lun);
-                                               sink = 1;
-                                               do_abort(instance);
-                                               return;
-                                       }
-                                       initialize_SCp(cmd->next_link);
-                                       /* The next command is still part of this process */
-                                       cmd->next_link->tag = cmd->tag;
-                                       cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked request done, calling scsi_done().\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       cmd->scsi_done(cmd);
-                                       cmd = hostdata->connected;
-                                       break;
-#endif                         /* def LINKED */
                                case ABORT:
                                case COMMAND_COMPLETE:
                                        /* Accept message by clearing ACK */
                                        sink = 1;
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       hostdata->connected = NULL;
-                                       dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d, lun %llu completed\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
-
-                                       /* 
-                                        * I'm not sure what the correct thing to do here is : 
-                                        * 
-                                        * If the command that just executed is NOT a request 
-                                        * sense, the obvious thing to do is to set the result
-                                        * code to the values of the stored parameters.
-                                        * 
-                                        * If it was a REQUEST SENSE command, we need some way 
-                                        * to differentiate between the failure code of the original
-                                        * and the failure code of the REQUEST sense - the obvious
-                                        * case is success, where we fall through and leave the result
-                                        * code unchanged.
-                                        * 
-                                        * The non-obvious place is where the REQUEST SENSE failed 
-                                        */
-
-                                       if (cmd->cmnd[0] != REQUEST_SENSE)
-                                               cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       else if (status_byte(cmd->SCp.Status) != GOOD)
-                                               cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
-                                       if ((cmd->cmnd[0] == REQUEST_SENSE) &&
-                                               hostdata->ses.cmd_len) {
-                                               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
-                                               hostdata->ses.cmd_len = 0 ;
-                                       }
+                                       dsprintk(NDEBUG_QUEUES, instance,
+                                                "COMMAND COMPLETE %p target %d lun %llu\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
 
-                                       if ((cmd->cmnd[0] != REQUEST_SENSE) && (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
-                                               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
-
-                                               dprintk(NDEBUG_AUTOSENSE, "scsi%d : performing request sense\n", instance->host_no);
+                                       hostdata->connected = NULL;
 
-                                               LIST(cmd, hostdata->issue_queue);
-                                               cmd->host_scribble = (unsigned char *)
-                                                   hostdata->issue_queue;
-                                               hostdata->issue_queue = (struct scsi_cmnd *) cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d : REQUEST SENSE added to head of issue queue\n", instance->host_no);
-                                       } else {
-                                               cmd->scsi_done(cmd);
+                                       cmd->result &= ~0xffff;
+                                       cmd->result |= cmd->SCp.Status;
+                                       cmd->result |= cmd->SCp.Message << 8;
+
+                                       if (cmd->cmnd[0] == REQUEST_SENSE)
+                                               complete_cmd(instance, cmd);
+                                       else {
+                                               if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+                                                   cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+                                                       dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+                                                                cmd);
+                                                       list_add_tail(&ncmd->list,
+                                                                     &hostdata->autosense);
+                                               } else
+                                                       complete_cmd(instance, cmd);
                                        }
 
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /* 
-                                        * Restore phase bits to 0 so an interrupted selection, 
+                                       /*
+                                        * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
                                         */
                                        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                case MESSAGE_REJECT:
                                        /* Accept message by clearing ACK */
@@ -2229,38 +1918,33 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        default:
                                                break;
                                        }
-                               case DISCONNECT:{
-                                               /* Accept message by clearing ACK */
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                               cmd->device->disconnect = 1;
-                                               LIST(cmd, hostdata->disconnected_queue);
-                                               cmd->host_scribble = (unsigned char *)
-                                                   hostdata->disconnected_queue;
-                                               hostdata->connected = NULL;
-                                               hostdata->disconnected_queue = cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d lun %llu was moved from connected to" "  the disconnected_queue\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                               /* 
-                                                * Restore phase bits to 0 so an interrupted selection, 
-                                                * arbitration can resume.
-                                                */
-                                               NCR5380_write(TARGET_COMMAND_REG, 0);
-
-                                               /* Enable reselect interrupts */
-                                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                               /* Wait for bus free to avoid nasty timeouts - FIXME timeout !*/
-                                               /* NCR538_poll_politely(instance, STATUS_REG, SR_BSY, 0, 30 * HZ); */
-                                               while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                                       barrier();
-                                               return;
-                                       }
-                                       /* 
+                                       break;
+                               case DISCONNECT:
+                                       /* Accept message by clearing ACK */
+                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+                                       hostdata->connected = NULL;
+                                       list_add(&ncmd->list, &hostdata->disconnected);
+                                       dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+                                                instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
+
+                                       /*
+                                        * Restore phase bits to 0 so an interrupted selection,
+                                        * arbitration can resume.
+                                        */
+                                       NCR5380_write(TARGET_COMMAND_REG, 0);
+
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+                                       return;
+                                       /*
                                         * The SCSI data pointer is *IMPLICITLY* saved on a disconnect
-                                        * operation, in violation of the SCSI spec so we can safely 
+                                        * operation, in violation of the SCSI spec so we can safely
                                         * ignore SAVE/RESTORE pointers calls.
                                         *
-                                        * Unfortunately, some disks violate the SCSI spec and 
+                                        * Unfortunately, some disks violate the SCSI spec and
                                         * don't issue the required SAVE_POINTERS message before
-                                        * disconnecting, and we have to break spec to remain 
+                                        * disconnecting, and we have to break spec to remain
                                         * compatible.
                                         */
                                case SAVE_POINTERS:
@@ -2269,31 +1953,28 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                        break;
                                case EXTENDED_MESSAGE:
-/* 
- * Extended messages are sent in the following format :
- * Byte         
- * 0            EXTENDED_MESSAGE == 1
- * 1            length (includes one byte for code, doesn't 
- *              include first two bytes)
- * 2            code
- * 3..length+1  arguments
- *
- * Start the extended message buffer with the EXTENDED_MESSAGE
- * byte, since spi_print_msg() wants the whole thing.  
- */
+                                       /*
+                                        * Start the message buffer with the EXTENDED_MESSAGE
+                                        * byte, since spi_print_msg() wants the whole thing.
+                                        */
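The wire format documented by the deleted comment still governs the length check below; condensed:

	/* Extended message layout (per the SCSI spec):
	 *   byte 0          EXTENDED_MESSAGE (0x01)
	 *   byte 1          length (code plus arguments; excludes bytes 0-1)
	 *   byte 2          code (e.g. EXTENDED_SDTR)
	 *   bytes 3..len+1  arguments
	 */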
                                        extended_msg[0] = EXTENDED_MESSAGE;
                                        /* Accept first byte by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d : receiving extended message\n", instance->host_no);
+
+                                       spin_unlock_irq(&hostdata->lock);
+
+                                       dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
 
                                        len = 2;
                                        data = extended_msg + 1;
                                        phase = PHASE_MSGIN;
                                        NCR5380_transfer_pio(instance, &phase, &len, &data);
+                                       dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+                                                (int)extended_msg[1],
+                                                (int)extended_msg[2]);
 
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d : length=%d, code=0x%02x\n", instance->host_no, (int) extended_msg[1], (int) extended_msg[2]);
-
-                                       if (!len && extended_msg[1] <= (sizeof(extended_msg) - 1)) {
+                                       if (!len && extended_msg[1] > 0 &&
+                                           extended_msg[1] <= sizeof(extended_msg) - 2) {
                                                /* Accept third byte by clearing ACK */
                                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                                len = extended_msg[1] - 1;
@@ -2301,7 +1982,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                                phase = PHASE_MSGIN;
 
                                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                               dprintk(NDEBUG_EXTENDED, "scsi%d : message received, residual %d\n", instance->host_no, len);
+                                               dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+                                                        len);
 
                                                switch (extended_msg[2]) {
                                                case EXTENDED_SDTR:
@@ -2311,34 +1993,42 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                                        tmp = 0;
                                                }
                                        } else if (len) {
-                                               printk("scsi%d: error receiving extended message\n", instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "error receiving extended message\n");
                                                tmp = 0;
                                        } else {
-                                               printk("scsi%d: extended message code %02x length %d is too long\n", instance->host_no, extended_msg[2], extended_msg[1]);
+                                               shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+                                                            extended_msg[2], extended_msg[1]);
                                                tmp = 0;
                                        }
+
+                                       spin_lock_irq(&hostdata->lock);
+                                       if (!hostdata->connected)
+                                               return;
+
                                        /* Fall through to reject message */
 
-                                       /* 
-                                        * If we get something weird that we aren't expecting, 
+                                       /*
+                                        * If we get something weird that we aren't expecting,
                                         * reject it.
                                         */
                                default:
                                        if (!tmp) {
-                                               printk("scsi%d: rejecting message ", instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "rejecting message ");
                                                spi_print_msg(extended_msg);
                                                printk("\n");
                                        } else if (tmp != EXTENDED_MESSAGE)
                                                scmd_printk(KERN_INFO, cmd,
-                                                       "rejecting unknown message %02x\n",tmp);
+                                                           "rejecting unknown message %02x\n",
+                                                           tmp);
                                        else
                                                scmd_printk(KERN_INFO, cmd,
-                                                       "rejecting unknown extended message code %02x, length %d\n", extended_msg[1], extended_msg[0]);
+                                                           "rejecting unknown extended message code %02x, length %d\n",
+                                                           extended_msg[2], extended_msg[1]);
 
                                        msgout = MESSAGE_REJECT;
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
                                        break;
-                               }       /* switch (tmp) */
+                               } /* switch (tmp) */
                                break;
                        case PHASE_MSGOUT:
                                len = 1;
@@ -2346,10 +2036,9 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                hostdata->last_message = msgout;
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                if (msgout == ABORT) {
-                                       hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
                                        hostdata->connected = NULL;
                                        cmd->result = DID_ERROR << 16;
-                                       cmd->scsi_done(cmd);
+                                       complete_cmd(instance, cmd);
                                        NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                }
@@ -2358,17 +2047,12 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                        case PHASE_CMDOUT:
                                len = cmd->cmd_len;
                                data = cmd->cmnd;
-                               /* 
-                                * XXX for performance reasons, on machines with a 
-                                * PSEUDO-DMA architecture we should probably 
-                                * use the dma transfer function.  
+                               /*
+                                * XXX for performance reasons, on machines with a
+                                * PSEUDO-DMA architecture we should probably
+                                * use the dma transfer function.
                                 */
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                               if (!cmd->device->disconnect && should_disconnect(cmd->cmnd[0])) {
-                                       NCR5380_set_timer(hostdata, USLEEP_SLEEP);
-                                       dprintk(NDEBUG_USLEEP, "scsi%d : issued command, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
-                                       return;
-                               }
                                break;
                        case PHASE_STATIN:
                                len = 1;
@@ -2377,46 +2061,37 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                cmd->SCp.Status = tmp;
                                break;
                        default:
-                               printk("scsi%d : unknown phase\n", instance->host_no);
+                               shost_printk(KERN_ERR, instance, "unknown phase\n");
                                NCR5380_dprint(NDEBUG_ANY, instance);
-                       }       /* switch(phase) */
-               }               /* if (tmp * SR_REQ) */
-               else {
-                       /* RvC: go to sleep if polling time expired
-                        */
-                       if (!cmd->device->disconnect && time_after_eq(jiffies, poll_time)) {
-                               NCR5380_set_timer(hostdata, USLEEP_SLEEP);
-                               dprintk(NDEBUG_USLEEP, "scsi%d : poll timed out, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
-                               return;
-                       }
+                       } /* switch(phase) */
+               } else {
+                       spin_unlock_irq(&hostdata->lock);
+                       NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+                       spin_lock_irq(&hostdata->lock);
                }
-       }                       /* while (1) */
+       }
 }
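The scsi_cmd_priv()/NCR5380_to_scmd() pairing used above and in NCR5380_reselect() below depends on the per-command private area this series adds. Its assumed shape, per NCR5380.h (not visible in this excerpt):

	struct NCR5380_cmd {
		struct list_head list;	/* links into disconnected/autosense */
	};

	/* The private area sits directly after the scsi_cmnd, so the owning
	 * command is recovered by pointer arithmetic: */
	static inline struct scsi_cmnd *NCR5380_to_scmd(struct NCR5380_cmd *ncmd_ptr)
	{
		return ((struct scsi_cmnd *)ncmd_ptr) - 1;
	}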
 
 /*
  * Function : void NCR5380_reselect (struct Scsi_Host *instance)
  *
- * Purpose : does reselection, initializing the instance->connected 
- *      field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- *      nexus has been reestablished,
- *      
- * Inputs : instance - this instance of the NCR5380.
+ * Purpose : does reselection, initializing the instance->connected
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
  *
- * Locks: io_request_lock held by caller if IRQ driven
+ * Inputs : instance - this instance of the NCR5380.
  */
 
-static void NCR5380_reselect(struct Scsi_Host *instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)
-        instance->hostdata;
+static void NCR5380_reselect(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char target_mask;
        unsigned char lun, phase;
        int len;
        unsigned char msg[3];
        unsigned char *data;
-       struct scsi_cmnd *tmp = NULL, *prev;
-       int abort = 0;
-       NCR5380_setup(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *tmp;
 
        /*
         * Disable arbitration, etc. since the host adapter obviously
@@ -2424,12 +2099,12 @@ static void NCR5380_reselect(struct Scsi_Host *instance) {
         */
 
        NCR5380_write(MODE_REG, MR_BASE);
-       hostdata->restart_select = 1;
 
        target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
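As a worked example of the mask arithmetic: with the host at SCSI ID 7, id_mask is 0x80 (1 << 7). A reselecting target 3 asserts both IDs on the data bus, so CURRENT_SCSI_DATA_REG reads 0x88; clearing id_mask leaves target_mask == 0x08, i.e. 1 << 3, identifying the reselecting target.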
-       dprintk(NDEBUG_SELECTION, "scsi%d : reselect\n", instance->host_no);
 
-       /* 
+       dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
+
+       /*
         * At this point, we have detected that our SCSI ID is on the bus,
         * SEL is true and BSY was false for at least one bus settle delay
         * (400 ns).
@@ -2439,103 +2114,110 @@ static void NCR5380_reselect(struct Scsi_Host *instance) {
         */
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
-       /* FIXME: timeout too long, must fail to workqueue */   
-       if(NCR5380_poll_politely(instance, STATUS_REG, SR_SEL, 0, 2*HZ)<0)
-               abort = 1;
-               
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               return;
+       }
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
         * Wait for target to go into MSGIN.
-        * FIXME: timeout needed and fail to work queeu
         */
 
-       if(NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 2*HZ))
-               abort = 1;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+               do_abort(instance);
+               return;
+       }
 
        len = 1;
        data = msg;
        phase = PHASE_MSGIN;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
 
+       if (len) {
+               do_abort(instance);
+               return;
+       }
+
        if (!(msg[0] & 0x80)) {
-               printk(KERN_ERR "scsi%d : expecting IDENTIFY message, got ", instance->host_no);
+               shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
                spi_print_msg(msg);
-               abort = 1;
-       } else {
-               /* Accept message by clearing ACK */
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               lun = (msg[0] & 0x07);
+               printk("\n");
+               do_abort(instance);
+               return;
+       }
+       lun = msg[0] & 0x07;
 
-               /* 
-                * We need to add code for SCSI-II to track which devices have
-                * I_T_L_Q nexuses established, and which have simple I_T_L
-                * nexuses so we can chose to do additional data transfer.
-                */
+       /*
+        * We need to add code for SCSI-II to track which devices have
+        * I_T_L_Q nexuses established, and which have simple I_T_L
+        * nexuses so we can choose to do additional data transfer.
+        */
 
-               /* 
-                * Find the command corresponding to the I_T_L or I_T_L_Q  nexus we 
-                * just reestablished, and remove it from the disconnected queue.
-                */
+       /*
+        * Find the command corresponding to the I_T_L or I_T_L_Q  nexus we
+        * just reestablished, and remove it from the disconnected queue.
+        */
 
+       tmp = NULL;
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
 
-               for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                       if ((target_mask == (1 << tmp->device->id)) && (lun == (u8)tmp->device->lun)
-                           ) {
-                               if (prev) {
-                                       REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
-                                       prev->host_scribble = tmp->host_scribble;
-                               } else {
-                                       REMOVE(-1, hostdata->disconnected_queue, tmp, tmp->host_scribble);
-                                       hostdata->disconnected_queue = (struct scsi_cmnd *) tmp->host_scribble;
-                               }
-                               tmp->host_scribble = NULL;
-                               break;
-                       }
-               if (!tmp) {
-                       printk(KERN_ERR "scsi%d : warning : target bitmask %02x lun %d not in disconnect_queue.\n", instance->host_no, target_mask, lun);
-                       /* 
-                        * Since we have an established nexus that we can't do anything with,
-                        * we must abort it.  
-                        */
-                       abort = 1;
+               if (target_mask == (1 << scmd_id(cmd)) &&
+                   lun == (u8)cmd->device->lun) {
+                       list_del(&ncmd->list);
+                       tmp = cmd;
+                       break;
                }
        }
 
-       if (abort) {
-               do_abort(instance);
+       if (tmp) {
+               dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+                        "reselect: removed %p from disconnected queue\n", tmp);
        } else {
-               hostdata->connected = tmp;
-               dprintk(NDEBUG_RESELECTION, "scsi%d : nexus established, target = %d, lun = %llu, tag = %d\n", instance->host_no, tmp->device->id, tmp->device->lun, tmp->tag);
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+                            target_mask, lun);
+               /*
+                * Since we have an established nexus that we can't do anything
+                * with, we must abort it.
+                */
+               do_abort(instance);
+               return;
        }
+
+       /* Accept message by clearing ACK */
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+
+       hostdata->connected = tmp;
+       dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+                scmd_id(tmp), tmp->device->lun, tmp->tag);
 }
 
 /*
  * Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
  *
  * Purpose : called by interrupt handler when DMA finishes or a phase
- *      mismatch occurs (which would finish the DMA transfer).  
+ * mismatch occurs (which would finish the DMA transfer).
  *
  * Inputs : instance - this instance of the NCR5380.
  *
  * Returns : pointer to the scsi_cmnd structure for which the I_T_L
- *      nexus has been reestablished, on failure NULL is returned.
+ * nexus has been reestablished, on failure NULL is returned.
  */
 
 #ifdef REAL_DMA
 static void NCR5380_dma_complete(struct Scsi_Host *instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int transferred;
-       NCR5380_setup(instance);
 
        /*
         * XXX this might not be right.
         *
         * Wait for final byte to transfer, ie wait for ACK to go false.
         *
-        * We should use the Last Byte Sent bit, unfortunately this is 
+        * We should use the Last Byte Sent bit, unfortunately this is
         * not available on the 5380/5381 (only the various CMOS chips)
         *
         * FIXME: timeout, and need to handle long timeout/irq case
@@ -2543,7 +2225,6 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) {
 
        NCR5380_poll_politely(instance, BUS_AND_STATUS_REG, BASR_ACK, 0, 5*HZ);
 
-       NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
@@ -2560,190 +2241,251 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) {
 }
 #endif                         /* def REAL_DMA */
 
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- *      host byte of the result field to, if zero DID_ABORTED is
- *      used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- *     XXX - there is no way to abort the command that is currently
- *     connected, you have to wait for it to complete.  If this is
- *     a problem, we could implement longjmp() / setjmp(), setjmp()
- *     called where the loop started in NCR5380_main().
- *
- * Locks: host lock taken by caller
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
  */
 
-static int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+                          struct scsi_cmnd *needle)
 {
-       NCR5380_local_declare();
-       struct Scsi_Host *instance = cmd->device->host;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       struct scsi_cmnd *tmp, **prev;
+       struct NCR5380_cmd *ncmd;
 
-       scmd_printk(KERN_WARNING, cmd, "aborting command\n");
+       list_for_each_entry(ncmd, haystack, list)
+               if (NCR5380_to_scmd(ncmd) == needle)
+                       return true;
+       return false;
+}
 
-       NCR5380_print_status(instance);
+/**
+ * list_del_cmd - remove a command from a linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
 
-       NCR5380_setup(instance);
+static bool list_del_cmd(struct list_head *haystack,
+                         struct scsi_cmnd *needle)
+{
+       if (list_find_cmd(haystack, needle)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
 
-       dprintk(NDEBUG_ABORT, "scsi%d : abort called\n", instance->host_no);
-       dprintk(NDEBUG_ABORT, "        basr 0x%X, sr 0x%X\n", NCR5380_read(BUS_AND_STATUS_REG), NCR5380_read(STATUS_REG));
+               list_del(&ncmd->list);
+               return true;
+       }
+       return false;
+}
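
A minimal sketch of how these two helpers are meant to combine, as the EH handlers below do while holding hostdata->lock (illustrative only, not part of the patch):

        if (list_del_cmd(&hostdata->unissued, cmd)) {
                /* cmd never reached the bus; fail it straight back */
                cmd->result = DID_ABORT << 16;
                cmd->scsi_done(cmd);
        }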
 
-#if 0
-/*
- * Case 1 : If the command is the currently executing command, 
- * we'll set the aborted flag and return control so that 
- * information transfer routine can exit cleanly.
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
  */
 
-       if (hostdata->connected == cmd) {
-               dprintk(NDEBUG_ABORT, "scsi%d : aborting connected command\n", instance->host_no);
-               hostdata->aborted = 1;
-/*
- * We should perform BSY checking, and make sure we haven't slipped
- * into BUS FREE.
- */
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long flags;
+       int result = SUCCESS;
 
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN);
-/* 
- * Since we can't change phases until we've completed the current 
- * handshake, we have to source or sink a byte of data if the current
- * phase is not MSGOUT.
- */
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-/* 
- * Return control to the executing NCR drive so we can clear the
- * aborted flag and get back into our main loop.
- */
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, "%s", __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-               return SUCCESS;
+       if (list_del_cmd(&hostdata->unissued, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from issue queue\n", cmd);
+               cmd->result = DID_ABORT << 16;
+               cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
        }
-#endif
 
-/* 
- * Case 2 : If the command hasn't been issued yet, we simply remove it 
- *          from the issue queue.
- */
-       dprintk(NDEBUG_ABORT, "scsi%d : abort going into loop.\n", instance->host_no);
-       for (prev = (struct scsi_cmnd **) &(hostdata->issue_queue), tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
-               if (cmd == tmp) {
-                       REMOVE(5, *prev, tmp, tmp->host_scribble);
-                       (*prev) = (struct scsi_cmnd *) tmp->host_scribble;
-                       tmp->host_scribble = NULL;
-                       tmp->result = DID_ABORT << 16;
-                       dprintk(NDEBUG_ABORT, "scsi%d : abort removed command from issue queue.\n", instance->host_no);
-                       tmp->scsi_done(tmp);
-                       return SUCCESS;
+       if (hostdata->selecting == cmd) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: cmd %p == selecting\n", cmd);
+               hostdata->selecting = NULL;
+               cmd->result = DID_ABORT << 16;
+               complete_cmd(instance, cmd);
+               goto out;
+       }
+
+       if (list_del_cmd(&hostdata->disconnected, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from disconnected list\n", cmd);
+               cmd->result = DID_ERROR << 16;
+               if (!hostdata->connected)
+                       NCR5380_select(instance, cmd);
+               if (hostdata->connected != cmd) {
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+       }
+
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
                }
-#if (NDEBUG  & NDEBUG_ABORT)
-       /* KLL */
-               else if (prev == tmp)
-                       printk(KERN_ERR "scsi%d : LOOP\n", instance->host_no);
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
 #endif
+               if (cmd->cmnd[0] == REQUEST_SENSE)
+                       complete_cmd(instance, cmd);
+               else {
+                       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-/* 
- * Case 3 : If any commands are connected, we're going to fail the abort
- *          and let the high level SCSI driver retry at a later time or 
- *          issue a reset.
- *
- *          Timeouts, and therefore aborted commands, will be highly unlikely
- *          and handling them cleanly in this situation would make the common
- *          case of noresets less efficient, and would pollute our code.  So,
- *          we fail.
- */
+                       /* Perform autosense for this command */
+                       list_add(&ncmd->list, &hostdata->autosense);
+               }
+       }
 
-       if (hostdata->connected) {
-               dprintk(NDEBUG_ABORT, "scsi%d : abort failed, command connected.\n", instance->host_no);
-               return FAILED;
+       if (list_find_cmd(&hostdata->autosense, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: found %p on sense queue\n", cmd);
+               spin_unlock_irqrestore(&hostdata->lock, flags);
+               queue_work(hostdata->work_q, &hostdata->main_task);
+               msleep(1000);
+               spin_lock_irqsave(&hostdata->lock, flags);
+               if (list_del_cmd(&hostdata->autosense, cmd)) {
+                       dsprintk(NDEBUG_ABORT, instance,
+                                "abort: removed %p from sense queue\n", cmd);
+                       set_host_byte(cmd, DID_ABORT);
+                       complete_cmd(instance, cmd);
+                       goto out;
+               }
        }
-/*
- * Case 4: If the command is currently disconnected from the bus, and 
- *      there are no connected commands, we reconnect the I_T_L or 
- *      I_T_L_Q nexus associated with it, go into message out, and send 
- *      an abort message.
- *
- * This case is especially ugly. In order to reestablish the nexus, we
- * need to call NCR5380_select().  The easiest way to implement this 
- * function was to abort if the bus was busy, and let the interrupt
- * handler triggered on the SEL for reselect take care of lost arbitrations
- * where necessary, meaning interrupts need to be enabled.
- *
- * When interrupts are enabled, the queues may change - so we 
- * can't remove it from the disconnected queue before selecting it
- * because that could cause a failure in hashing the nexus if that 
- * device reselected.
- * 
- * Since the queues may change, we can't use the pointers from when we
- * first locate it.
- *
- * So, we must first locate the command, and if NCR5380_select()
- * succeeds, then issue the abort, relocate the command and remove
- * it from the disconnected queue.
- */
 
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; tmp = (struct scsi_cmnd *) tmp->host_scribble)
-               if (cmd == tmp) {
-                       dprintk(NDEBUG_ABORT, "scsi%d : aborting disconnected command.\n", instance->host_no);
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               complete_cmd(instance, cmd);
+       }
 
-                       if (NCR5380_select(instance, cmd))
-                               return FAILED;
-                       dprintk(NDEBUG_ABORT, "scsi%d : nexus reestablished.\n", instance->host_no);
+out:
+       if (result == FAILED)
+               dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+       else
+               dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
 
-                       do_abort(instance);
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-                       for (prev = (struct scsi_cmnd **) &(hostdata->disconnected_queue), tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                               if (cmd == tmp) {
-                                       REMOVE(5, *prev, tmp, tmp->host_scribble);
-                                       *prev = (struct scsi_cmnd *) tmp->host_scribble;
-                                       tmp->host_scribble = NULL;
-                                       tmp->result = DID_ABORT << 16;
-                                       tmp->scsi_done(tmp);
-                                       return SUCCESS;
-                               }
-               }
-/*
- * Case 5 : If we reached this point, the command was not found in any of 
- *          the queues.
- *
- * We probably reached this point because of an unlikely race condition
- * between the command completing successfully and the abortion code,
- * so we won't panic, but we will notify the user in case something really
- * broke.
- */
-       printk(KERN_WARNING "scsi%d : warning : SCSI command probably completed successfully\n"
-                       "         before abortion\n", instance->host_no);
-       return FAILED;
+       return result;
 }
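
Two details of this abort path are easy to miss. The connected-command test appears twice because the msleep() in the autosense wait drops hostdata->lock, so the command may have been reconnected to run autosense by the time the lock is retaken. Also, the code mixes cmd->result = DID_ABORT << 16 with set_host_byte(); both place the host byte in bits 16..23 of the result word, the helper preserving the other bytes. A sketch of that helper, assuming the <scsi/scsi_cmnd.h> definition of this era:

        static inline void set_host_byte(struct scsi_cmnd *cmd, char status)
        {
                /* replace only the host byte; keep status/msg/driver bytes */
                cmd->result = (cmd->result & 0xff00ffff) | (status << 16);
        }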
 
 
-/* 
- * Function : int NCR5380_bus_reset (struct scsi_cmnd *cmd)
- * 
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
  *
- * Locks: host lock taken by caller
+ * Returns SUCCESS
  */
 
 static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
 {
        struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int i;
+       unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       NCR5380_print_status(instance);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, "%s", __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-       spin_lock_irq(instance->host_lock);
        do_reset(instance);
-       spin_unlock_irq(instance->host_lock);
+
+       /* reset NCR registers */
+       NCR5380_write(MODE_REG, MR_BASE);
+       NCR5380_write(TARGET_COMMAND_REG, 0);
+       NCR5380_write(SELECT_ENABLE_REG, 0);
+
+       /* After the reset, there are no more connected or disconnected commands
+        * and no busy units; so clear the low-level status here to avoid
+        * conflicts when the mid-level code tries to wake up the affected
+        * commands!
+        */
+
+       hostdata->selecting = NULL;
+
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       list_for_each_entry(ncmd, &hostdata->autosense, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       if (hostdata->connected) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->connected);
+               hostdata->connected = NULL;
+       }
+
+       if (hostdata->sensing) {
+               set_host_byte(hostdata->sensing, DID_RESET);
+               complete_cmd(instance, hostdata->sensing);
+               hostdata->sensing = NULL;
+       }
+
+       for (i = 0; i < 8; ++i)
+               hostdata->busy[i] = 0;
+#ifdef REAL_DMA
+       hostdata->dma_len = 0;
+#endif
+
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
        return SUCCESS;
 }
index 162112dd1bf8eabeaf0c8bb029291742d6c55ce4..a79288682a74ad91aad59b3ec8f64242337e3713 100644 (file)
 #ifndef NCR5380_H
 #define NCR5380_H
 
+#include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <scsi/scsi_dbg.h>
 #include <scsi/scsi_eh.h>
+#include <scsi/scsi_transport_spi.h>
 
 #define NDEBUG_ARBITRATION     0x1
 #define NDEBUG_AUTOSENSE       0x2
 /* Write any value to this register to start an ini mode DMA receive */
 #define START_DMA_INITIATOR_RECEIVE_REG 7      /* wo */
 
-#define C400_CONTROL_STATUS_REG NCR53C400_register_offset-8    /* rw */
-
+/* NCR 53C400(A) Control Status Register bits: */
 #define CSR_RESET              0x80    /* wo  Resets 53c400 */
 #define CSR_53C80_REG          0x80    /* ro  5380 registers busy */
 #define CSR_TRANS_DIR          0x40    /* rw  Data transfer direction */
 #define CSR_BASE CSR_53C80_INTR
 #endif
 
-/* Number of 128-byte blocks to be transferred */
-#define C400_BLOCK_COUNTER_REG   NCR53C400_register_offset-7   /* rw */
-
-/* Resume transfer after disconnect */
-#define C400_RESUME_TRANSFER_REG NCR53C400_register_offset-6   /* wo */
-
-/* Access to host buffer stack */
-#define C400_HOST_BUFFER         NCR53C400_register_offset-4   /* rw */
-
-
 /* Note : PHASE_* macros are based on the values of the STATUS register */
 #define PHASE_MASK     (SR_MSG | SR_CD | SR_IO)
 
 
 #define PHASE_SR_TO_TCR(phase) ((phase) >> 2)
 
-/*
- * The internal should_disconnect() function returns these based on the 
- * expected length of a disconnect if a device supports disconnect/
- * reconnect.
- */
-
-#define DISCONNECT_NONE                0
-#define DISCONNECT_TIME_TO_DATA        1
-#define DISCONNECT_LONG                2
-
 /* 
  * "Special" value for the (unsigned char) command tag, to indicate
  * I_T_L nexus instead of I_T_L_Q.
 #define NO_IRQ         0
 #endif
 
-#define FLAG_HAS_LAST_BYTE_SENT                1       /* NCR53c81 or better */
-#define FLAG_CHECK_LAST_BYTE_SENT      2       /* Only test once */
-#define FLAG_NCR53C400                 4       /* NCR53c400 */
+#define FLAG_NO_DMA_FIXUP              1       /* No DMA errata workarounds */
 #define FLAG_NO_PSEUDO_DMA             8       /* Inhibit DMA */
-#define FLAG_DTC3181E                  16      /* DTC3181E */
 #define FLAG_LATE_DMA_SETUP            32      /* Setup NCR before DMA H/W */
 #define FLAG_TAGGED_QUEUING            64      /* as X3T9.2 spelled it */
-
-#ifndef ASM
+#define FLAG_TOSHIBA_DELAY             128     /* Allow for borken CD-ROMs */
 
 #ifdef SUPPORT_TAGS
 struct tag_alloc {
@@ -258,33 +238,24 @@ struct NCR5380_hostdata {
        NCR5380_implementation_fields;          /* implementation specific */
        struct Scsi_Host *host;                 /* Host backpointer */
        unsigned char id_mask, id_higher_mask;  /* 1 << id, all bits greater */
-       unsigned char targets_present;          /* targets we have connected
-                                                  to, so we can call a select
-                                                  failure a retryable condition */
-       volatile unsigned char busy[8];         /* index = target, bit = lun */
+       unsigned char busy[8];                  /* index = target, bit = lun */
 #if defined(REAL_DMA) || defined(REAL_DMA_POLL)
-       volatile int dma_len;                   /* requested length of DMA */
+       int dma_len;                            /* requested length of DMA */
 #endif
-       volatile unsigned char last_message;    /* last message OUT */
-       volatile struct scsi_cmnd *connected;   /* currently connected command */
-       volatile struct scsi_cmnd *issue_queue; /* waiting to be issued */
-       volatile struct scsi_cmnd *disconnected_queue;  /* waiting for reconnect */
-       volatile int restart_select;            /* we have disconnected,
-                                                  used to restart 
-                                                  NCR5380_select() */
-       volatile unsigned aborted:1;            /* flag, says aborted */
+       unsigned char last_message;             /* last message OUT */
+       struct scsi_cmnd *connected;            /* currently connected cmnd */
+       struct scsi_cmnd *selecting;            /* cmnd to be connected */
+       struct list_head unissued;              /* waiting to be issued */
+       struct list_head autosense;             /* priority issue queue */
+       struct list_head disconnected;          /* waiting for reconnect */
+       spinlock_t lock;                        /* protects this struct */
        int flags;
-       unsigned long time_expires;             /* in jiffies, set prior to sleeping */
-       int select_time;                        /* timer in select for target response */
-       volatile struct scsi_cmnd *selecting;
-       struct delayed_work coroutine;          /* our co-routine */
        struct scsi_eh_save ses;
+       struct scsi_cmnd *sensing;
        char info[256];
        int read_overruns;                /* number of bytes to cut from a
                                           * transfer to handle chip overruns */
-       int retain_dma_intr;
        struct work_struct main_task;
-       volatile int main_running;
 #ifdef SUPPORT_TAGS
        struct tag_alloc TagAlloc[8][8];        /* 8 targets and 8 LUNs */
 #endif
@@ -292,10 +263,23 @@ struct NCR5380_hostdata {
        unsigned spin_max_r;
        unsigned spin_max_w;
 #endif
+       struct workqueue_struct *work_q;
+       unsigned long accesses_per_ms;  /* chip register accesses per ms */
 };
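
The new spinlock replaces the old volatile members and IRQ masking: the three command lists and the connected/selecting/sensing nexus state are only touched under hostdata->lock, with the main task kicked afterwards. A hedged sketch of the submission-side pattern (names as in this driver; the queuecommand body itself is not part of this hunk):

        spin_lock_irqsave(&hostdata->lock, flags);
        list_add_tail(&ncmd->list, &hostdata->unissued);
        spin_unlock_irqrestore(&hostdata->lock, flags);
        queue_work(hostdata->work_q, &hostdata->main_task);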
 
 #ifdef __KERNEL__
 
+struct NCR5380_cmd {
+       struct list_head list;
+};
+
+#define NCR5380_CMD_SIZE               (sizeof(struct NCR5380_cmd))
+
+static inline struct scsi_cmnd *NCR5380_to_scmd(struct NCR5380_cmd *ncmd_ptr)
+{
+       return ((struct scsi_cmnd *)ncmd_ptr) - 1;
+}
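
The subtraction works only because the SCSI mid-layer allocates .cmd_size bytes of driver-private data directly behind each struct scsi_cmnd (the converted host templates below set .cmd_size = NCR5380_CMD_SIZE). scsi_cmd_priv() goes one way and NCR5380_to_scmd() goes back. A round-trip sketch under that assumption:

        struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);  /* == (void *)(cmd + 1) */

        BUG_ON(NCR5380_to_scmd(ncmd) != cmd);   /* recovers the original command */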
+
 #ifndef NDEBUG
 #define NDEBUG (0)
 #endif
@@ -304,6 +288,11 @@ struct NCR5380_hostdata {
        do { if ((NDEBUG) & (flg)) \
                printk(KERN_DEBUG fmt, ## __VA_ARGS__); } while (0)
 
+#define dsprintk(flg, host, fmt, ...) \
+       do { if ((NDEBUG) & (flg)) \
+               shost_printk(KERN_DEBUG, host, fmt, ## __VA_ARGS__); \
+       } while (0)
+
 #if NDEBUG
 #define NCR5380_dprint(flg, arg) \
        do { if ((NDEBUG) & (flg)) NCR5380_print(arg); } while (0)
@@ -320,6 +309,7 @@ static void NCR5380_print(struct Scsi_Host *instance);
 static int NCR5380_probe_irq(struct Scsi_Host *instance, int possible);
 #endif
 static int NCR5380_init(struct Scsi_Host *instance, int flags);
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *);
 static void NCR5380_exit(struct Scsi_Host *instance);
 static void NCR5380_information_transfer(struct Scsi_Host *instance);
 #ifndef DONT_USE_INTR
@@ -328,7 +318,7 @@ static irqreturn_t NCR5380_intr(int irq, void *dev_id);
 static void NCR5380_main(struct work_struct *work);
 static const char *NCR5380_info(struct Scsi_Host *instance);
 static void NCR5380_reselect(struct Scsi_Host *instance);
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd);
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *);
 #if defined(PSEUDO_DMA) || defined(REAL_DMA) || defined(REAL_DMA_POLL)
 static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
 #endif
@@ -443,5 +433,4 @@ static __inline__ int NCR5380_pc_dma_residual(struct Scsi_Host *instance)
 #endif                         /* defined(i386) || defined(__alpha__) */
 #endif                         /* defined(REAL_DMA)  */
 #endif                         /* __KERNEL__ */
-#endif                         /* ndef ASM */
 #endif                         /* NCR5380_H */
index d28d6c0f18c0b76acd384f53ed6132a54f13fa57..221f18c5df9363c704d8289c2baab4b10dd89db6 100644 (file)
@@ -4,9 +4,7 @@
  * Copyright 1995-2002, Russell King
  */
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/ioport.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/init.h>
 
 
 #include <scsi/scsi_host.h>
 
-#include <scsi/scsicam.h>
-
 #define PSEUDO_DMA
 
 #define priv(host)                     ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare()                struct Scsi_Host *_instance
-#define NCR5380_setup(instance)                _instance = instance
-#define NCR5380_read(reg)              cumanascsi_read(_instance, reg)
-#define NCR5380_write(reg, value)      cumanascsi_write(_instance, reg, value)
+#define NCR5380_read(reg)              cumanascsi_read(instance, reg)
+#define NCR5380_write(reg, value)      cumanascsi_write(instance, reg, value)
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
+
 #define NCR5380_intr                   cumanascsi_intr
 #define NCR5380_queue_command          cumanascsi_queue_command
 #define NCR5380_info                   cumanascsi_info
@@ -211,6 +208,8 @@ static struct scsi_host_template cumanascsi_template = {
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
        .proc_name              = "CumanaSCSI-1",
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int cumanascsi1_probe(struct expansion_card *ec,
@@ -240,23 +239,21 @@ static int cumanascsi1_probe(struct expansion_card *ec,
 
        host->irq = ec->irq;
 
-       NCR5380_init(host, 0);
+       ret = NCR5380_init(host, 0);
+       if (ret)
+               goto out_unmap;
+
+       NCR5380_maybe_reset_bus(host);
 
         priv(host)->ctrl = 0;
         writeb(0, priv(host)->base + CTRL);
 
-       host->n_io_port = 255;
-       if (!(request_region(host->io_port, host->n_io_port, "CumanaSCSI-1"))) {
-               ret = -EBUSY;
-               goto out_unmap;
-       }
-
        ret = request_irq(host->irq, cumanascsi_intr, 0,
                          "CumanaSCSI-1", host);
        if (ret) {
                printk("scsi%d: IRQ%d not free: %d\n",
                    host->host_no, host->irq, ret);
-               goto out_unmap;
+               goto out_exit;
        }
 
        ret = scsi_add_host(host, &ec->dev);
@@ -268,6 +265,8 @@ static int cumanascsi1_probe(struct expansion_card *ec,
 
  out_free_irq:
        free_irq(host->irq, host);
+ out_exit:
+       NCR5380_exit(host);
  out_unmap:
        iounmap(priv(host)->base);
        iounmap(priv(host)->dma);
index 7c6fa1479c9c1f4c92cd98c891e800d8b7a12ecb..1fab1d1896b1a7e81a53d2dc664e6f52a712b6d4 100644 (file)
@@ -5,9 +5,7 @@
  */
 
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/ioport.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/init.h>
 
 #define DONT_USE_INTR
 
 #define priv(host)                     ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare()                void __iomem *_base
-#define NCR5380_setup(host)            _base = priv(host)->base
 
-#define NCR5380_read(reg)              readb(_base + ((reg) << 2))
-#define NCR5380_write(reg, value)      writeb(value, _base + ((reg) << 2))
+#define NCR5380_read(reg) \
+       readb(priv(instance)->base + ((reg) << 2))
+#define NCR5380_write(reg, value) \
+       writeb(value, priv(instance)->base + ((reg) << 2))
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
+
 #define NCR5380_queue_command          oakscsi_queue_command
 #define NCR5380_info                   oakscsi_info
-#define NCR5380_show_info              oakscsi_show_info
 
 #define NCR5380_implementation_fields  \
        void __iomem *base
@@ -103,7 +103,6 @@ printk("reading %p len %d\n", addr, len);
 
 static struct scsi_host_template oakscsi_template = {
        .module                 = THIS_MODULE,
-       .show_info              = oakscsi_show_info,
        .name                   = "Oak 16-bit SCSI",
        .info                   = oakscsi_info,
        .queuecommand           = oakscsi_queue_command,
@@ -115,6 +114,8 @@ static struct scsi_host_template oakscsi_template = {
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
        .proc_name              = "oakscsi",
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
@@ -142,15 +143,21 @@ static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
        host->irq = NO_IRQ;
        host->n_io_port = 255;
 
-       NCR5380_init(host, 0);
+       ret = NCR5380_init(host, 0);
+       if (ret)
+               goto out_unmap;
+
+       NCR5380_maybe_reset_bus(host);
 
        ret = scsi_add_host(host, &ec->dev);
        if (ret)
-               goto out_unmap;
+               goto out_exit;
 
        scsi_scan_host(host);
        goto out;
 
+ out_exit:
+       NCR5380_exit(host);
  out_unmap:
        iounmap(priv(host)->base);
  unreg:
index db87ece6edb2cd317d463c1afb21f7b9a9942b33..e65478651ca94030ac2b5d2671eecc7e7d4456e3 100644 (file)
@@ -1,15 +1,15 @@
 /*
  * NCR 5380 generic driver routines.  These should make it *trivial*
- *     to implement 5380 SCSI drivers under Linux with a non-trantor
- *     architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
  *
- *     Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NCR53c400 family chips.
  *
  * Copyright 1993, Drew Eckhardt
- *     Visionary Computing
- *     (Unix and Linux consulting and custom programming)
- *     drew@colorado.edu
- *     +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
  *
  * For more information, please consult
  *
  * 1+ (800) 334-5454
  */
 
-/*
- * ++roman: To port the 5380 driver to the Atari, I had to do some changes in
- * this file, too:
- *
- *  - Some of the debug statements were incorrect (undefined variables and the
- *    like). I fixed that.
- *
- *  - In information_transfer(), I think a #ifdef was wrong. Looking at the
- *    possible DMA transfer size should also happen for REAL_DMA. I added this
- *    in the #if statement.
- *
- *  - When using real DMA, information_transfer() should return in a DATAOUT
- *    phase after starting the DMA. It has nothing more to do.
- *
- *  - The interrupt service routine should run main after end of DMA, too (not
- *    only after RESELECTION interrupts). Additionally, it should _not_ test
- *    for more interrupts after running main, since a DMA process may have
- *    been started and interrupts are turned on now. The new int could happen
- *    inside the execution of NCR5380_intr(), leading to recursive
- *    calls.
- *
- *  - I've added a function merge_contiguous_buffers() that tries to
- *    merge scatter-gather buffers that are located at contiguous
- *    physical addresses and can be processed with the same DMA setup.
- *    Since most scatter-gather operations work on a page (4K) of
- *    4 buffers (1K), in more than 90% of all cases three interrupts and
- *    DMA setup actions are saved.
- *
- * - I've deleted all the stuff for AUTOPROBE_IRQ, REAL_DMA_POLL, PSEUDO_DMA
- *    and USLEEP, because these were messing up readability and will never be
- *    needed for Atari SCSI.
- *
- * - I've revised the NCR5380_main() calling scheme (relax the 'main_running'
- *   stuff), and 'main' is executed in a bottom half if awoken by an
- *   interrupt.
- *
- * - The code was quite cluttered up by "#if (NDEBUG & NDEBUG_*) printk..."
- *   constructs. In my eyes, this made the source rather unreadable, so I
- *   finally replaced that by the *_PRINTK() macros.
- *
- */
-
-/*
- * Further development / testing that should be done :
- * 1.  Test linked command handling code after Eric is ready with
- *     the high level code.
- */
+/* Ported to Atari by Roman Hodek and others. */
 
 /* Adapted for the sun3 by Sam Creasey. */
 
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x, y)                                             \
-       do {                                                    \
-               printk("LINE:%d   Adding %p to %p\n",           \
-                      __LINE__, (void*)(x), (void*)(y));       \
-               if ((x) == (y))                                 \
-                       udelay(5);                              \
-       } while (0)
-#define REMOVE(w, x, y, z)                                     \
-       do {                                                    \
-               printk("LINE:%d   Removing: %p->%p  %p->%p \n", \
-                      __LINE__, (void*)(w), (void*)(x),        \
-                      (void*)(y), (void*)(z));                 \
-               if ((x) == (y))                                 \
-                       udelay(5);                              \
-       } while (0)
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
-
-#ifndef notyet
-#undef LINKED
-#endif
-
 /*
  * Design
  *
  * piece of hardware that requires you to sit in a loop polling for
  * the REQ signal as long as you are connected.  Some devices are
  * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
- * while doing long seek operations.
- *
- * The workaround for this is to keep track of devices that have
- * disconnected.  If the device hasn't disconnected, for commands that
- * should disconnect, we do something like
- *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- *
- * Some tweaking of N and M needs to be done.  An algorithm based
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these
+ * while doing long seek operations. [...] These
  * broken devices are the exception rather than the rule and I'd rather
  * spend my time optimizing for the normal case.
  *
  *
  * These macros control options :
  * AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- *     for commands that return with a CHECK CONDITION status.
+ * for commands that return with a CHECK CONDITION status.
  *
  * DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- *     transceivers.
- *
- * LINKED - if defined, linked commands are supported.
+ * transceivers.
  *
  * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
  *
  * NCR5380_write(register, value) - write to the specific register
  *
  * NCR5380_implementation_fields  - additional fields needed for this
- *      specific implementation of the NCR5380
+ * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
  * REAL functions :
  * NCR5380_REAL_DMA should be defined if real DMA is to be used.
  * Note that the DMA setup functions should return the number of bytes
- *     that they were able to program the controller for.
+ * that they were able to program the controller for.
  *
  * Also note that generic i386/PC versions of these macros are
- *     available as NCR5380_i386_dma_write_setup,
- *     NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
  *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * possible) function may be used.
  */
 
-/* Macros ease life... :-) */
-#define        SETUP_HOSTDATA(in)                              \
-    struct NCR5380_hostdata *hostdata =                        \
-       (struct NCR5380_hostdata *)(in)->hostdata
-#define        HOSTDATA(in) ((struct NCR5380_hostdata *)(in)->hostdata)
-
-#define        NEXT(cmd)               ((struct scsi_cmnd *)(cmd)->host_scribble)
-#define        SET_NEXT(cmd,next)      ((cmd)->host_scribble = (void *)(next))
-#define        NEXTADDR(cmd)           ((struct scsi_cmnd **)&(cmd)->host_scribble)
-
-#define        HOSTNO          instance->host_no
-#define        H_NO(cmd)       (cmd)->device->host->host_no
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
 
 #ifdef SUPPORT_TAGS
 
  * cannot know it in advance :-( We just see a QUEUE_FULL status being
  * returned. So, in this case, the driver internal queue size assumption is
  * reduced to the number of active tags if QUEUE_FULL is returned by the
- * target. The command is returned to the mid-level, but with status changed
- * to BUSY, since --as I've seen-- the mid-level can't handle QUEUE_FULL
- * correctly.
+ * target.
  *
  * We're also not allowed running tagged commands as long as an untagged
  * command is active. And REQUEST SENSE commands after a contingent allegiance
@@ -304,7 +206,8 @@ static void __init init_tags(struct NCR5380_hostdata *hostdata)
 static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        if (hostdata->busy[cmd->device->id] & (1 << lun))
                return 1;
@@ -314,8 +217,8 @@ static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
                return 0;
        if (hostdata->TagAlloc[scmd_id(cmd)][lun].nr_allocated >=
            hostdata->TagAlloc[scmd_id(cmd)][lun].queue_size) {
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d: no free tags\n",
-                          H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d: no free tags\n",
+                        scmd_id(cmd), lun);
                return 1;
        }
        return 0;
@@ -330,7 +233,8 @@ static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
 static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        /* If we or the target don't support tagged queuing, allocate the LUN for
         * an untagged command.
@@ -340,18 +244,16 @@ static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
            !cmd->device->tagged_supported) {
                cmd->tag = TAG_NONE;
                hostdata->busy[cmd->device->id] |= (1 << lun);
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d now allocated by untagged "
-                          "command\n", H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d now allocated by untagged command\n",
+                        scmd_id(cmd), lun);
        } else {
                struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
 
                cmd->tag = find_first_zero_bit(ta->allocated, MAX_TAGS);
                set_bit(cmd->tag, ta->allocated);
                ta->nr_allocated++;
-               dprintk(NDEBUG_TAGS, "scsi%d: using tag %d for target %d lun %d "
-                          "(now %d tags in use)\n",
-                          H_NO(cmd), cmd->tag, cmd->device->id,
-                          lun, ta->nr_allocated);
+               dsprintk(NDEBUG_TAGS, instance, "using tag %d for target %d lun %d (%d tags allocated)\n",
+                        cmd->tag, scmd_id(cmd), lun, ta->nr_allocated);
        }
 }
 
@@ -363,21 +265,22 @@ static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
 static void cmd_free_tag(struct scsi_cmnd *cmd)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        if (cmd->tag == TAG_NONE) {
                hostdata->busy[cmd->device->id] &= ~(1 << lun);
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d untagged cmd finished\n",
-                          H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d untagged cmd freed\n",
+                        scmd_id(cmd), lun);
        } else if (cmd->tag >= MAX_TAGS) {
-               printk(KERN_NOTICE "scsi%d: trying to free bad tag %d!\n",
-                      H_NO(cmd), cmd->tag);
+               shost_printk(KERN_NOTICE, instance,
+                            "trying to free bad tag %d!\n", cmd->tag);
        } else {
                struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
                clear_bit(cmd->tag, ta->allocated);
                ta->nr_allocated--;
-               dprintk(NDEBUG_TAGS, "scsi%d: freed tag %d for target %d lun %d\n",
-                          H_NO(cmd), cmd->tag, cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "freed tag %d for target %d lun %d\n",
+                        cmd->tag, scmd_id(cmd), lun);
        }
 }
 
@@ -401,17 +304,15 @@ static void free_all_tags(struct NCR5380_hostdata *hostdata)
 
 #endif /* SUPPORT_TAGS */
 
-
-/*
- * Function: void merge_contiguous_buffers( struct scsi_cmnd *cmd )
- *
- * Purpose: Try to merge several scatter-gather requests into one DMA
- *    transfer. This is possible if the scatter buffers lie on
- *    physical contiguous addresses.
- *
- * Parameters: struct scsi_cmnd *cmd
- *    The command to work on. The first scatter buffer's data are
- *    assumed to be already transferred into ptr/this_residual.
+/**
+ * merge_contiguous_buffers - coalesce scatter-gather list entries
+ * @cmd: command requesting IO
+ *
+ * Try to merge several scatter-gather buffers into one DMA transfer.
+ * This is possible if the scatter buffers lie on physically
+ * contiguous addresses. The first scatter-gather buffer's data is
+ * assumed to be already set up in cmd->SCp.ptr and cmd->SCp.this_residual.
+ * Every buffer merged avoids an interrupt and a DMA setup operation.
  */
 
 static void merge_contiguous_buffers(struct scsi_cmnd *cmd)
@@ -463,9 +364,7 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.buffers_residual = scsi_sg_count(cmd) - 1;
                cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
                cmd->SCp.this_residual = cmd->SCp.buffer->length;
-               /* ++roman: Try to merge some scatter-buffers if they are at
-                * contiguous physical addresses.
-                */
+
                merge_contiguous_buffers(cmd);
        } else {
                cmd->SCp.buffer = NULL;
@@ -473,31 +372,110 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.ptr = NULL;
                cmd->SCp.this_residual = 0;
        }
+
+       cmd->SCp.Status = 0;
+       cmd->SCp.Message = 0;
+}
+
+/**
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred, otherwise -ETIMEDOUT.
+ */
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+                                  int reg1, int bit1, int val1,
+                                  int reg2, int bit2, int val2, int wait)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long deadline = jiffies + wait;
+       unsigned long n;
+
+       /* Busy-wait for up to 10 ms */
+       n = min(10000U, jiffies_to_usecs(wait));
+       n *= hostdata->accesses_per_ms;
+       n /= 2000;
+       do {
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
+                       return 0;
+               cpu_relax();
+       } while (n--);
+
+       if (irqs_disabled() || in_interrupt())
+               return -ETIMEDOUT;
+
+       /* Repeatedly sleep for 1 ms until deadline */
+       while (time_is_after_jiffies(deadline)) {
+               schedule_timeout_uninterruptible(1);
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
+                       return 0;
+       }
+
+       return -ETIMEDOUT;
 }
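
The busy-wait budget above is plain unit conversion: min(10000U, jiffies_to_usecs(wait)) caps the spin at 10 ms worth of microseconds, and multiplying by accesses_per_ms then dividing by 2000 converts that to loop iterations (1000 us per ms, two register reads per iteration). For example, with wait = 5 * HZ and a chip calibrated at accesses_per_ms = 2000, n = 10000 * 2000 / 2000 = 10000 iterations, i.e. about 20000 register reads or roughly 10 ms of spinning before the routine falls back to sleeping in 1 ms steps.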
 
-#include <linux/delay.h>
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+                                        int reg, int bit, int val, int wait)
+{
+       return NCR5380_poll_politely2(instance, reg, bit, val,
+                                               reg, bit, val, wait);
+}
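
A typical call names the register, a bitmask, the value that ends the wait and a timeout; waiting for the bus to go free, for instance, might look like this sketch (mirroring the bus-reset handling further below):

        if (NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, 0, 5 * HZ) < 0)
                shost_printk(KERN_ERR, instance, "bus is still busy\n");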
 
 #if NDEBUG
 static struct {
        unsigned char mask;
        const char *name;
 } signals[] = {
-       { SR_DBP, "PARITY"}, { SR_RST, "RST" }, { SR_BSY, "BSY" },
-       { SR_REQ, "REQ" }, { SR_MSG, "MSG" }, { SR_CD,  "CD" }, { SR_IO, "IO" },
-       { SR_SEL, "SEL" }, {0, NULL}
-}, basrs[] = {
-       {BASR_ATN, "ATN"}, {BASR_ACK, "ACK"}, {0, NULL}
-}, icrs[] = {
-       {ICR_ASSERT_RST, "ASSERT RST"},{ICR_ASSERT_ACK, "ASSERT ACK"},
-       {ICR_ASSERT_BSY, "ASSERT BSY"}, {ICR_ASSERT_SEL, "ASSERT SEL"},
-       {ICR_ASSERT_ATN, "ASSERT ATN"}, {ICR_ASSERT_DATA, "ASSERT DATA"},
+       {SR_DBP, "PARITY"},
+       {SR_RST, "RST"},
+       {SR_BSY, "BSY"},
+       {SR_REQ, "REQ"},
+       {SR_MSG, "MSG"},
+       {SR_CD, "CD"},
+       {SR_IO, "IO"},
+       {SR_SEL, "SEL"},
        {0, NULL}
-}, mrs[] = {
-       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"}, {MR_TARGET, "MODE TARGET"},
-       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"}, {MR_ENABLE_PAR_INTR,
-       "MODE PARITY INTR"}, {MR_ENABLE_EOP_INTR,"MODE EOP INTR"},
+},
+basrs[] = {
+       {BASR_ATN, "ATN"},
+       {BASR_ACK, "ACK"},
+       {0, NULL}
+},
+icrs[] = {
+       {ICR_ASSERT_RST, "ASSERT RST"},
+       {ICR_ASSERT_ACK, "ASSERT ACK"},
+       {ICR_ASSERT_BSY, "ASSERT BSY"},
+       {ICR_ASSERT_SEL, "ASSERT SEL"},
+       {ICR_ASSERT_ATN, "ASSERT ATN"},
+       {ICR_ASSERT_DATA, "ASSERT DATA"},
+       {0, NULL}
+},
+mrs[] = {
+       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+       {MR_TARGET, "MODE TARGET"},
+       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+       {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
        {MR_MONITOR_BSY, "MODE MONITOR BSY"},
-       {MR_DMA_MODE, "MODE DMA"}, {MR_ARBITRATE, "MODE ARBITRATION"},
+       {MR_DMA_MODE, "MODE DMA"},
+       {MR_ARBITRATE, "MODE ARBITRATION"},
        {0, NULL}
 };
 
@@ -511,15 +489,13 @@ static struct {
 static void NCR5380_print(struct Scsi_Host *instance)
 {
        unsigned char status, data, basr, mr, icr, i;
-       unsigned long flags;
 
-       local_irq_save(flags);
        data = NCR5380_read(CURRENT_SCSI_DATA_REG);
        status = NCR5380_read(STATUS_REG);
        mr = NCR5380_read(MODE_REG);
        icr = NCR5380_read(INITIATOR_COMMAND_REG);
        basr = NCR5380_read(BUS_AND_STATUS_REG);
-       local_irq_restore(flags);
+
        printk("STATUS_REG: %02x ", status);
        for (i = 0; signals[i].mask; ++i)
                if (status & signals[i].mask)
@@ -543,8 +519,12 @@ static struct {
        unsigned char value;
        const char *name;
 } phases[] = {
-       {PHASE_DATAOUT, "DATAOUT"}, {PHASE_DATAIN, "DATAIN"}, {PHASE_CMDOUT, "CMDOUT"},
-       {PHASE_STATIN, "STATIN"}, {PHASE_MSGOUT, "MSGOUT"}, {PHASE_MSGIN, "MSGIN"},
+       {PHASE_DATAOUT, "DATAOUT"},
+       {PHASE_DATAIN, "DATAIN"},
+       {PHASE_CMDOUT, "CMDOUT"},
+       {PHASE_STATIN, "STATIN"},
+       {PHASE_MSGOUT, "MSGOUT"},
+       {PHASE_MSGIN, "MSGIN"},
        {PHASE_UNKNOWN, "UNKNOWN"}
 };
 
@@ -553,8 +533,6 @@ static struct {
  * @instance: adapter to dump
  *
  * Print the current SCSI phase for debugging purposes
- *
- * Locks: none
  */
 
 static void NCR5380_print_phase(struct Scsi_Host *instance)
@@ -564,54 +542,21 @@ static void NCR5380_print_phase(struct Scsi_Host *instance)
 
        status = NCR5380_read(STATUS_REG);
        if (!(status & SR_REQ))
-               printk(KERN_DEBUG "scsi%d: REQ not asserted, phase unknown.\n", HOSTNO);
+               shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
        else {
                for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
                     (phases[i].value != (status & PHASE_MASK)); ++i)
                        ;
-               printk(KERN_DEBUG "scsi%d: phase %s\n", HOSTNO, phases[i].name);
+               shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
        }
 }
-
 #endif
 
-/*
- * ++roman: New scheme of calling NCR5380_main()
- *
- * If we're not in an interrupt, we can call our main directly, it cannot be
- * already running. Else, we queue it on a task queue, if not 'main_running'
- * tells us that a lower level is already executing it. This way,
- * 'main_running' needs not be protected in a special way.
- *
- * queue_main() is a utility function for putting our main onto the task
- * queue, if main_running is false. It should be called only from a
- * interrupt or bottom half.
- */
-
-#include <linux/gfp.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-
-static inline void queue_main(struct NCR5380_hostdata *hostdata)
-{
-       if (!hostdata->main_running) {
-               /* If in interrupt and NCR5380_main() not already running,
-                  queue it on the 'immediate' task queue, to be processed
-                  immediately after the current interrupt processing has
-                  finished. */
-               schedule_work(&hostdata->main_task);
-       }
-       /* else: nothing to do: the running NCR5380_main() will pick up
-          any newly queued command. */
-}
-
 /**
  * NCR58380_info - report driver and host information
  * @instance: relevant scsi host instance
  *
  * For use as the host template info() handler.
- *
- * Locks: none
  */
 
 static const char *NCR5380_info(struct Scsi_Host *instance)
@@ -630,13 +575,14 @@ static void prepare_info(struct Scsi_Host *instance)
                 "base 0x%lx, irq %d, "
                 "can_queue %d, cmd_per_lun %d, "
                 "sg_tablesize %d, this_id %d, "
-                "flags { %s}, "
+                "flags { %s%s}, "
                 "options { %s} ",
                 instance->hostt->name, instance->io_port, instance->n_io_port,
                 instance->base, instance->irq,
                 instance->can_queue, instance->cmd_per_lun,
                 instance->sg_tablesize, instance->this_id,
                 hostdata->flags & FLAG_TAGGED_QUEUING ? "TAGGED_QUEUING " : "",
+                hostdata->flags & FLAG_TOSHIBA_DELAY  ? "TOSHIBA_DELAY "  : "",
 #ifdef DIFFERENTIAL
                 "DIFFERENTIAL "
 #endif
@@ -652,102 +598,6 @@ static void prepare_info(struct Scsi_Host *instance)
                 "");
 }
 
-/**
- * NCR5380_print_status - dump controller info
- * @instance: controller to dump
- *
- * Print commands in the various queues, called from NCR5380_abort
- * to aid debugging.
- */
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd)
-{
-       int i, s;
-       unsigned char *command;
-       printk("scsi%d: destination target %d, lun %llu\n",
-               H_NO(cmd), cmd->device->id, cmd->device->lun);
-       printk(KERN_CONT "        command = ");
-       command = cmd->cmnd;
-       printk(KERN_CONT "%2d (0x%02x)", command[0], command[0]);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               printk(KERN_CONT " %02x", command[i]);
-       printk("\n");
-}
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-       unsigned long flags;
-
-       NCR5380_dprint(NDEBUG_ANY, instance);
-       NCR5380_dprint_phase(NDEBUG_ANY, instance);
-
-       hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
-       local_irq_save(flags);
-       printk("NCR5380: coroutine is%s running.\n",
-               hostdata->main_running ? "" : "n't");
-       if (!hostdata->connected)
-               printk("scsi%d: no currently connected command\n", HOSTNO);
-       else
-               lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected);
-       printk("scsi%d: issue_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
-               lprint_Scsi_Cmnd(ptr);
-
-       printk("scsi%d: disconnected_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
-            ptr = NEXT(ptr))
-               lprint_Scsi_Cmnd(ptr);
-
-       local_irq_restore(flags);
-       printk("\n");
-}
-
-static void show_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
-       int i, s;
-       unsigned char *command;
-       seq_printf(m, "scsi%d: destination target %d, lun %llu\n",
-               H_NO(cmd), cmd->device->id, cmd->device->lun);
-       seq_puts(m, "        command = ");
-       command = cmd->cmnd;
-       seq_printf(m, "%2d (0x%02x)", command[0], command[0]);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               seq_printf(m, " %02x", command[i]);
-       seq_putc(m, '\n');
-}
-
-static int __maybe_unused NCR5380_show_info(struct seq_file *m,
-                                            struct Scsi_Host *instance)
-{
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-       unsigned long flags;
-
-       hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
-       local_irq_save(flags);
-       seq_printf(m, "NCR5380: coroutine is%s running.\n",
-               hostdata->main_running ? "" : "n't");
-       if (!hostdata->connected)
-               seq_printf(m, "scsi%d: no currently connected command\n", HOSTNO);
-       else
-               show_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
-       seq_printf(m, "scsi%d: issue_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
-               show_Scsi_Cmnd(ptr, m);
-
-       seq_printf(m, "scsi%d: disconnected_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
-            ptr = NEXT(ptr))
-               show_Scsi_Cmnd(ptr, m);
-
-       local_irq_restore(flags);
-       return 0;
-}
-
 /**
  * NCR5380_init - initialise an NCR5380
  * @instance: adapter to configure
@@ -764,11 +614,11 @@ static int __maybe_unused NCR5380_show_info(struct seq_file *m,
 
 static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int i;
-       SETUP_HOSTDATA(instance);
+       unsigned long deadline;
 
        hostdata->host = instance;
-       hostdata->aborted = 0;
        hostdata->id_mask = 1 << instance->this_id;
        hostdata->id_higher_mask = 0;
        for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
@@ -782,13 +632,21 @@ static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
 #if defined (REAL_DMA)
        hostdata->dma_len = 0;
 #endif
-       hostdata->targets_present = 0;
+       spin_lock_init(&hostdata->lock);
        hostdata->connected = NULL;
-       hostdata->issue_queue = NULL;
-       hostdata->disconnected_queue = NULL;
+       hostdata->sensing = NULL;
+       INIT_LIST_HEAD(&hostdata->autosense);
+       INIT_LIST_HEAD(&hostdata->unissued);
+       INIT_LIST_HEAD(&hostdata->disconnected);
+
        hostdata->flags = flags;
 
        INIT_WORK(&hostdata->main_task, NCR5380_main);
+       hostdata->work_q = alloc_workqueue("ncr5380_%d",
+                               WQ_UNBOUND | WQ_MEM_RECLAIM,
+                               1, instance->host_no);
+       if (!hostdata->work_q)
+               return -ENOMEM;
 
        prepare_info(instance);
 
@@ -797,6 +655,72 @@ static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       /* Calibrate register polling loop */
+       i = 0;
+       deadline = jiffies + 1;
+       do {
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       deadline += msecs_to_jiffies(256);
+       do {
+               NCR5380_read(STATUS_REG);
+               ++i;
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       hostdata->accesses_per_ms = i / 256;
+
+       return 0;
+}
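
The calibration above counts register reads in a fixed 256 ms window, after first spinning to a tick boundary so the window is accurately aligned; accesses_per_ms then lets later polling loops budget their busy-waits. A minimal userspace sketch of the same two-phase scheme (clock_gettime() standing in for jiffies, a volatile read for NCR5380_read(); names illustrative, not from this patch):

/* Editor's sketch, not from the patch: two-phase polling calibration. */
#include <stdio.h>
#include <time.h>

static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

int main(void)
{
	volatile unsigned char reg = 0;
	long i = 0, deadline;

	/* Phase 1: spin to the next tick so the window starts on a boundary */
	deadline = now_ms() + 1;
	while (now_ms() < deadline)
		;

	/* Phase 2: count simulated register reads over a 256 ms window */
	deadline += 256;
	do {
		unsigned char v = reg;	/* simulated register read */
		(void)v;
		++i;
	} while (now_ms() < deadline);

	printf("accesses_per_ms ~ %ld\n", i / 256);
	return 0;
}
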
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int pass;
+
+       for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
+               switch (pass) {
+               case 1:
+               case 3:
+               case 5:
+                       shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+                       NCR5380_poll_politely(instance,
+                                             STATUS_REG, SR_BSY, 0, 5 * HZ);
+                       break;
+               case 2:
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
+                       do_abort(instance);
+                       break;
+               case 4:
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
+                       do_reset(instance);
+                       /* Wait after a reset; the SCSI standard calls for
+                        * 250ms, we wait 500ms to be on the safe side.
+                        * But some Toshiba CD-ROMs need ten times that.
+                        */
+                       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+                               msleep(2500);
+                       else
+                               msleep(500);
+                       break;
+               case 6:
+                       shost_printk(KERN_ERR, instance, "bus locked solid\n");
+                       return -ENXIO;
+               }
+       }
        return 0;
 }
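
NCR5380_maybe_reset_bus() and much of the code below lean on NCR5380_poll_politely(), which is defined elsewhere in this series. A hedged userspace sketch of the underlying idea, a bounded busy-wait on masked register bits that yields the CPU while waiting (names and signature illustrative; the real helper takes the host instance and works in jiffies):

/* Editor's sketch of the poll-politely idea; illustrative only. */
#include <sched.h>
#include <time.h>

static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

/* Wait until (*reg & mask) == wanted, or timeout_ms elapses. */
static int poll_politely(volatile unsigned char *reg, unsigned char mask,
			 unsigned char wanted, long timeout_ms)
{
	long deadline = now_ms() + timeout_ms;

	while ((*reg & mask) != wanted) {
		if (now_ms() >= deadline)
			return -1;	/* the driver returns -ETIMEDOUT */
		sched_yield();		/* the real helper can also sleep */
	}
	return 0;
}

int main(void)
{
	volatile unsigned char status = 0x40;		/* SR_BSY asserted */

	return poll_politely(&status, 0x40, 0x40, 100);	/* returns 0 */
}
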
 
@@ -812,6 +736,38 @@ static void NCR5380_exit(struct Scsi_Host *instance)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        cancel_work_sync(&hostdata->main_task);
+       destroy_workqueue(hostdata->work_q);
+}
+
+/**
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+                         struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+       dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+       if (hostdata->sensing == cmd) {
+               /* Autosense processing ends here */
+               if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+                       set_host_byte(cmd, DID_ERROR);
+               } else
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               hostdata->sensing = NULL;
+       }
+
+#ifdef SUPPORT_TAGS
+       cmd_free_tag(cmd);
+#else
+       hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+#endif
+       cmd->scsi_done(cmd);
 }
 
 /**
@@ -819,7 +775,7 @@ static void NCR5380_exit(struct Scsi_Host *instance)
  * @instance: the relevant SCSI adapter
  * @cmd: SCSI command
  *
- * cmd is added to the per instance issue_queue, with minor
+ * cmd is added to the per-instance issue queue, with minor
  * twiddling done to the host specific fields of cmd.  If the
  * main coroutine is not running, it is restarted.
  */
@@ -828,44 +784,23 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
                                  struct scsi_cmnd *cmd)
 {
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       struct scsi_cmnd *tmp;
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
        unsigned long flags;
 
 #if (NDEBUG & NDEBUG_NO_WRITE)
        switch (cmd->cmnd[0]) {
        case WRITE_6:
        case WRITE_10:
-               printk(KERN_NOTICE "scsi%d: WRITE attempted with NO_WRITE debugging flag set\n",
-                      H_NO(cmd));
+               shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
                cmd->result = (DID_ERROR << 16);
                cmd->scsi_done(cmd);
                return 0;
        }
 #endif /* (NDEBUG & NDEBUG_NO_WRITE) */
 
-       /*
-        * We use the host_scribble field as a pointer to the next command
-        * in a queue
-        */
-
-       SET_NEXT(cmd, NULL);
        cmd->result = 0;
 
        /*
-        * Insert the cmd into the issue queue. Note that REQUEST SENSE
-        * commands are added to the head of the queue since any command will
-        * clear the contingent allegiance condition that exists and the
-        * sense data is only guaranteed to be valid while the condition exists.
-        */
-
-       /* ++guenther: now that the issue queue is being set up, we can lock ST-DMA.
-        * Otherwise a running NCR5380_main may steal the lock.
-        * Lock before actually inserting due to fairness reasons explained in
-        * atari_scsi.c. If we insert first, then it's impossible for this driver
-        * to release the lock.
-        * Stop timer for this command while waiting for the lock, or timeouts
-        * may happen (and they really do), and it's no good if the command doesn't
-        * appear in any of the queues.
         * ++roman: Just disabling the NCR interrupt isn't sufficient here,
         * because also a timer int can trigger an abort or reset, which would
         * alter queues and touch the lock.
@@ -873,7 +808,7 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
        if (!NCR5380_acquire_dma_irq(instance))
                return SCSI_MLQUEUE_HOST_BUSY;
 
-       local_irq_save(flags);
+       spin_lock_irqsave(&hostdata->lock, flags);
 
        /*
         * Insert the cmd into the issue queue. Note that REQUEST SENSE
@@ -882,33 +817,18 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
         * sense data is only guaranteed to be valid while the condition exists.
         */
 
-       if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
-               LIST(cmd, hostdata->issue_queue);
-               SET_NEXT(cmd, hostdata->issue_queue);
-               hostdata->issue_queue = cmd;
-       } else {
-               for (tmp = (struct scsi_cmnd *)hostdata->issue_queue;
-                    NEXT(tmp); tmp = NEXT(tmp))
-                       ;
-               LIST(cmd, tmp);
-               SET_NEXT(tmp, cmd);
-       }
-       local_irq_restore(flags);
+       if (cmd->cmnd[0] == REQUEST_SENSE)
+               list_add(&ncmd->list, &hostdata->unissued);
+       else
+               list_add_tail(&ncmd->list, &hostdata->unissued);
 
-       dprintk(NDEBUG_QUEUES, "scsi%d: command added to %s of queue\n", H_NO(cmd),
-                 (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-       /* If queue_command() is called from an interrupt (real one or bottom
-        * half), we let queue_main() do the job of taking care about main. If it
-        * is already running, this is a no-op, else main will be queued.
-        *
-        * If we're not in an interrupt, we can call NCR5380_main()
-        * unconditionally, because it cannot be already running.
-        */
-       if (in_interrupt() || irqs_disabled())
-               queue_main(hostdata);
-       else
-               NCR5380_main(&hostdata->main_task);
+       dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+                cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+
+       /* Kick off command processing */
+       queue_work(hostdata->work_q, &hostdata->main_task);
        return 0;
 }
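
REQUEST SENSE jumps to the head of the unissued list (list_add) while everything else goes to the tail (list_add_tail), because sense data is only valid while the contingent allegiance condition persists. A standalone sketch of that head-vs-tail discipline, with a minimal intrusive list standing in for the kernel's struct list_head:

/* Editor's sketch: head-vs-tail queueing; illustrative only. */
#include <stdio.h>

struct node { struct node *prev, *next; const char *op; };

static void list_init(struct node *h)
{
	h->prev = h->next = h;
}

static void list_add(struct node *n, struct node *h)	/* insert at head */
{
	n->next = h->next;
	n->prev = h;
	h->next->prev = n;
	h->next = n;
}

static void list_add_tail(struct node *n, struct node *h) /* insert at tail */
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

int main(void)
{
	struct node unissued;
	struct node a = { .op = "READ_10" };
	struct node b = { .op = "WRITE_10" };
	struct node sense = { .op = "REQUEST_SENSE" };

	list_init(&unissued);
	list_add_tail(&a, &unissued);	/* ordinary commands queue up */
	list_add_tail(&b, &unissued);
	list_add(&sense, &unissued);	/* sense jumps the queue */

	for (struct node *p = unissued.next; p != &unissued; p = p->next)
		printf("%s\n", p->op);	/* REQUEST_SENSE, READ_10, WRITE_10 */
	return 0;
}
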
 
@@ -917,13 +837,78 @@ static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        /* Caller does the locking needed to set & test these data atomically */
-       if (!hostdata->disconnected_queue &&
-           !hostdata->issue_queue &&
+       if (list_empty(&hostdata->disconnected) &&
+           list_empty(&hostdata->unissued) &&
+           list_empty(&hostdata->autosense) &&
            !hostdata->connected &&
-           !hostdata->retain_dma_intr)
+           !hostdata->selecting)
                NCR5380_release_dma_irq(instance);
 }
 
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *cmd;
+
+       if (list_empty(&hostdata->autosense)) {
+               list_for_each_entry(ncmd, &hostdata->unissued, list) {
+                       cmd = NCR5380_to_scmd(ncmd);
+                       dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+                                cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+                       if (
+#ifdef SUPPORT_TAGS
+                           !is_lun_busy(cmd, 1)
+#else
+                           !(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))
+#endif
+                       ) {
+                               list_del(&ncmd->list);
+                               dsprintk(NDEBUG_QUEUES, instance,
+                                        "dequeue: removed %p from issue queue\n", cmd);
+                               return cmd;
+                       }
+               }
+       } else {
+               /* Autosense processing begins here */
+               ncmd = list_first_entry(&hostdata->autosense,
+                                       struct NCR5380_cmd, list);
+               list_del(&ncmd->list);
+               cmd = NCR5380_to_scmd(ncmd);
+               dsprintk(NDEBUG_QUEUES, instance,
+                        "dequeue: removed %p from autosense queue\n", cmd);
+               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+               hostdata->sensing = cmd;
+               return cmd;
+       }
+       return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+       if (hostdata->sensing) {
+               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               list_add(&ncmd->list, &hostdata->autosense);
+               hostdata->sensing = NULL;
+       } else
+               list_add(&ncmd->list, &hostdata->unissued);
+}
+
 /**
  * NCR5380_main - NCR state machines
  *
@@ -931,8 +916,6 @@ static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
  * be done on the NCR5380 host adapters in a system.  Both
  * NCR5380_queue_command() and NCR5380_intr() will try to start it
  * in case it is not running.
- *
- * Locks: called as its own thread with no locks held.
  */
 
 static void NCR5380_main(struct work_struct *work)
@@ -940,154 +923,69 @@ static void NCR5380_main(struct work_struct *work)
        struct NCR5380_hostdata *hostdata =
                container_of(work, struct NCR5380_hostdata, main_task);
        struct Scsi_Host *instance = hostdata->host;
-       struct scsi_cmnd *tmp, *prev;
+       struct scsi_cmnd *cmd;
        int done;
-       unsigned long flags;
 
        /*
-        * We run (with interrupts disabled) until we're sure that none of
-        * the host adapters have anything that can be done, at which point
-        * we set main_running to 0 and exit.
-        *
-        * Interrupts are enabled before doing various other internal
-        * instructions, after we've decided that we need to run through
-        * the loop again.
-        *
-        * this should prevent any race conditions.
-        *
         * ++roman: Just disabling the NCR interrupt isn't sufficient here,
         * because also a timer int can trigger an abort or reset, which can
         * alter queues and touch the Falcon lock.
         */
 
-       /* Tell int handlers main() is now already executing.  Note that
-          no races are possible here. If an int comes in before
-          'main_running' is set here, and queues/executes main via the
-          task queue, it doesn't do any harm, just this instance of main
-          won't find any work left to do. */
-       if (hostdata->main_running)
-               return;
-       hostdata->main_running = 1;
-
-       local_save_flags(flags);
        do {
-               local_irq_disable();    /* Freeze request queues */
                done = 1;
 
-               if (!hostdata->connected) {
-                       dprintk(NDEBUG_MAIN, "scsi%d: not connected\n", HOSTNO);
-                       /*
-                        * Search through the issue_queue for a command destined
-                        * for a target that's not busy.
-                        */
-#if (NDEBUG & NDEBUG_LISTS)
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL;
-                            tmp && (tmp != prev); prev = tmp, tmp = NEXT(tmp))
-                               ;
-                       /*printk("%p  ", tmp);*/
-                       if ((tmp == prev) && tmp)
-                               printk(" LOOP\n");
-                       /* else printk("\n"); */
-#endif
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue,
-                            prev = NULL; tmp; prev = tmp, tmp = NEXT(tmp)) {
-                               u8 lun = tmp->device->lun;
-
-                               dprintk(NDEBUG_LISTS,
-                                       "MAIN tmp=%p target=%d busy=%d lun=%d\n",
-                                       tmp, scmd_id(tmp), hostdata->busy[scmd_id(tmp)],
-                                       lun);
-                               /*  When we find one, remove it from the issue queue. */
-                               /* ++guenther: possible race with Falcon locking */
-                               if (
-#ifdef SUPPORT_TAGS
-                                   !is_lun_busy( tmp, tmp->cmnd[0] != REQUEST_SENSE)
-#else
-                                   !(hostdata->busy[tmp->device->id] & (1 << lun))
-#endif
-                                   ) {
-                                       /* ++guenther: just to be sure, this must be atomic */
-                                       local_irq_disable();
-                                       if (prev) {
-                                               REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
-                                               SET_NEXT(prev, NEXT(tmp));
-                                       } else {
-                                               REMOVE(-1, hostdata->issue_queue, tmp, NEXT(tmp));
-                                               hostdata->issue_queue = NEXT(tmp);
-                                       }
-                                       SET_NEXT(tmp, NULL);
-                                       hostdata->retain_dma_intr++;
+               spin_lock_irq(&hostdata->lock);
+               while (!hostdata->connected &&
+                      (cmd = dequeue_next_cmd(instance))) {
 
-                                       /* reenable interrupts after finding one */
-                                       local_irq_restore(flags);
+                       dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
 
-                                       /*
-                                        * Attempt to establish an I_T_L nexus here.
-                                        * On success, instance->hostdata->connected is set.
-                                        * On failure, we must add the command back to the
-                                        *   issue queue so we can keep trying.
-                                        */
-                                       dprintk(NDEBUG_MAIN, "scsi%d: main(): command for target %d "
-                                                   "lun %d removed from issue_queue\n",
-                                                   HOSTNO, tmp->device->id, lun);
-                                       /*
-                                        * REQUEST SENSE commands are issued without tagged
-                                        * queueing, even on SCSI-II devices because the
-                                        * contingent allegiance condition exists for the
-                                        * entire unit.
-                                        */
-                                       /* ++roman: ...and the standard also requires that
-                                        * REQUEST SENSE command are untagged.
-                                        */
+                       /*
+                        * Attempt to establish an I_T_L nexus here.
+                        * On success, instance->hostdata->connected is set.
+                        * On failure, we must add the command back to the
+                        * issue queue so we can keep trying.
+                        */
+                       /*
+                        * REQUEST SENSE commands are issued without tagged
+                        * queueing, even on SCSI-II devices because the
+                        * contingent allegiance condition exists for the
+                        * entire unit.
+                        */
+                       /* ++roman: ...and the standard also requires that
+                        * REQUEST SENSE command are untagged.
+                        */
 
 #ifdef SUPPORT_TAGS
-                                       cmd_get_tag(tmp, tmp->cmnd[0] != REQUEST_SENSE);
+                       cmd_get_tag(cmd, cmd->cmnd[0] != REQUEST_SENSE);
 #endif
-                                       if (!NCR5380_select(instance, tmp)) {
-                                               local_irq_disable();
-                                               hostdata->retain_dma_intr--;
-                                               /* release if target did not response! */
-                                               maybe_release_dma_irq(instance);
-                                               local_irq_restore(flags);
-                                               break;
-                                       } else {
-                                               local_irq_disable();
-                                               LIST(tmp, hostdata->issue_queue);
-                                               SET_NEXT(tmp, hostdata->issue_queue);
-                                               hostdata->issue_queue = tmp;
+                       cmd = NCR5380_select(instance, cmd);
+                       if (!cmd) {
+                               dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
+                               maybe_release_dma_irq(instance);
+                       } else {
+                               dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+                                        "main: select failed, returning %p to queue\n", cmd);
+                               requeue_cmd(instance, cmd);
 #ifdef SUPPORT_TAGS
-                                               cmd_free_tag(tmp);
+                               cmd_free_tag(cmd);
 #endif
-                                               hostdata->retain_dma_intr--;
-                                               local_irq_restore(flags);
-                                               dprintk(NDEBUG_MAIN, "scsi%d: main(): select() failed, "
-                                                           "returned to issue_queue\n", HOSTNO);
-                                               if (hostdata->connected)
-                                                       break;
-                                       }
-                               } /* if target/lun/target queue is not busy */
-                       } /* for issue_queue */
-               } /* if (!hostdata->connected) */
-
+                       }
+               }
                if (hostdata->connected
 #ifdef REAL_DMA
                    && !hostdata->dma_len
 #endif
                    ) {
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_MAIN, "scsi%d: main: performing information transfer\n",
-                                   HOSTNO);
+                       dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
                        NCR5380_information_transfer(instance);
-                       dprintk(NDEBUG_MAIN, "scsi%d: main: done set false\n", HOSTNO);
                        done = 0;
                }
+               spin_unlock_irq(&hostdata->lock);
+               if (!done)
+                       cond_resched();
        } while (!done);
-
-       /* Better allow ints _after_ 'main_running' has been cleared, else
-          an interrupt could believe we'll pick up the work it left for
-          us, but we won't see it anymore here... */
-       hostdata->main_running = 0;
-       local_irq_restore(flags);
 }
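
The shape of the reworked loop: one spinlock now guards all queues, selection may drop and retake it, and the loop re-runs until nothing is connected and nothing can be dequeued. A loose userspace model of that control flow (a pthread mutex for the spinlock, sched_yield() for cond_resched(); all stubs and names are illustrative, not the driver's code):

/* Editor's sketch of the main-loop control flow; illustrative only. */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stddef.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *connected;					/* models hostdata->connected */

static void *dequeue_next(void) { return NULL; }	/* nothing queued    */
static void *try_select(void *cmd) { (void)cmd; return NULL; } /* NULL == ok */
static void requeue(void *cmd) { (void)cmd; }
static void information_transfer(void) { connected = NULL; }

static void main_task(void)
{
	bool done;

	do {
		void *cmd;

		done = true;
		pthread_mutex_lock(&lock);
		while (!connected && (cmd = dequeue_next()) != NULL) {
			cmd = try_select(cmd);	/* may drop/retake the lock */
			if (cmd)
				requeue(cmd);	/* selection failed; retry  */
		}
		if (connected) {
			information_transfer();
			done = false;
		}
		pthread_mutex_unlock(&lock);
		if (!done)
			sched_yield();		/* models cond_resched()    */
	} while (!done);
}

int main(void)
{
	main_task();
	return 0;
}
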
 
 
@@ -1096,27 +994,20 @@ static void NCR5380_main(struct work_struct *work)
  * Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
  *
  * Purpose : Called by interrupt handler when DMA finishes or a phase
- *     mismatch occurs (which would finish the DMA transfer).
+ * mismatch occurs (which would finish the DMA transfer).
  *
  * Inputs : instance - this instance of the NCR5380.
- *
  */
 
 static void NCR5380_dma_complete(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int transferred;
        unsigned char **data;
-       volatile int *count;
+       int *count;
        int saved_data = 0, overrun = 0;
        unsigned char p;
 
-       if (!hostdata->connected) {
-               printk(KERN_WARNING "scsi%d: received end of DMA interrupt with "
-                      "no connected cmd\n", HOSTNO);
-               return;
-       }
-
        if (hostdata->read_overruns) {
                p = hostdata->connected->SCp.phase;
                if (p & SR_IO) {
@@ -1126,15 +1017,11 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
                            (BASR_PHASE_MATCH|BASR_ACK)) {
                                saved_data = NCR5380_read(INPUT_DATA_REG);
                                overrun = 1;
-                               dprintk(NDEBUG_DMA, "scsi%d: read overrun handled\n", HOSTNO);
+                               dsprintk(NDEBUG_DMA, instance, "read overrun handled\n");
                        }
                }
        }
 
-       dprintk(NDEBUG_DMA, "scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
-                  HOSTNO, NCR5380_read(BUS_AND_STATUS_REG),
-                  NCR5380_read(STATUS_REG));
-
 #if defined(CONFIG_SUN3)
        if ((sun3scsi_dma_finish(rq_data_dir(hostdata->connected->request)))) {
                pr_err("scsi%d: overrun in UDC counter -- not prepared to deal with this!\n",
@@ -1153,9 +1040,9 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
        }
 #endif
 
-       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
        transferred = hostdata->dma_len - NCR5380_dma_residual(instance);
        hostdata->dma_len = 0;
@@ -1194,140 +1081,160 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
  * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
  * from the disconnected queue, and restarting NCR5380_main()
  * as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because
+ * the Busy Monitor interrupt is enabled together with DMA Mode.
  */
 
 static irqreturn_t NCR5380_intr(int irq, void *dev_id)
 {
        struct Scsi_Host *instance = dev_id;
-       int done = 1, handled = 0;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int handled = 0;
        unsigned char basr;
+       unsigned long flags;
 
-       dprintk(NDEBUG_INTR, "scsi%d: NCR5380 irq triggered\n", HOSTNO);
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-       /* Look for pending interrupts */
        basr = NCR5380_read(BUS_AND_STATUS_REG);
-       dprintk(NDEBUG_INTR, "scsi%d: BASR=%02x\n", HOSTNO, basr);
-       /* dispatch to appropriate routine if found and done=0 */
        if (basr & BASR_IRQ) {
-               NCR5380_dprint(NDEBUG_INTR, instance);
-               if ((NCR5380_read(STATUS_REG) & (SR_SEL|SR_IO)) == (SR_SEL|SR_IO)) {
-                       done = 0;
-                       dprintk(NDEBUG_INTR, "scsi%d: SEL interrupt\n", HOSTNO);
-                       NCR5380_reselect(instance);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else if (basr & BASR_PARITY_ERROR) {
-                       dprintk(NDEBUG_INTR, "scsi%d: PARITY interrupt\n", HOSTNO);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-                       dprintk(NDEBUG_INTR, "scsi%d: RESET interrupt\n", HOSTNO);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else {
-                       /*
-                        * The rest of the interrupt conditions can occur only during a
-                        * DMA transfer
-                        */
+               unsigned char mr = NCR5380_read(MODE_REG);
+               unsigned char sr = NCR5380_read(STATUS_REG);
+
+               dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+                        irq, basr, sr, mr);
 
 #if defined(REAL_DMA)
-                       /*
-                        * We should only get PHASE MISMATCH and EOP interrupts if we have
-                        * DMA enabled, so do a sanity check based on the current setting
-                        * of the MODE register.
+               if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+                       /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+                        * We ack IRQ after clearing Mode Register. Workarounds
+                        * for End of DMA errata need to happen in DMA Mode.
                         */
 
-                       if ((NCR5380_read(MODE_REG) & MR_DMA_MODE) &&
-                           ((basr & BASR_END_DMA_TRANSFER) ||
-                            !(basr & BASR_PHASE_MATCH))) {
+                       dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
 
-                               dprintk(NDEBUG_INTR, "scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
-                               NCR5380_dma_complete( instance );
-                               done = 0;
-                       } else
+                       if (hostdata->connected) {
+                               NCR5380_dma_complete(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
+                       } else {
+                               NCR5380_write(MODE_REG, MR_BASE);
+                               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+                       }
+               } else
 #endif /* REAL_DMA */
-                       {
-/* MS: Ignore unknown phase mismatch interrupts (caused by EOP interrupt) */
-                               if (basr & BASR_PHASE_MATCH)
-                                       dprintk(NDEBUG_INTR, "scsi%d: unknown interrupt, "
-                                              "BASR 0x%x, MR 0x%x, SR 0x%x\n",
-                                              HOSTNO, basr, NCR5380_read(MODE_REG),
-                                              NCR5380_read(STATUS_REG));
-                               (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+               if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+                   (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+                       /* Probably reselected */
+                       NCR5380_write(SELECT_ENABLE_REG, 0);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+                       if (!hostdata->connected) {
+                               NCR5380_reselect(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
+                       }
+                       if (!hostdata->connected)
+                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               } else {
+                       /* Probably Bus Reset */
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
 #ifdef SUN3_SCSI_VME
-                               dregs->csr |= CSR_DMA_ENABLE;
+                       dregs->csr |= CSR_DMA_ENABLE;
 #endif
-                       }
-               } /* if !(SELECTION || PARITY) */
+               }
                handled = 1;
-       } /* BASR & IRQ */ else {
-               printk(KERN_NOTICE "scsi%d: interrupt without IRQ bit set in BASR, "
-                      "BASR 0x%X, MR 0x%X, SR 0x%x\n", HOSTNO, basr,
-                      NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
-               (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       } else {
+               shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
 #ifdef SUN3_SCSI_VME
                dregs->csr |= CSR_DMA_ENABLE;
 #endif
        }
 
-       if (!done) {
-               dprintk(NDEBUG_INTR, "scsi%d: in int routine, calling main\n", HOSTNO);
-               /* Put a call to NCR5380_main() on the queue... */
-               queue_main(shost_priv(instance));
-       }
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
        return IRQ_RETVAL(handled);
 }
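
As the comment block before the handler explains, the cause of an interrupt is partly guesswork from sampled register values. A standalone sketch of that decision order as a pure function (register masks restated from the usual 5380 register layout; this is an illustration, not the driver's code):

/* Editor's sketch of the interrupt-cause guesswork; illustrative only. */
#include <stdio.h>

#define BASR_IRQ	0x10
#define MR_DMA_MODE	0x02
#define MR_MONITOR_BSY	0x04
#define SR_SEL		0x02
#define SR_IO		0x04
#define SR_BSY		0x40
#define SR_RST		0x80

static const char *guess_cause(unsigned char basr, unsigned char mr,
			       unsigned char sr, unsigned char data,
			       unsigned char id_mask)
{
	if (!(basr & BASR_IRQ))
		return "spurious: no IRQ bit in BASR";
	if (mr & (MR_DMA_MODE | MR_MONITOR_BSY))
		return "end of DMA, phase mismatch or loss of BSY";
	if ((data & id_mask) &&
	    (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO))
		return "reselection";
	return "probably bus reset";
}

int main(void)
{
	/* Our ID on the data bus, SEL and IO asserted, nothing else */
	printf("%s\n", guess_cause(BASR_IRQ, 0, SR_SEL | SR_IO, 0x01, 0x01));
	return 0;
}
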
 
 /*
  * Function : int NCR5380_select(struct Scsi_Host *instance,
- *                               struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
  *
  * Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- *     including ARBITRATION, SELECTION, and initial message out for
- *     IDENTIFY and queue messages.
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
  *
  * Inputs : instance - instantiation of the 5380 driver on which this
- *     target lives, cmd - SCSI command to execute.
+ * target lives, cmd - SCSI command to execute.
  *
- * Returns : -1 if selection could not execute for some reason,
- *     0 if selection succeeded or failed because the target
- *     did not respond.
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
  *
  * Side effects :
- *     If bus busy, arbitration failed, etc, NCR5380_select() will exit
- *             with registers as they should have been on entry - ie
- *             SELECT_ENABLE will be set appropriately, the NCR5380
- *             will cease to drive any SCSI bus signals.
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
  *
- *     If successful : I_T_L or I_T_L_Q nexus will be established,
- *             instance->connected will be set to cmd.
- *             SELECT interrupt will be disabled.
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * instance->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
  *
- *     If failed (no target) : cmd->scsi_done() will be called, and the
- *             cmd->result host byte set to DID_BAD_TARGET.
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
  */
 
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+                                        struct scsi_cmnd *cmd)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char tmp[3], phase;
        unsigned char *data;
        int len;
-       unsigned long timeout;
-       unsigned long flags;
+       int err;
 
-       hostdata->restart_select = 0;
        NCR5380_dprint(NDEBUG_ARBITRATION, instance);
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: starting arbitration, id = %d\n", HOSTNO,
-                  instance->this_id);
+       dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+                instance->this_id);
+
+       /*
+        * Arbitration and selection phases are slow and involve dropping the
+        * lock, so we have to watch out for EH. An exception handler may
+        * change 'selecting' to NULL. This function will then return NULL
+        * so that the caller will forget about 'cmd'. (During information
+        * transfer phases, EH may change 'connected' to NULL.)
+        */
+       hostdata->selecting = cmd;
 
        /*
         * Set the phase bits to 0, otherwise the NCR5380 won't drive the
         * data bus during SELECTION.
         */
 
-       local_irq_save(flags);
-       if (hostdata->connected) {
-               local_irq_restore(flags);
-               return -1;
-       }
        NCR5380_write(TARGET_COMMAND_REG, 0);
 
        /*
@@ -1337,96 +1244,77 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
        NCR5380_write(MODE_REG, MR_ARBITRATE);
 
-       local_irq_restore(flags);
-
-       /* Wait for arbitration logic to complete */
-#if defined(NCR_TIMEOUT)
-       {
-               unsigned long timeout = jiffies + 2*NCR_TIMEOUT;
+       /* The chip now waits for BUS FREE phase. Then after the 800 ns
+        * Bus Free Delay, arbitration will begin.
+        */
 
-               while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
-                      time_before(jiffies, timeout) && !hostdata->connected)
-                       ;
-               if (time_after_eq(jiffies, timeout)) {
-                       printk("scsi : arbitration timeout at %d\n", __LINE__);
-                       NCR5380_write(MODE_REG, MR_BASE);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
+       spin_unlock_irq(&hostdata->lock);
+       err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+                       INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+                                              ICR_ARBITRATION_PROGRESS, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+               /* Reselection interrupt */
+               goto out;
        }
-#else /* NCR_TIMEOUT */
-       while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
-              !hostdata->connected)
-               ;
-#endif
-
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: arbitration complete\n", HOSTNO);
-
-       if (hostdata->connected) {
+       if (err < 0) {
                NCR5380_write(MODE_REG, MR_BASE);
-               return -1;
+               shost_printk(KERN_ERR, instance,
+                            "select: arbitration timeout\n");
+               goto out;
        }
-       /*
-        * The arbitration delay is 2.2us, but this is a minimum and there is
-        * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
-        * the integral nature of udelay().
-        *
-        */
+       spin_unlock_irq(&hostdata->lock);
 
+       /* The SCSI-2 arbitration delay is 2.4 us */
        udelay(3);
 
        /* Check for lost arbitration */
        if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
            (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
-           (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
-           hostdata->connected) {
+           (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
                NCR5380_write(MODE_REG, MR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
-                          HOSTNO);
-               return -1;
+               dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+               spin_lock_irq(&hostdata->lock);
+               goto out;
        }
 
-       /* after/during arbitration, BSY should be asserted.
-          IBM DPES-31080 Version S31Q works now */
-       /* Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman) */
+       /* After/during arbitration, BSY should be asserted.
+        * IBM DPES-31080 Version S31Q works now
+        * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+        */
        NCR5380_write(INITIATOR_COMMAND_REG,
                      ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
 
-       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
-           hostdata->connected) {
-               NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
-                          HOSTNO);
-               return -1;
-       }
-
        /*
         * Again, bus clear + bus settle time is 1.2us, however, this is
         * a minimum so we'll udelay ceil(1.2)
         */
 
-#ifdef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
-       /* ++roman: But some targets (see above :-) seem to need a bit more... */
-       udelay(15);
-#else
-       udelay(2);
-#endif
+       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+               udelay(15);
+       else
+               udelay(2);
 
-       if (hostdata->connected) {
+       spin_lock_irq(&hostdata->lock);
+
+       /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+               goto out;
+
+       if (!hostdata->selecting) {
                NCR5380_write(MODE_REG, MR_BASE);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               return -1;
+               goto out;
        }
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: won arbitration\n", HOSTNO);
+       dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
 
        /*
         * Now that we have won arbitration, start Selection process, asserting
         * the host and target ID's on the SCSI bus.
         */
 
-       NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << cmd->device->id)));
+       NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
 
        /*
         * Raise ATN while SEL is true before BSY goes false from arbitration,
@@ -1434,22 +1322,18 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * phase immediately after selection.
         */
 
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY |
-                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL ));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
        NCR5380_write(MODE_REG, MR_BASE);
 
        /*
         * Reselect interrupts must be turned off prior to the dropping of BSY,
         * otherwise we will trigger an interrupt.
         */
-
-       if (hostdata->connected) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               return -1;
-       }
-
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       spin_unlock_irq(&hostdata->lock);
+
        /*
         * The initiator shall then wait at least two deskew delays and release
         * the BSY signal.
@@ -1457,8 +1341,8 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        udelay(1);        /* wingel -- wait two bus deskew delay >2*45ns */
 
        /* Reset BSY */
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA |
-                     ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+                     ICR_ASSERT_ATN | ICR_ASSERT_SEL);
 
        /*
         * Something weird happens when we cease to drive BSY - looks
@@ -1479,45 +1363,39 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        udelay(1);
 
-       dprintk(NDEBUG_SELECTION, "scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
+       dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
 
        /*
         * The SCSI specification calls for a 250 ms timeout for the actual
         * selection.
         */
 
-       timeout = jiffies + msecs_to_jiffies(250);
-
-       /*
-        * XXX very interesting - we're seeing a bounce where the BSY we
-        * asserted is being reflected / still asserted (propagation delay?)
-        * and it's detecting as true.  Sigh.
-        */
-
-#if 0
-       /* ++roman: If a target conformed to the SCSI standard, it wouldn't assert
-        * IO while SEL is true. But again, there are some disks out the in the
-        * world that do that nevertheless. (Somebody claimed that this announces
-        * reselection capability of the target.) So we better skip that test and
-        * only wait for BSY... (Famous german words: Der Klügere gibt nach :-)
-        */
-
-       while (time_before(jiffies, timeout) &&
-              !(NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO)))
-               ;
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+                                   msecs_to_jiffies(250));
 
        if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+               spin_lock_irq(&hostdata->lock);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_reselect(instance);
-               printk(KERN_ERR "scsi%d: reselection after won arbitration?\n",
-                      HOSTNO);
+               if (!hostdata->connected)
+                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+               goto out;
+       }
+
+       if (err < 0) {
+               spin_lock_irq(&hostdata->lock);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return -1;
+               /* Can't touch cmd if it has been reclaimed by the scsi ML */
+               if (hostdata->selecting) {
+                       cmd->result = DID_BAD_TARGET << 16;
+                       complete_cmd(instance, cmd);
+                       dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+                       cmd = NULL;
+               }
+               goto out;
        }
-#else
-       while (time_before(jiffies, timeout) && !(NCR5380_read(STATUS_REG) & SR_BSY))
-               ;
-#endif
 
        /*
         * No less than two deskew delays after the initiator detects the
@@ -1525,32 +1403,9 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * change the DATA BUS.                                     -wingel
         */
 
-       udelay(1);
-
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
-
-       if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               if (hostdata->targets_present & (1 << cmd->device->id)) {
-                       printk(KERN_ERR "scsi%d: weirdness\n", HOSTNO);
-                       if (hostdata->restart_select)
-                               printk(KERN_NOTICE "\trestart select\n");
-                       NCR5380_dprint(NDEBUG_ANY, instance);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
-               cmd->result = DID_BAD_TARGET << 16;
-#ifdef SUPPORT_TAGS
-               cmd_free_tag(cmd);
-#endif
-               cmd->scsi_done(cmd);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               dprintk(NDEBUG_SELECTION, "scsi%d: target did not respond within 250ms\n", HOSTNO);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return 0;
-       }
-
-       hostdata->targets_present |= (1 << cmd->device->id);
+       udelay(1);
+
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
        /*
         * Since we followed the SCSI spec, and raised ATN while SEL
@@ -1563,16 +1418,27 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * until it wraps back to 0.
         *
         * XXX - it turns out that there are some broken SCSI-II devices,
-        *           which claim to support tagged queuing but fail when more than
-        *           some number of commands are issued at once.
+        * which claim to support tagged queuing but fail when more than
+        * some number of commands are issued at once.
         */
 
        /* Wait for start of REQ/ACK handshake */
-       while (!(NCR5380_read(STATUS_REG) & SR_REQ))
-               ;
 
-       dprintk(NDEBUG_SELECTION, "scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
-                  HOSTNO, cmd->device->id);
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (err < 0) {
+               shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               goto out;
+       }
+       if (!hostdata->selecting) {
+               do_abort(instance);
+               goto out;
+       }
+
+       dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+                scmd_id(cmd));
        tmp[0] = IDENTIFY(1, cmd->device->lun);
 
 #ifdef SUPPORT_TAGS
@@ -1591,11 +1457,12 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        data = tmp;
        phase = PHASE_MSGOUT;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
-       dprintk(NDEBUG_SELECTION, "scsi%d: nexus established.\n", HOSTNO);
+       dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
        /* XXX need to handle errors here */
+
        hostdata->connected = cmd;
 #ifndef SUPPORT_TAGS
-       hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
+       hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
 #endif
 #ifdef SUN3_SCSI_VME
        dregs->csr |= CSR_INTR;
@@ -1603,24 +1470,30 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        initialize_SCp(cmd);
 
-       return 0;
+       cmd = NULL;
+
+out:
+       if (!hostdata->selecting)
+               return NULL;
+       hostdata->selecting = NULL;
+       return cmd;
 }
 
 /*
  * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
- *      unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using polled I/O
  *
  * Inputs : instance - instance of driver, *phase - pointer to
- *     what phase is expected, *count - pointer to number of
- *     bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
  * Returns : -1 when different phase is entered without transferring
- *     maximum number of bytes, 0 if all bytes are transferred or exit
- *     is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
- *     Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
  *
  * XXX Note : handling for bus free may be useful.
  */
@@ -1635,9 +1508,9 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                                unsigned char *phase, int *count,
                                unsigned char **data)
 {
-       register unsigned char p = *phase, tmp;
-       register int c = *count;
-       register unsigned char *d = *data;
+       unsigned char p = *phase, tmp;
+       int c = *count;
+       unsigned char *d = *data;
 
        /*
         * The NCR5380 chip will only drive the SCSI bus when the
@@ -1652,14 +1525,15 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                 * Wait for assertion of REQ, after which the phase bits will be
                 * valid
                 */
-               while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
-                       ;
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d: REQ detected\n", HOSTNO);
+               if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
+                       break;
+
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
 
                /* Check for phase mismatch */
-               if ((tmp & PHASE_MASK) != p) {
-                       dprintk(NDEBUG_PIO, "scsi%d: phase mismatch\n", HOSTNO);
+               if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+                       dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
                        NCR5380_dprint_phase(NDEBUG_PIO, instance);
                        break;
                }
@@ -1684,35 +1558,36 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
                                NCR5380_dprint(NDEBUG_PIO, instance);
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
                        } else {
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
                                NCR5380_dprint(NDEBUG_PIO, instance);
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
                        }
                } else {
                        NCR5380_dprint(NDEBUG_PIO, instance);
                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
                }
 
-               while (NCR5380_read(STATUS_REG) & SR_REQ)
-                       ;
+               if (NCR5380_poll_politely(instance,
+                                         STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+                       break;
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d: req false, handshake complete\n", HOSTNO);
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
 
-               /*
-                * We have several special cases to consider during REQ/ACK handshaking :
-                * 1.  We were in MSGOUT phase, and we are on the last byte of the
-                *      message.  ATN must be dropped as ACK is dropped.
-                *
-                * 2.  We are in a MSGIN phase, and we are on the last byte of the
-                *      message.  We must exit with ACK asserted, so that the calling
-                *      code may raise ATN before dropping ACK to reject the message.
-                *
-                * 3.  ACK and ATN are clear and the target may proceed as normal.
-                */
+               /*
+                * We have several special cases to consider during REQ/ACK handshaking :
+                * 1.  We were in MSGOUT phase, and we are on the last byte of the
+                * message.  ATN must be dropped as ACK is dropped.
+                *
+                * 2.  We are in a MSGIN phase, and we are on the last byte of the
+                * message.  We must exit with ACK asserted, so that the calling
+                * code may raise ATN before dropping ACK to reject the message.
+                *
+                * 3.  ACK and ATN are clear and the target may proceed as normal.
+                */
                if (!(p == PHASE_MSGIN && c == 1)) {
                        if (p == PHASE_MSGOUT && c > 1)
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1721,16 +1596,16 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                }
        } while (--c);
 
-       dprintk(NDEBUG_PIO, "scsi%d: residual %d\n", HOSTNO, c);
+       dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
 
        *count = c;
        *data = d;
        tmp = NCR5380_read(STATUS_REG);
        /* The phase read from the bus is valid if either REQ is (already)
-        * asserted or if ACK hasn't been released yet. The latter is the case if
-        * we're in MSGIN and all wanted bytes have been received.
+        * asserted or if ACK hasn't been released yet. The latter applies if
+        * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
         */
-       if ((tmp & SR_REQ) || (p == PHASE_MSGIN && c == 0))
+       if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
                *phase = tmp & PHASE_MASK;
        else
                *phase = PHASE_UNKNOWN;
@@ -1741,19 +1616,45 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                return -1;
 }
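
For reference on the reworked SR_IO test above: the 5380 status register exposes the bus phase on the MSG, C/D and I/O lines, and every initiator-receive phase drives I/O true, which is why one bit covers all three cases named in the comment:

        phase         MSG  C/D  I/O
        DATA IN        0    0    1
        STATUS         0    1    1
        MESSAGE IN     1    1    1
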
 
-/*
- * Function : do_abort (Scsi_Host *host)
+/**
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
+ *
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
  *
- * Purpose : abort the currently established nexus.  Should only be
- *     called from a routine which can drop into a
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
+ */
+
+static void do_reset(struct Scsi_Host *instance)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       NCR5380_write(TARGET_COMMAND_REG,
+                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
+       udelay(50);
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       local_irq_restore(flags);
+}
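
A hedged sketch of where a helper like do_reset() typically gets used; the handler name and the one-second settle delay are assumptions for illustration, not part of this diff:

/* Hypothetical EH bus-reset method built on do_reset(). */
static int sketch_bus_reset(struct scsi_cmnd *cmd)
{
        struct Scsi_Host *instance = cmd->device->host;

        do_reset(instance);     /* pulse RST and clear the latched IRQ */
        ssleep(1);              /* give targets time to recover */
        return SUCCESS;
}
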
+
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
  *
- * Returns 0 on success, -1 on failure.
+ * Returns 0 on success, -1 on failure.
  */
 
 static int do_abort(struct Scsi_Host *instance)
 {
-       unsigned char tmp, *msgptr, phase;
+       unsigned char *msgptr, phase, tmp;
        int len;
+       int rc;
 
        /* Request message out phase */
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1768,16 +1669,20 @@ static int do_abort(struct Scsi_Host *instance)
         * the target sees, so we just handshake.
         */
 
-       while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
-               ;
+       rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+       if (rc < 0)
+               goto timeout;
+
+       tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-       if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
-                             ICR_ASSERT_ACK);
-               while (NCR5380_read(STATUS_REG) & SR_REQ)
-                       ;
+       if (tmp != PHASE_MSGOUT) {
+               NCR5380_write(INITIATOR_COMMAND_REG,
+                             ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+               rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+               if (rc < 0)
+                       goto timeout;
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
        }
 
@@ -1793,26 +1698,29 @@ static int do_abort(struct Scsi_Host *instance)
         */
 
        return len ? -1 : 0;
+
+timeout:
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       return -1;
 }
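
The new timeout label gives every polling failure a single exit that deasserts the bus before reporting failure. A sketch of the caller-side shape this enables (sketch_teardown_nexus is a hypothetical name; complete_cmd() and set_host_byte() are used as in the rest of this diff):

static void sketch_teardown_nexus(struct Scsi_Host *instance,
                                  struct scsi_cmnd *cmd)
{
        struct NCR5380_hostdata *hostdata = shost_priv(instance);

        hostdata->connected = NULL;
        if (do_abort(instance) < 0)
                set_host_byte(cmd, DID_ERROR); /* target never went bus free */
        complete_cmd(instance, cmd);
}
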
 
 #if defined(REAL_DMA)
 /*
  * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
- *      unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using either real
- *     or pseudo DMA.
+ * or pseudo DMA.
  *
  * Inputs : instance - instance of driver, *phase - pointer to
- *     what phase is expected, *count - pointer to number of
- *     bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
  * Returns : -1 when different phase is entered without transferring
- *     maximum number of bytes, 0 if all bytes or transferred or exit
- *     is in same phase.
- *
- *     Also, *phase, *count, *data are modified in place.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
+ * Also, *phase, *count, *data are modified in place.
  */
 
 
@@ -1820,10 +1728,9 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
                                unsigned char *phase, int *count,
                                unsigned char **data)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        register int c = *count;
        register unsigned char p = *phase;
-       unsigned long flags;
 
 #if defined(CONFIG_SUN3)
        /* sanity check */
@@ -1834,29 +1741,22 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        }
        hostdata->dma_len = c;
 
-       dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
-               instance->host_no, (p & SR_IO) ? "reading" : "writing",
-               c, (p & SR_IO) ? "to" : "from", *data);
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, *data);
 
        /* netbsd turns off ints here, why not be safe and do it too */
-       local_irq_save(flags);
 
        /* send start chain */
        sun3scsi_dma_start(c, *data);
 
+       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
        if (p & SR_IO) {
-               NCR5380_write(TARGET_COMMAND_REG, 1);
-               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
                NCR5380_write(INITIATOR_COMMAND_REG, 0);
-               NCR5380_write(MODE_REG,
-                             (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
                NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
        } else {
-               NCR5380_write(TARGET_COMMAND_REG, 0);
-               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_DATA);
-               NCR5380_write(MODE_REG,
-                             (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
                NCR5380_write(START_DMA_SEND_REG, 0);
        }
 
@@ -1864,8 +1764,6 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        dregs->csr |= CSR_DMA_ENABLE;
 #endif
 
-       local_irq_restore(flags);
-
        sun3_dma_active = 1;
 
 #else /* !defined(CONFIG_SUN3) */
@@ -1880,25 +1778,20 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        if (hostdata->read_overruns && (p & SR_IO))
                c -= hostdata->read_overruns;
 
-       dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
-                  HOSTNO, (p & SR_IO) ? "reading" : "writing",
-                  c, (p & SR_IO) ? "to" : "from", d);
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, d);
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
-#ifdef REAL_DMA
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
-#endif /* def REAL_DMA  */
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
 
        if (!(hostdata->flags & FLAG_LATE_DMA_SETUP)) {
                /* On the Medusa, it is a must to initialize the DMA before
                 * starting the NCR. This is also the cleaner way for the TT.
                 */
-               local_irq_save(flags);
                hostdata->dma_len = (p & SR_IO) ?
                        NCR5380_dma_read_setup(instance, d, c) :
                        NCR5380_dma_write_setup(instance, d, c);
-               local_irq_restore(flags);
        }
 
        if (p & SR_IO)
@@ -1912,11 +1805,9 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
                /* On the Falcon, the DMA setup must be done after the last */
                /* NCR access, else the DMA setup gets trashed!
                 */
-               local_irq_save(flags);
                hostdata->dma_len = (p & SR_IO) ?
                        NCR5380_dma_read_setup(instance, d, c) :
                        NCR5380_dma_write_setup(instance, d, c);
-               local_irq_restore(flags);
        }
 #endif /* !defined(CONFIG_SUN3) */
 
@@ -1928,23 +1819,22 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
  * Function : NCR5380_information_transfer (struct Scsi_Host *instance)
  *
  * Purpose : run through the various SCSI phases and do as the target
- *     directs us to.  Operates on the currently connected command,
- *     instance->connected.
+ * directs us to.  Operates on the currently connected command,
+ * instance->connected.
  *
  * Inputs : instance, instance for which we are doing commands
  *
  * Side effects : SCSI things happen, the disconnected queue will be
- *     modified if a command disconnects, *instance->connected will
- *     change.
+ * modified if a command disconnects, *instance->connected will
+ * change.
  *
  * XXX Note : we need to watch for bus free or a reset condition here
- *     to recover from an unexpected bus free condition.
+ * to recover from an unexpected bus free condition.
  */
 
 static void NCR5380_information_transfer(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
-       unsigned long flags;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char msgout = NOP;
        int sink = 0;
        int len;
@@ -1953,13 +1843,15 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 #endif
        unsigned char *data;
        unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
-       struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
+       struct scsi_cmnd *cmd;
 
 #ifdef SUN3_SCSI_VME
        dregs->csr |= CSR_INTR;
 #endif
 
-       while (1) {
+       while ((cmd = hostdata->connected)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
                tmp = NCR5380_read(STATUS_REG);
                /* We only have a valid SCSI phase when REQ is asserted */
                if (tmp & SR_REQ) {
@@ -1984,7 +1876,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                /* this command setup for dma yet? */
                                if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != cmd)) {
                                        if (cmd->request->cmd_type == REQ_TYPE_FS) {
-                                               sun3scsi_dma_setup(d, count,
+                                               sun3scsi_dma_setup(instance, d, count,
                                                                   rq_data_dir(cmd->request));
                                                sun3_dma_setup_done = cmd;
                                        }
@@ -2000,11 +1892,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
-                                             ICR_ASSERT_ACK);
+                                             ICR_ASSERT_ACK);
                                while (NCR5380_read(STATUS_REG) & SR_REQ)
                                        ;
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_ATN);
+                                             ICR_ASSERT_ATN);
                                sink = 0;
                                continue;
                        }
@@ -2012,12 +1904,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                        switch (phase) {
                        case PHASE_DATAOUT:
 #if (NDEBUG & NDEBUG_NO_DATAOUT)
-                               printk("scsi%d: NDEBUG_NO_DATAOUT set, attempted DATAOUT "
-                                      "aborted\n", HOSTNO);
+                               shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
                                sink = 1;
                                do_abort(instance);
                                cmd->result = DID_ERROR << 16;
-                               cmd->scsi_done(cmd);
+                               complete_cmd(instance, cmd);
                                return;
 #endif
                        case PHASE_DATAIN:
@@ -2031,13 +1922,10 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                        --cmd->SCp.buffers_residual;
                                        cmd->SCp.this_residual = cmd->SCp.buffer->length;
                                        cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
-                                       /* ++roman: Try to merge some scatter-buffers if
-                                        * they are at contiguous physical addresses.
-                                        */
                                        merge_contiguous_buffers(cmd);
-                                       dprintk(NDEBUG_INFORMATION, "scsi%d: %d bytes and %d buffers left\n",
-                                                  HOSTNO, cmd->SCp.this_residual,
-                                                  cmd->SCp.buffers_residual);
+                                       dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+                                                cmd->SCp.this_residual,
+                                                cmd->SCp.buffers_residual);
                                }
 
                                /*
@@ -2051,16 +1939,18 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                 */
 
                                /* ++roman: I suggest, this should be
-                                *   #if def(REAL_DMA)
+                                * #if def(REAL_DMA)
                                 * instead of leaving REAL_DMA out.
                                 */
 
 #if defined(REAL_DMA)
-                               if (
 #if !defined(CONFIG_SUN3)
-                                   !cmd->device->borken &&
+                               transfersize = 0;
+                               if (!cmd->device->borken)
 #endif
-                                   (transfersize = NCR5380_dma_xfer_len(instance, cmd, phase)) >= DMA_MIN_SIZE) {
+                                       transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+                               if (transfersize >= DMA_MIN_SIZE) {
                                        len = transfersize;
                                        cmd->SCp.phase = phase;
                                        if (NCR5380_transfer_dma(instance, &phase,
@@ -2068,16 +1958,15 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                /*
                                                 * If the watchdog timer fires, all future
                                                 * accesses to this device will use the
-                                                * polled-IO. */
+                                                * polled-IO.
+                                                */
                                                scmd_printk(KERN_INFO, cmd,
                                                        "switching to slow handshake\n");
                                                cmd->device->borken = 1;
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                                       ICR_ASSERT_ATN);
                                                sink = 1;
                                                do_abort(instance);
                                                cmd->result = DID_ERROR << 16;
-                                               cmd->scsi_done(cmd);
+                                               complete_cmd(instance, cmd);
                                                /* XXX - need to source or sink data here, as appropriate */
                                        } else {
 #ifdef REAL_DMA
@@ -2093,9 +1982,13 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                        }
                                } else
 #endif /* defined(REAL_DMA) */
+                               {
+                                       spin_unlock_irq(&hostdata->lock);
                                        NCR5380_transfer_pio(instance, &phase,
-                                                            (int *)&cmd->SCp.this_residual,
-                                                            (unsigned char **)&cmd->SCp.ptr);
+                                                            (int *)&cmd->SCp.this_residual,
+                                                            (unsigned char **)&cmd->SCp.ptr);
+                                       spin_lock_irq(&hostdata->lock);
+                               }
 #if defined(CONFIG_SUN3) && defined(REAL_DMA)
                                /* if we had intended to dma that command clear it */
                                if (sun3_dma_setup_done == cmd)
@@ -2105,162 +1998,64 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                        case PHASE_MSGIN:
                                len = 1;
                                data = &tmp;
-                               NCR5380_write(SELECT_ENABLE_REG, 0);    /* disable reselects */
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                cmd->SCp.Message = tmp;
 
                                switch (tmp) {
-                               /*
-                                * Linking lets us reduce the time required to get the
-                                * next command out to the device, hopefully this will
-                                * mean we don't waste another revolution due to the delays
-                                * required by ARBITRATION and another SELECTION.
-                                *
-                                * In the current implementation proposal, low level drivers
-                                * merely have to start the next command, pointed to by
-                                * next_link, done() is called as with unlinked commands.
-                                */
-#ifdef LINKED
-                               case LINKED_CMD_COMPLETE:
-                               case LINKED_FLG_CMD_COMPLETE:
-                                       /* Accept message by clearing ACK */
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
-                                       dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked command "
-                                                  "complete.\n", HOSTNO, cmd->device->id, cmd->device->lun);
-
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /*
-                                        * Sanity check : A linked command should only terminate
-                                        * with one of these messages if there are more linked
-                                        * commands available.
-                                        */
-
-                                       if (!cmd->next_link) {
-                                                printk(KERN_NOTICE "scsi%d: target %d lun %llu "
-                                                       "linked command complete, no next_link\n",
-                                                       HOSTNO, cmd->device->id, cmd->device->lun);
-                                               sink = 1;
-                                               do_abort(instance);
-                                               return;
-                                       }
-
-                                       initialize_SCp(cmd->next_link);
-                                       /* The next command is still part of this process; copy it
-                                        * and don't free it! */
-                                       cmd->next_link->tag = cmd->tag;
-                                       cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked request "
-                                                  "done, calling scsi_done().\n",
-                                                  HOSTNO, cmd->device->id, cmd->device->lun);
-                                       cmd->scsi_done(cmd);
-                                       cmd = hostdata->connected;
-                                       break;
-#endif /* def LINKED */
                                case ABORT:
                                case COMMAND_COMPLETE:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d, lun %llu "
-                                                 "completed\n", HOSTNO, cmd->device->id, cmd->device->lun);
+                                       dsprintk(NDEBUG_QUEUES, instance,
+                                                "COMMAND COMPLETE %p target %d lun %llu\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
 
-                                       local_irq_save(flags);
-                                       hostdata->retain_dma_intr++;
                                        hostdata->connected = NULL;
 #ifdef SUPPORT_TAGS
                                        cmd_free_tag(cmd);
                                        if (status_byte(cmd->SCp.Status) == QUEUE_FULL) {
-                                               /* Turn a QUEUE FULL status into BUSY, I think the
-                                                * mid level cannot handle QUEUE FULL :-( (The
-                                                * command is retried after BUSY). Also update our
-                                                * queue size to the number of currently issued
-                                                * commands now.
-                                                */
-                                               /* ++Andreas: the mid level code knows about
-                                                  QUEUE_FULL now. */
-                                               struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][cmd->device->lun];
-                                               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu returned "
-                                                          "QUEUE_FULL after %d commands\n",
-                                                          HOSTNO, cmd->device->id, cmd->device->lun,
-                                                          ta->nr_allocated);
+                                               u8 lun = cmd->device->lun;
+                                               struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
+
+                                               dsprintk(NDEBUG_TAGS, instance,
+                                                        "QUEUE_FULL %p target %d lun %d nr_allocated %d\n",
+                                                        cmd, scmd_id(cmd), lun, ta->nr_allocated);
                                                if (ta->queue_size > ta->nr_allocated)
-                                                       ta->nr_allocated = ta->queue_size;
+                                                       ta->queue_size = ta->nr_allocated;
                                        }
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
 #endif
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-
-                                       /*
-                                        * I'm not sure what the correct thing to do here is :
-                                        *
-                                        * If the command that just executed is NOT a request
-                                        * sense, the obvious thing to do is to set the result
-                                        * code to the values of the stored parameters.
-                                        *
-                                        * If it was a REQUEST SENSE command, we need some way to
-                                        * differentiate between the failure code of the original
-                                        * and the failure code of the REQUEST sense - the obvious
-                                        * case is success, where we fall through and leave the
-                                        * result code unchanged.
-                                        *
-                                        * The non-obvious place is where the REQUEST SENSE failed
-                                        */
-
-                                       if (cmd->cmnd[0] != REQUEST_SENSE)
-                                               cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       else if (status_byte(cmd->SCp.Status) != GOOD)
-                                               cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
-                                       if ((cmd->cmnd[0] == REQUEST_SENSE) &&
-                                               hostdata->ses.cmd_len) {
-                                               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
-                                               hostdata->ses.cmd_len = 0 ;
-                                       }
-
-                                       if ((cmd->cmnd[0] != REQUEST_SENSE) &&
-                                           (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
-                                               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
 
-                                               dprintk(NDEBUG_AUTOSENSE, "scsi%d: performing request sense\n", HOSTNO);
-
-                                               LIST(cmd,hostdata->issue_queue);
-                                               SET_NEXT(cmd, hostdata->issue_queue);
-                                               hostdata->issue_queue = (struct scsi_cmnd *) cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d: REQUEST SENSE added to head of "
-                                                         "issue queue\n", H_NO(cmd));
-                                       } else {
-                                               cmd->scsi_done(cmd);
+                                       cmd->result &= ~0xffff;
+                                       cmd->result |= cmd->SCp.Status;
+                                       cmd->result |= cmd->SCp.Message << 8;
+
+                                       if (cmd->cmnd[0] == REQUEST_SENSE)
+                                               complete_cmd(instance, cmd);
+                                       else {
+                                               if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+                                                   cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+                                                       dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+                                                                cmd);
+                                                       list_add_tail(&ncmd->list,
+                                                                     &hostdata->autosense);
+                                               } else
+                                                       complete_cmd(instance, cmd);
                                        }
 
-                                       local_irq_restore(flags);
-
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        /*
                                         * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
                                         */
                                        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 
-                                       local_irq_save(flags);
-                                       hostdata->retain_dma_intr--;
-                                       /* ++roman: For Falcon SCSI, release the lock on the
-                                        * ST-DMA here if no other commands are waiting on the
-                                        * disconnected queue.
-                                        */
                                        maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
                                        return;
                                case MESSAGE_REJECT:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        switch (hostdata->last_message) {
                                        case HEAD_OF_QUEUE_TAG:
                                        case ORDERED_QUEUE_TAG:
@@ -2274,27 +2069,20 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                cmd->device->tagged_supported = 0;
                                                hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
                                                cmd->tag = TAG_NONE;
-                                               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu rejected "
-                                                          "QUEUE_TAG message; tagged queuing "
-                                                          "disabled\n",
-                                                          HOSTNO, cmd->device->id, cmd->device->lun);
+                                               dsprintk(NDEBUG_TAGS, instance, "target %d lun %llu rejected QUEUE_TAG message; tagged queuing disabled\n",
+                                                        scmd_id(cmd), cmd->device->lun);
                                                break;
                                        }
                                        break;
                                case DISCONNECT:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       local_irq_save(flags);
-                                       cmd->device->disconnect = 1;
-                                       LIST(cmd,hostdata->disconnected_queue);
-                                       SET_NEXT(cmd, hostdata->disconnected_queue);
                                        hostdata->connected = NULL;
-                                       hostdata->disconnected_queue = cmd;
-                                       local_irq_restore(flags);
-                                       dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d lun %llu was "
-                                                 "moved from connected to the "
-                                                 "disconnected_queue\n", HOSTNO,
-                                                 cmd->device->id, cmd->device->lun);
+                                       list_add(&ncmd->list, &hostdata->disconnected);
+                                       dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+                                                instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
+
                                        /*
                                         * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
@@ -2303,9 +2091,6 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 
                                        /* Enable reselect interrupts */
                                        NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /* Wait for bus free to avoid nasty timeouts */
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
 #ifdef SUN3_SCSI_VME
                                        dregs->csr |= CSR_DMA_ENABLE;
 #endif
@@ -2324,37 +2109,30 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                case RESTORE_POINTERS:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        break;
                                case EXTENDED_MESSAGE:
                                        /*
-                                        * Extended messages are sent in the following format :
-                                        * Byte
-                                        * 0            EXTENDED_MESSAGE == 1
-                                        * 1            length (includes one byte for code, doesn't
-                                        *              include first two bytes)
-                                        * 2            code
-                                        * 3..length+1  arguments
-                                        *
-                                        * Start the extended message buffer with the EXTENDED_MESSAGE
+                                        * Start the message buffer with the EXTENDED_MESSAGE
                                         * byte, since spi_print_msg() wants the whole thing.
                                         */
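                                        /*
                                         * Worked example of the extended-message
                                         * layout the removed comment described,
                                         * using SDTR (values illustrative, per
                                         * SCSI-2):
                                         *   byte 0: 0x01  EXTENDED_MESSAGE
                                         *   byte 1: 0x03  length (code + two args)
                                         *   byte 2: 0x01  EXTENDED_SDTR
                                         *   byte 3: transfer period factor
                                         *   byte 4: REQ/ACK offset
                                         */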
                                        extended_msg[0] = EXTENDED_MESSAGE;
                                        /* Accept first byte by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d: receiving extended message\n", HOSTNO);
+                                       spin_unlock_irq(&hostdata->lock);
+
+                                       dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
 
                                        len = 2;
                                        data = extended_msg + 1;
                                        phase = PHASE_MSGIN;
                                        NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d: length=%d, code=0x%02x\n", HOSTNO,
-                                                  (int)extended_msg[1], (int)extended_msg[2]);
+                                       dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+                                                (int)extended_msg[1],
+                                                (int)extended_msg[2]);
 
-                                       if (!len && extended_msg[1] <=
-                                           (sizeof(extended_msg) - 1)) {
+                                       if (!len && extended_msg[1] > 0 &&
+                                           extended_msg[1] <= sizeof(extended_msg) - 2) {
                                                /* Accept third byte by clearing ACK */
                                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                                len = extended_msg[1] - 1;
@@ -2362,8 +2140,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                phase = PHASE_MSGIN;
 
                                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                               dprintk(NDEBUG_EXTENDED, "scsi%d: message received, residual %d\n",
-                                                          HOSTNO, len);
+                                               dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+                                                        len);
 
                                                switch (extended_msg[2]) {
                                                case EXTENDED_SDTR:
@@ -2373,15 +2151,18 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                        tmp = 0;
                                                }
                                        } else if (len) {
-                                               printk(KERN_NOTICE "scsi%d: error receiving "
-                                                      "extended message\n", HOSTNO);
+                                               shost_printk(KERN_ERR, instance, "error receiving extended message\n");
                                                tmp = 0;
                                        } else {
-                                               printk(KERN_NOTICE "scsi%d: extended message "
-                                                          "code %02x length %d is too long\n",
-                                                          HOSTNO, extended_msg[2], extended_msg[1]);
+                                               shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+                                                            extended_msg[2], extended_msg[1]);
                                                tmp = 0;
                                        }
+
+                                       spin_lock_irq(&hostdata->lock);
+                                       if (!hostdata->connected)
+                                               return;
+
                                        /* Fall through to reject message */
 
                                        /*
@@ -2390,8 +2171,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                         */
                                default:
                                        if (!tmp) {
-                                               printk(KERN_INFO "scsi%d: rejecting message ",
-                                                      instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "rejecting message ");
                                                spi_print_msg(extended_msg);
                                                printk("\n");
                                        } else if (tmp != EXTENDED_MESSAGE)
@@ -2414,18 +2194,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                hostdata->last_message = msgout;
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                if (msgout == ABORT) {
-                                       local_irq_save(flags);
-#ifdef SUPPORT_TAGS
-                                       cmd_free_tag(cmd);
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
                                        hostdata->connected = NULL;
                                        cmd->result = DID_ERROR << 16;
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+                                       complete_cmd(instance, cmd);
                                        maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
-                                       cmd->scsi_done(cmd);
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                }
                                msgout = NOP;
@@ -2447,22 +2220,25 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                cmd->SCp.Status = tmp;
                                break;
                        default:
-                               printk("scsi%d: unknown phase\n", HOSTNO);
+                               shost_printk(KERN_ERR, instance, "unknown phase\n");
                                NCR5380_dprint(NDEBUG_ANY, instance);
                        } /* switch(phase) */
-               } /* if (tmp * SR_REQ) */
-       } /* while (1) */
+               } else {
+                       spin_unlock_irq(&hostdata->lock);
+                       NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+                       spin_lock_irq(&hostdata->lock);
+               }
+       }
 }
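
A minimal skeleton of the loop's new shape, since it is hard to see through the hunks; dispatch_phase() is a hypothetical stand-in for the big switch above:

static void sketch_information_transfer(struct Scsi_Host *instance)
{
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
        struct scsi_cmnd *cmd;

        while ((cmd = hostdata->connected)) {
                unsigned char status = NCR5380_read(STATUS_REG);

                if (status & SR_REQ) {
                        /* Phase bits are valid only while REQ is asserted. */
                        dispatch_phase(instance, cmd, status & PHASE_MASK);
                } else {
                        /* No REQ yet: wait politely without holding the lock. */
                        spin_unlock_irq(&hostdata->lock);
                        NCR5380_poll_politely(instance, STATUS_REG,
                                              SR_REQ, SR_REQ, HZ);
                        spin_lock_irq(&hostdata->lock);
                }
        }
}
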
 
 /*
  * Function : void NCR5380_reselect (struct Scsi_Host *instance)
  *
  * Purpose : does reselection, initializing the instance->connected
- *     field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- *     nexus has been reestablished,
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
  *
  * Inputs : instance - this instance of the NCR5380.
- *
  */
 
 
@@ -2471,7 +2247,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 
 static void NCR5380_reselect(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char target_mask;
        unsigned char lun;
 #ifdef SUPPORT_TAGS
@@ -2480,7 +2256,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
        unsigned char msg[3];
        int __maybe_unused len;
        unsigned char __maybe_unused *data, __maybe_unused phase;
-       struct scsi_cmnd *tmp = NULL, *prev;
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *tmp;
 
        /*
         * Disable arbitration, etc. since the host adapter obviously
@@ -2488,11 +2265,10 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         */
 
        NCR5380_write(MODE_REG, MR_BASE);
-       hostdata->restart_select = 1;
 
        target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
 
-       dprintk(NDEBUG_RESELECTION, "scsi%d: reselect\n", HOSTNO);
+       dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
 
        /*
         * At this point, we have detected that our SCSI ID is on the bus,
@@ -2504,17 +2280,22 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         */
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
-       while (NCR5380_read(STATUS_REG) & SR_SEL)
-               ;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               return;
+       }
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
         * Wait for target to go into MSGIN.
         */
 
-       while (!(NCR5380_read(STATUS_REG) & SR_REQ))
-               ;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+               do_abort(instance);
+               return;
+       }
 
 #if defined(CONFIG_SUN3) && defined(REAL_DMA)
        /* acknowledge toggle to MSGIN */
@@ -2527,15 +2308,21 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
        data = msg;
        phase = PHASE_MSGIN;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
+
+       if (len) {
+               do_abort(instance);
+               return;
+       }
 #endif
 
        if (!(msg[0] & 0x80)) {
-               printk(KERN_DEBUG "scsi%d: expecting IDENTIFY message, got ", HOSTNO);
+               shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
                spi_print_msg(msg);
+               printk("\n");
                do_abort(instance);
                return;
        }
-       lun = (msg[0] & 0x07);
+       lun = msg[0] & 0x07;
 
 #if defined(SUPPORT_TAGS) && !defined(CONFIG_SUN3)
        /* If the phase is still MSGIN, the target wants to send some more
@@ -2551,8 +2338,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
                    msg[1] == SIMPLE_QUEUE_TAG)
                        tag = msg[2];
-               dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at "
-                          "reselection\n", HOSTNO, target_mask, lun, tag);
+               dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n",
+                        target_mask, lun, tag);
        }
 #endif
 
@@ -2561,36 +2348,34 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         * just reestablished, and remove it from the disconnected queue.
         */
 
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL;
-            tmp; prev = tmp, tmp = NEXT(tmp)) {
-               if ((target_mask == (1 << tmp->device->id)) && (lun == tmp->device->lun)
+       tmp = NULL;
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               if (target_mask == (1 << scmd_id(cmd)) &&
+                   lun == (u8)cmd->device->lun
 #ifdef SUPPORT_TAGS
-                   && (tag == tmp->tag)
+                   && (tag == cmd->tag)
 #endif
                    ) {
-                       if (prev) {
-                               REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
-                               SET_NEXT(prev, NEXT(tmp));
-                       } else {
-                               REMOVE(-1, hostdata->disconnected_queue, tmp, NEXT(tmp));
-                               hostdata->disconnected_queue = NEXT(tmp);
-                       }
-                       SET_NEXT(tmp, NULL);
+                       list_del(&ncmd->list);
+                       tmp = cmd;
                        break;
                }
        }
 
-       if (!tmp) {
-               printk(KERN_WARNING "scsi%d: warning: target bitmask %02x lun %d "
-#ifdef SUPPORT_TAGS
-                      "tag %d "
-#endif
-                      "not in disconnected_queue.\n",
-                      HOSTNO, target_mask, lun
+       if (tmp) {
+               dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+                        "reselect: removed %p from disconnected queue\n", tmp);
+       } else {
+
 #ifdef SUPPORT_TAGS
-                      , tag
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d tag %d not in disconnected queue.\n",
+                            target_mask, lun, tag);
+#else
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+                            target_mask, lun);
 #endif
-                       );
                /*
                 * Since we have an established nexus that we can't do anything
                 * with, we must abort it.
@@ -2614,7 +2399,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                }
                /* setup this command for dma if not already */
                if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != tmp)) {
-                       sun3scsi_dma_setup(d, count, rq_data_dir(tmp->request));
+                       sun3scsi_dma_setup(instance, d, count,
+                                          rq_data_dir(tmp->request));
                        sun3_dma_setup_done = tmp;
                }
        }
@@ -2639,235 +2425,196 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
                    msg[1] == SIMPLE_QUEUE_TAG)
                        tag = msg[2];
-               dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at reselection\n"
-                       HOSTNO, target_mask, lun, tag);
+               dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n",
+                        target_mask, lun, tag);
        }
 #endif
 
        hostdata->connected = tmp;
-       dprintk(NDEBUG_RESELECTION, "scsi%d: nexus established, target = %d, lun = %llu, tag = %d\n",
-                  HOSTNO, tmp->device->id, tmp->device->lun, tmp->tag);
+       dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+                scmd_id(tmp), tmp->device->lun, tmp->tag);
 }
 
 
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- *     host byte of the result field to, if zero DID_ABORTED is
- *     used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- * XXX - there is no way to abort the command that is currently
- *      connected, you have to wait for it to complete.  If this is
- *      a problem, we could implement longjmp() / setjmp(), setjmp()
- *      called where the loop started in NCR5380_main().
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
  */
 
-static
-int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+                          struct scsi_cmnd *needle)
 {
-       struct Scsi_Host *instance = cmd->device->host;
-       SETUP_HOSTDATA(instance);
-       struct scsi_cmnd *tmp, **prev;
-       unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       scmd_printk(KERN_NOTICE, cmd, "aborting command\n");
+       list_for_each_entry(ncmd, haystack, list)
+               if (NCR5380_to_scmd(ncmd) == needle)
+                       return true;
+       return false;
+}
 
-       NCR5380_print_status(instance);
+/**
+ * list_del_cmd - remove a command from a linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
 
-       local_irq_save(flags);
+static bool list_del_cmd(struct list_head *haystack,
+                         struct scsi_cmnd *needle)
+{
+       if (list_find_cmd(haystack, needle)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
 
-       dprintk(NDEBUG_ABORT, "scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
-                   NCR5380_read(BUS_AND_STATUS_REG),
-                   NCR5380_read(STATUS_REG));
+               list_del(&ncmd->list);
+               return true;
+       }
+       return false;
+}
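
One reason the membership check matters: list_del() on an entry that is not on the given list would still unlink it from whichever list does hold it. A hedged illustration of how callers can then probe several queues safely (queue names as used elsewhere in this diff):

        if (list_del_cmd(&hostdata->unissued, cmd) ||
            list_del_cmd(&hostdata->autosense, cmd) ||
            list_del_cmd(&hostdata->disconnected, cmd))
                dsprintk(NDEBUG_ABORT, instance,
                         "removed %p from a queue\n", cmd);
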
 
-#if 1
-       /*
-        * Case 1 : If the command is the currently executing command,
-        * we'll set the aborted flag and return control so that
-        * information transfer routine can exit cleanly.
-        */
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
+ */
 
-       if (hostdata->connected == cmd) {
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long flags;
+       int result = SUCCESS;
 
-               dprintk(NDEBUG_ABORT, "scsi%d: aborting connected command\n", HOSTNO);
-               /*
-                * We should perform BSY checking, and make sure we haven't slipped
-                * into BUS FREE.
-                */
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-               /*      NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN); */
-               /*
-                * Since we can't change phases until we've completed the current
-                * handshake, we have to source or sink a byte of data if the current
-                * phase is not MSGOUT.
-                */
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-               /*
-                * Return control to the executing NCR drive so we can clear the
-                * aborted flag and get back into our main loop.
-                */
+       if (list_del_cmd(&hostdata->unissued, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from issue queue\n", cmd);
+               cmd->result = DID_ABORT << 16;
+               cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
+       }
 
-               if (do_abort(instance) == 0) {
-                       hostdata->aborted = 1;
-                       hostdata->connected = NULL;
-                       cmd->result = DID_ABORT << 16;
-#ifdef SUPPORT_TAGS
-                       cmd_free_tag(cmd);
-#else
-                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
-                       maybe_release_dma_irq(instance);
-                       local_irq_restore(flags);
-                       cmd->scsi_done(cmd);
-                       return SUCCESS;
-               } else {
-                       local_irq_restore(flags);
-                       printk("scsi%d: abort of connected command failed!\n", HOSTNO);
-                       return FAILED;
-               }
+       if (hostdata->selecting == cmd) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: cmd %p == selecting\n", cmd);
+               hostdata->selecting = NULL;
+               cmd->result = DID_ABORT << 16;
+               complete_cmd(instance, cmd);
+               goto out;
        }
-#endif
 
-       /*
-        * Case 2 : If the command hasn't been issued yet, we simply remove it
-        *          from the issue queue.
-        */
-       for (prev = (struct scsi_cmnd **)&(hostdata->issue_queue),
-            tmp = (struct scsi_cmnd *)hostdata->issue_queue;
-            tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
-               if (cmd == tmp) {
-                       REMOVE(5, *prev, tmp, NEXT(tmp));
-                       (*prev) = NEXT(tmp);
-                       SET_NEXT(tmp, NULL);
-                       tmp->result = DID_ABORT << 16;
-                       maybe_release_dma_irq(instance);
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_ABORT, "scsi%d: abort removed command from issue queue.\n",
-                                   HOSTNO);
-                       /* Tagged queuing note: no tag to free here, hasn't been assigned
-                        * yet... */
-                       tmp->scsi_done(tmp);
-                       return SUCCESS;
+       if (list_del_cmd(&hostdata->disconnected, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from disconnected list\n", cmd);
+               cmd->result = DID_ERROR << 16;
+               if (!hostdata->connected)
+                       NCR5380_select(instance, cmd);
+               if (hostdata->connected != cmd) {
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
                }
        }
 
-       /*
-        * Case 3 : If any commands are connected, we're going to fail the abort
-        *          and let the high level SCSI driver retry at a later time or
-        *          issue a reset.
-        *
-        *          Timeouts, and therefore aborted commands, will be highly unlikely
-        *          and handling them cleanly in this situation would make the common
-        *          case of noresets less efficient, and would pollute our code.  So,
-        *          we fail.
-        */
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               if (cmd->cmnd[0] == REQUEST_SENSE)
+                       complete_cmd(instance, cmd);
+               else {
+                       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-       if (hostdata->connected) {
-               local_irq_restore(flags);
-               dprintk(NDEBUG_ABORT, "scsi%d: abort failed, command connected.\n", HOSTNO);
-               return FAILED;
+                       /* Perform autosense for this command */
+                       list_add(&ncmd->list, &hostdata->autosense);
+               }
        }
 
-       /*
-        * Case 4: If the command is currently disconnected from the bus, and
-        *      there are no connected commands, we reconnect the I_T_L or
-        *      I_T_L_Q nexus associated with it, go into message out, and send
-        *      an abort message.
-        *
-        * This case is especially ugly. In order to reestablish the nexus, we
-        * need to call NCR5380_select().  The easiest way to implement this
-        * function was to abort if the bus was busy, and let the interrupt
-        * handler triggered on the SEL for reselect take care of lost arbitrations
-        * where necessary, meaning interrupts need to be enabled.
-        *
-        * When interrupts are enabled, the queues may change - so we
-        * can't remove it from the disconnected queue before selecting it
-        * because that could cause a failure in hashing the nexus if that
-        * device reselected.
-        *
-        * Since the queues may change, we can't use the pointers from when we
-        * first locate it.
-        *
-        * So, we must first locate the command, and if NCR5380_select()
-        * succeeds, then issue the abort, relocate the command and remove
-        * it from the disconnected queue.
-        */
-
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp;
-            tmp = NEXT(tmp)) {
-               if (cmd == tmp) {
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_ABORT, "scsi%d: aborting disconnected command.\n", HOSTNO);
-
-                       if (NCR5380_select(instance, cmd))
-                               return FAILED;
-
-                       dprintk(NDEBUG_ABORT, "scsi%d: nexus reestablished.\n", HOSTNO);
-
-                       do_abort(instance);
-
-                       local_irq_save(flags);
-                       for (prev = (struct scsi_cmnd **)&(hostdata->disconnected_queue),
-                            tmp = (struct scsi_cmnd *)hostdata->disconnected_queue;
-                            tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
-                               if (cmd == tmp) {
-                                       REMOVE(5, *prev, tmp, NEXT(tmp));
-                                       *prev = NEXT(tmp);
-                                       SET_NEXT(tmp, NULL);
-                                       tmp->result = DID_ABORT << 16;
-                                       /* We must unlock the tag/LUN immediately here, since the
-                                        * target goes to BUS FREE and doesn't send us another
-                                        * message (COMMAND_COMPLETE or the like)
-                                        */
-#ifdef SUPPORT_TAGS
-                                       cmd_free_tag(tmp);
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
-                                       maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
-                                       tmp->scsi_done(tmp);
-                                       return SUCCESS;
-                               }
-                       }
+       if (list_find_cmd(&hostdata->autosense, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: found %p on sense queue\n", cmd);
+               spin_unlock_irqrestore(&hostdata->lock, flags);
+               queue_work(hostdata->work_q, &hostdata->main_task);
+               msleep(1000);
+               spin_lock_irqsave(&hostdata->lock, flags);
+               if (list_del_cmd(&hostdata->autosense, cmd)) {
+                       dsprintk(NDEBUG_ABORT, instance,
+                                "abort: removed %p from sense queue\n", cmd);
+                       set_host_byte(cmd, DID_ABORT);
+                       complete_cmd(instance, cmd);
+                       goto out;
                }
        }
 
-       /* Maybe it is sufficient just to release the ST-DMA lock... (if
-        * possible at all) At least, we should check if the lock could be
-        * released after the abort, in case it is kept due to some bug.
-        */
-       maybe_release_dma_irq(instance);
-       local_irq_restore(flags);
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               complete_cmd(instance, cmd);
+       }
 
-       /*
-        * Case 5 : If we reached this point, the command was not found in any of
-        *          the queues.
-        *
-        * We probably reached this point because of an unlikely race condition
-        * between the command completing successfully and the abortion code,
-        * so we won't panic, but we will notify the user in case something really
-        * broke.
-        */
+out:
+       if (result == FAILED)
+               dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+       else
+               dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
 
-       printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully before abortion\n", HOSTNO);
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       maybe_release_dma_irq(instance);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-       return FAILED;
+       return result;
 }
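
set_host_byte(), used throughout this handler, is the standard scsi_cmnd.h
helper that folds a host status code such as DID_ABORT or DID_ERROR into
the command's result word; roughly:

    static inline void set_host_byte(struct scsi_cmnd *cmd, char status)
    {
            /* the host byte occupies bits 16-23 of cmd->result */
            cmd->result = (cmd->result & 0xff00ffff) | (status << 16);
    }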
 
 
-/*
- * Function : int NCR5380_reset (struct scsi_cmnd *cmd)
- *
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS or FAILURE
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
  *
+ * Returns SUCCESS
  */
 
 static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
@@ -2876,23 +2623,22 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int i;
        unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       NCR5380_print_status(instance);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, "%s\n", __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
+
+       do_reset(instance);
 
-       /* get in phase */
-       NCR5380_write(TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
-       /* assert RST */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       udelay(40);
        /* reset NCR registers */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
-       /* ++roman: reset interrupt condition! otherwise no interrupts don't get
-        * through anymore ... */
-       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
        /* After the reset, there are no more connected or disconnected commands
         * and no busy units; so clear the low-level status here to avoid
@@ -2900,17 +2646,34 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
         * commands!
         */
 
-       if (hostdata->issue_queue)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
-       if (hostdata->connected)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
-       if (hostdata->disconnected_queue)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
+       hostdata->selecting = NULL;
+
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       list_for_each_entry(ncmd, &hostdata->autosense, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       if (hostdata->connected) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->connected);
+               hostdata->connected = NULL;
+       }
+
+       if (hostdata->sensing) {
+               set_host_byte(hostdata->sensing, DID_RESET);
+               complete_cmd(instance, hostdata->sensing);
+               hostdata->sensing = NULL;
+       }
 
-       local_irq_save(flags);
-       hostdata->issue_queue = NULL;
-       hostdata->connected = NULL;
-       hostdata->disconnected_queue = NULL;
 #ifdef SUPPORT_TAGS
        free_all_tags(hostdata);
 #endif
@@ -2920,8 +2683,9 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
        hostdata->dma_len = 0;
 #endif
 
+       queue_work(hostdata->work_q, &hostdata->main_task);
        maybe_release_dma_irq(instance);
-       local_irq_restore(flags);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
        return SUCCESS;
 }
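
The register sequence deleted above (mirror the current bus phase, assert
RST, wait out the hold time, drop RST, clear the latched interrupt) has
moved into the do_reset() helper that this hunk calls; judging from the
removed lines, it amounts to something like:

    static void do_reset(struct Scsi_Host *instance)
    {
            /* get in phase */
            NCR5380_write(TARGET_COMMAND_REG,
                          PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
            /* assert RST; the minimum hold time is 25 us */
            NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
            udelay(40);
            NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
            /* clear any latched parity/interrupt condition */
            (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
    }
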
index 5ede3daa93dc546c63a67809882f61009fb29582..78d1b2963f2c04b00ef0afbfb33be0560eef0361 100644 (file)
@@ -66,7 +66,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
@@ -98,7 +97,6 @@
 
 #define NCR5380_queue_command           atari_scsi_queue_command
 #define NCR5380_abort                   atari_scsi_abort
-#define NCR5380_show_info               atari_scsi_show_info
 #define NCR5380_info                    atari_scsi_info
 
 #define NCR5380_dma_read_setup(instance, data, count) \
@@ -161,23 +159,10 @@ static inline unsigned long SCSI_DMA_GETADR(void)
        return adr;
 }
 
-#define HOSTDATA_DMALEN                (((struct NCR5380_hostdata *) \
-                               (atari_scsi_host->hostdata))->dma_len)
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#ifndef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
-#define        AFTER_RESET_DELAY       (HZ/2)
-#else
-#define        AFTER_RESET_DELAY       (5*HZ/2)
-#endif
-
 #ifdef REAL_DMA
 static void atari_scsi_fetch_restbytes(void);
 #endif
 
-static struct Scsi_Host *atari_scsi_host;
 static unsigned char (*atari_scsi_reg_read)(unsigned char reg);
 static void (*atari_scsi_reg_write)(unsigned char reg, unsigned char value);
 
@@ -208,12 +193,12 @@ static int setup_cmd_per_lun = -1;
 module_param(setup_cmd_per_lun, int, 0);
 static int setup_sg_tablesize = -1;
 module_param(setup_sg_tablesize, int, 0);
-#ifdef SUPPORT_TAGS
 static int setup_use_tagged_queuing = -1;
 module_param(setup_use_tagged_queuing, int, 0);
-#endif
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
 
 
 #if defined(REAL_DMA)
@@ -273,15 +258,17 @@ static void scsi_dma_buserr(int irq, void *dummy)
 #endif
 
 
-static irqreturn_t scsi_tt_intr(int irq, void *dummy)
+static irqreturn_t scsi_tt_intr(int irq, void *dev)
 {
 #ifdef REAL_DMA
+       struct Scsi_Host *instance = dev;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int dma_stat;
 
        dma_stat = tt_scsi_dma.dma_ctrl;
 
-       dprintk(NDEBUG_INTR, "scsi%d: NCR5380 interrupt, DMA status = %02x\n",
-                  atari_scsi_host->host_no, dma_stat & 0xff);
+       dsprintk(NDEBUG_INTR, instance, "NCR5380 interrupt, DMA status = %02x\n",
+                dma_stat & 0xff);
 
        /* Check whether it was the DMA that raised the interrupt: the first
         * possibility is that a bus error occurred...
@@ -304,7 +291,8 @@ static irqreturn_t scsi_tt_intr(int irq, void *dummy)
         * data reg!
         */
        if ((dma_stat & 0x02) && !(dma_stat & 0x40)) {
-               atari_dma_residual = HOSTDATA_DMALEN - (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
+               atari_dma_residual = hostdata->dma_len -
+                       (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
 
                dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
                           atari_dma_residual);
@@ -356,15 +344,17 @@ static irqreturn_t scsi_tt_intr(int irq, void *dummy)
 
 #endif /* REAL_DMA */
 
-       NCR5380_intr(irq, dummy);
+       NCR5380_intr(irq, dev);
 
        return IRQ_HANDLED;
 }
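
Passing the Scsi_Host as the dev_id cookie is what lets the handler drop
the file-scope atari_scsi_host pointer deleted above; the registration in
the probe path (see a later hunk) then passes the instance directly, along
the lines of:

    error = request_irq(instance->irq, scsi_tt_intr, 0,
                        "SCSI NCR5380", instance);  /* dev_id == instance */
    if (error)
            goto fail_irq;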
 
 
-static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
+static irqreturn_t scsi_falcon_intr(int irq, void *dev)
 {
 #ifdef REAL_DMA
+       struct Scsi_Host *instance = dev;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int dma_stat;
 
        /* Turn off DMA and select sector counter register before
@@ -399,7 +389,7 @@ static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
                        printk(KERN_ERR "SCSI DMA error: %ld bytes lost in "
                               "ST-DMA fifo\n", transferred & 15);
 
-               atari_dma_residual = HOSTDATA_DMALEN - transferred;
+               atari_dma_residual = hostdata->dma_len - transferred;
                dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
                           atari_dma_residual);
        } else
@@ -411,13 +401,14 @@ static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
                 * data to the original destination address.
                 */
                memcpy(atari_dma_orig_addr, phys_to_virt(atari_dma_startaddr),
-                      HOSTDATA_DMALEN - atari_dma_residual);
+                      hostdata->dma_len - atari_dma_residual);
                atari_dma_orig_addr = NULL;
        }
 
 #endif /* REAL_DMA */
 
-       NCR5380_intr(irq, dummy);
+       NCR5380_intr(irq, dev);
+
        return IRQ_HANDLED;
 }
 
@@ -488,7 +479,7 @@ static int __init atari_scsi_setup(char *str)
         * Defaults depend on TT or Falcon, determined at run time.
         * Negative values mean don't change.
         */
-       int ints[6];
+       int ints[8];
 
        get_options(str, ARRAY_SIZE(ints), ints);
 
@@ -504,10 +495,11 @@ static int __init atari_scsi_setup(char *str)
                setup_sg_tablesize = ints[3];
        if (ints[0] >= 4)
                setup_hostid = ints[4];
-#ifdef SUPPORT_TAGS
        if (ints[0] >= 5)
                setup_use_tagged_queuing = ints[5];
-#endif
+       /* ints[6] (use_pdma) is ignored */
+       if (ints[0] >= 7)
+               setup_toshiba_delay = ints[7];
 
        return 1;
 }
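
With the seventh option wired up (and the sixth accepted but ignored), a
boot line can now enable the Toshiba delay while leaving everything else
at its default, since negative values mean "don't change"; for example:

    atascsi=-1,-1,-1,-1,-1,0,1
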
@@ -516,38 +508,6 @@ __setup("atascsi=", atari_scsi_setup);
 #endif /* !MODULE */
 
 
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
-static void __init atari_scsi_reset_boot(void)
-{
-       unsigned long end;
-
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No messing
-        * with the queues, interrupts, or locks necessary here.
-        */
-
-       printk("Atari SCSI: resetting the SCSI bus...");
-
-       /* get in phase */
-       NCR5380_write(TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
-
-       /* assert RST */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay(50);
-       /* reset RST and interrupt */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-
-       end = jiffies + AFTER_RESET_DELAY;
-       while (time_before(jiffies, end))
-               barrier();
-
-       printk(" done\n");
-}
-#endif
-
 #if defined(REAL_DMA)
 
 static unsigned long atari_scsi_dma_setup(struct Scsi_Host *instance,
@@ -815,14 +775,14 @@ static int atari_scsi_bus_reset(struct scsi_cmnd *cmd)
 static struct scsi_host_template atari_scsi_template = {
        .module                 = THIS_MODULE,
        .proc_name              = DRV_MODULE_NAME,
-       .show_info              = atari_scsi_show_info,
        .name                   = "Atari native SCSI",
        .info                   = atari_scsi_info,
        .queuecommand           = atari_scsi_queue_command,
        .eh_abort_handler       = atari_scsi_abort,
        .eh_bus_reset_handler   = atari_scsi_bus_reset,
        .this_id                = 7,
-       .use_clustering         = DISABLE_CLUSTERING
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
 };
 
 static int __init atari_scsi_probe(struct platform_device *pdev)
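
The new .cmd_size field asks the mid-layer to allocate NCR5380_CMD_SIZE
bytes of driver-private data behind every scsi_cmnd; that per-command area
is what lets the core NCR5380 code keep commands on list_heads instead of
hand-rolled queues. The pieces involved, essentially as defined in
NCR5380.h by this patch set and in scsi_cmnd.h, are:

    struct NCR5380_cmd {
            struct list_head list;
    };

    #define NCR5380_CMD_SIZE (sizeof(struct NCR5380_cmd))

    /* the private area sits immediately after the scsi_cmnd itself */
    static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd)
    {
            return cmd + 1;
    }
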
@@ -880,7 +840,7 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
        } else {
                /* Test if a host id is set in the NVRam */
                if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) {
-                       unsigned char b = nvram_read_byte(14);
+                       unsigned char b = nvram_read_byte(16);
 
                        /* Arbitration enabled? (for TOS)
                         * If yes, use configured host ID
@@ -915,21 +875,18 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
                error = -ENOMEM;
                goto fail_alloc;
        }
-       atari_scsi_host = instance;
-
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
-       atari_scsi_reset_boot();
-#endif
 
        instance->irq = irq->start;
 
        host_flags |= IS_A_TT() ? 0 : FLAG_LATE_DMA_SETUP;
-
 #ifdef SUPPORT_TAGS
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
+       host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        if (IS_A_TT()) {
                error = request_irq(instance->irq, scsi_tt_intr, 0,
@@ -975,6 +932,8 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
 #endif
        }
 
+       NCR5380_maybe_reset_bus(instance);
+
        error = scsi_add_host(instance, NULL);
        if (error)
                goto fail_host;
@@ -989,6 +948,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
 fail_alloc:
        if (atari_dma_buffer)
index 4e7cad27246944f6e79cf21a56fb3f25f96e1234..bad5f32e1f670587ddedda3ec97104e1c2d4555a 100644 (file)
@@ -3,6 +3,7 @@ config BE2ISCSI
        depends on PCI && SCSI && NET
        select SCSI_ISCSI_ATTRS
        select ISCSI_BOOT_SYSFS
+       select IRQ_POLL
 
        help
        This driver implements the iSCSI functionality for Emulex
index 77f992e7472629eeeaedf3470ced43d02077d9ca..a41c6432f4446cf9082916a0859e08be3c0a70c7 100644 (file)
@@ -20,7 +20,7 @@
 
 #include <linux/pci.h>
 #include <linux/if_vlan.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
 #define FW_VER_LEN     32
 #define MCC_Q_LEN      128
 #define MCC_CQ_LEN     256
@@ -101,7 +101,7 @@ struct be_eq_obj {
        struct beiscsi_hba *phba;
        struct be_queue_info *cq;
        struct work_struct work_cqs; /* Work Item */
-       struct blk_iopoll       iopoll;
+       struct irq_poll iopoll;
 };
 
 struct be_mcc_obj {
index b7087ba69d8df1c2daa9ac7b1b8a9ab314257a8b..022e87b62e401a37e1d6578e065f389ce560a8b4 100644 (file)
@@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba)
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
                beiscsi_process_cq(pbe_eq);
-               blk_iopoll_enable(&pbe_eq->iopoll);
+               irq_poll_enable(&pbe_eq->iopoll);
        }
 }
 
index fe0c5143f8e68a6cb3e9618c98258c9e4118e1ae..cb9072a841be19cbfde9ff655fec50c523edf8b3 100644 (file)
@@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
        num_eq_processed = 0;
        while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32]
                                & EQE_VALID_MASK) {
-               if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
-                       blk_iopoll_sched(&pbe_eq->iopoll);
+               irq_poll_sched(&pbe_eq->iopoll);
 
                AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
                queue_tail_inc(eq);
@@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id)
                        spin_unlock_irqrestore(&phba->isr_lock, flags);
                        num_mcceq_processed++;
                } else {
-                       if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
-                               blk_iopoll_sched(&pbe_eq->iopoll);
+                       irq_poll_sched(&pbe_eq->iopoll);
                        num_ioeq_processed++;
                }
                AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
@@ -2295,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work)
        hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
 }
 
-static int be_iopoll(struct blk_iopoll *iop, int budget)
+static int be_iopoll(struct irq_poll *iop, int budget)
 {
        unsigned int ret;
        struct beiscsi_hba *phba;
@@ -2306,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget)
        pbe_eq->cq_count += ret;
        if (ret < budget) {
                phba = pbe_eq->phba;
-               blk_iopoll_complete(iop);
+               irq_poll_complete(iop);
                beiscsi_log(phba, KERN_INFO,
                            BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
                            "BM_%d : rearm pbe_eq->q.id =%d\n",
@@ -5293,7 +5291,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba,
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
        }
 
        if (unload_state == BEISCSI_CLEAN_UNLOAD) {
@@ -5579,9 +5577,8 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev)
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+               irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
                                be_iopoll);
-               blk_iopoll_enable(&pbe_eq->iopoll);
        }
 
        i = (phba->msix_enabled) ? i : 0;
@@ -5752,9 +5749,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+               irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
                                be_iopoll);
-               blk_iopoll_enable(&pbe_eq->iopoll);
        }
 
        i = (phba->msix_enabled) ? i : 0;
@@ -5795,7 +5791,7 @@ free_blkenbld:
        destroy_workqueue(phba->wq);
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
        }
 free_twq:
        beiscsi_clean_port(phba);
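
These be2iscsi hunks are a mechanical conversion from the old blk-iopoll
interface to irq_poll (<linux/irq_poll.h>). Two details are worth noting:
irq_poll_sched() performs its own scheduling check, so the old
blk_iopoll_sched_prep() guard disappears, and a freshly initialized
instance no longer needs a separate enable call. The resulting driver
pattern is roughly:

    /* poll callback: process up to 'budget' completions */
    static int be_iopoll(struct irq_poll *iop, int budget)
    {
            struct be_eq_obj *pbe_eq = container_of(iop, struct be_eq_obj,
                                                    iopoll);
            int work = beiscsi_process_cq(pbe_eq);

            if (work < budget)
                    irq_poll_complete(iop);  /* done; re-arm the interrupt */
            return work;
    }

    irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); /* setup */
    irq_poll_sched(&pbe_eq->iopoll);       /* from the hard interrupt path */
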
index 0e2bee937fe81b345abee5ca20beec65f098f175..e22a268fd311b9a859166c57a32b3cf499fd6c26 100644 (file)
@@ -57,7 +57,7 @@ MODULE_PARM_DESC(cxgb3i_snd_win, "TCP send window in bytes (default=128KB)");
 
 static int cxgb3i_rx_credit_thres = 10 * 1024;
 module_param(cxgb3i_rx_credit_thres, int, 0644);
-MODULE_PARM_DESC(rx_credit_thres,
+MODULE_PARM_DESC(cxgb3i_rx_credit_thres,
                 "RX credits return threshold in bytes (default=10KB)");
 
 static unsigned int cxgb3i_max_connect = 8 * 1024;
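
The cxgb3i fix above matters because modinfo keys parameter descriptions
by the exact name registered with module_param(); a mismatched
MODULE_PARM_DESC() attaches its text to a parameter that does not exist.
The two must pair up:

    module_param(cxgb3i_rx_credit_thres, int, 0644);
    MODULE_PARM_DESC(cxgb3i_rx_credit_thres,
                     "RX credits return threshold in bytes (default=10KB)");
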
index 3613581343159dc67e8980424b2346d301dec01f..93880ed6291cfbda8a88ad6130a433c0c461c727 100644 (file)
@@ -562,7 +562,7 @@ static int mode_select_handle_sense(struct scsi_device *sdev,
                        /*
                         * Command Lock contention
                         */
-                       err = SCSI_DH_RETRY;
+                       err = SCSI_DH_IMM_RETRY;
                break;
        default:
                break;
@@ -612,6 +612,8 @@ retry:
                err = mode_select_handle_sense(sdev, h->sense);
                if (err == SCSI_DH_RETRY && retry_cnt--)
                        goto retry;
+               if (err == SCSI_DH_IMM_RETRY)
+                       goto retry;
        }
        if (err == SCSI_DH_OK) {
                h->state = RDAC_STATE_ACTIVE;
index 3e088125a8be6a339757dc1380f1c73418952bfc..6c14e68b9e1a80aab149ed3f7556ce3e22de7280 100644 (file)
 
 #define DONT_USE_INTR
 
-#define NCR5380_read(reg)              inb(port + reg)
-#define NCR5380_write(reg, value)      outb(value, port + reg)
+#define NCR5380_read(reg)              inb(instance->io_port + reg)
+#define NCR5380_write(reg, value)      outb(value, instance->io_port + reg)
 
 #define NCR5380_implementation_fields  /* none */
-#define NCR5380_local_declare()                unsigned int port
-#define NCR5380_setup(instance)                port = instance->io_port
-
-/*
- * Includes needed for NCR5380.[ch] (XXX: Move them to NCR5380.h)
- */
-#include <linux/delay.h>
 
 #include "NCR5380.h"
 #include "NCR5380.c"
@@ -56,6 +49,7 @@
 
 
 static struct scsi_host_template dmx3191d_driver_template = {
+       .module                 = THIS_MODULE,
        .proc_name              = DMX3191D_DRIVER_NAME,
        .name                   = "Domex DMX3191D",
        .info                   = NCR5380_info,
@@ -67,6 +61,8 @@ static struct scsi_host_template dmx3191d_driver_template = {
        .sg_tablesize           = SG_ALL,
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int dmx3191d_probe_one(struct pci_dev *pdev,
@@ -97,17 +93,25 @@ static int dmx3191d_probe_one(struct pci_dev *pdev,
         */
        shost->irq = NO_IRQ;
 
-       NCR5380_init(shost, FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E);
+       error = NCR5380_init(shost, FLAG_NO_PSEUDO_DMA);
+       if (error)
+               goto out_host_put;
+
+       NCR5380_maybe_reset_bus(shost);
 
        pci_set_drvdata(pdev, shost);
 
        error = scsi_add_host(shost, &pdev->dev);
        if (error)
-               goto out_release_region;
+               goto out_exit;
 
        scsi_scan_host(shost);
        return 0;
 
+out_exit:
+       NCR5380_exit(shost);
+out_host_put:
+       scsi_host_put(shost);
  out_release_region:
        release_region(io, DMX3191D_REGION_LEN);
  out_disable_device:
@@ -119,15 +123,14 @@ static int dmx3191d_probe_one(struct pci_dev *pdev,
 static void dmx3191d_remove_one(struct pci_dev *pdev)
 {
        struct Scsi_Host *shost = pci_get_drvdata(pdev);
+       unsigned long io = shost->io_port;
 
        scsi_remove_host(shost);
 
        NCR5380_exit(shost);
-
-       release_region(shost->io_port, DMX3191D_REGION_LEN);
-       pci_disable_device(pdev);
-
        scsi_host_put(shost);
+       release_region(io, DMX3191D_REGION_LEN);
+       pci_disable_device(pdev);
 }
 
 static struct pci_device_id dmx3191d_pci_tbl[] = {
index 4c74c7ba2dff7cf6829684176c484ecbf356cea1..6c736b071cf4bcbaf8f5fb68b19192dc039dd293 100644 (file)
@@ -1,9 +1,5 @@
-
 #define PSEUDO_DMA
 #define DONT_USE_INTR
-#define UNSAFE                 /* Leave interrupts enabled during pseudo-dma I/O */
-#define DMA_WORKS_RIGHT
-
 
 /*
  * DTC 3180/3280 driver, by
 
 
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <scsi/scsi_host.h>
+
 #include "dtc.h"
 #define AUTOPROBE_IRQ
 #include "NCR5380.h"
@@ -150,7 +144,7 @@ static const struct signature {
 
 static int __init dtc_setup(char *str)
 {
-       static int commandline_current = 0;
+       static int commandline_current;
        int i;
        int ints[10];
 
@@ -188,7 +182,7 @@ __setup("dtc=", dtc_setup);
 
 static int __init dtc_detect(struct scsi_host_template * tpnt)
 {
-       static int current_override = 0, current_base = 0;
+       static int current_override, current_base;
        struct Scsi_Host *instance;
        unsigned int addr;
        void __iomem *base;
@@ -205,9 +199,8 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
                                addr = 0;
                } else
                        for (; !addr && (current_base < NO_BASES); ++current_base) {
-#if (DTCDEBUG & DTCDEBUG_INIT)
-                               printk(KERN_DEBUG "scsi-dtc : probing address %08x\n", bases[current_base].address);
-#endif
+                               dprintk(NDEBUG_INIT, "dtc: probing address 0x%08x\n",
+                                       (unsigned int)bases[current_base].address);
                                if (bases[current_base].noauto)
                                        continue;
                                base = ioremap(bases[current_base].address, 0x2000);
@@ -216,18 +209,14 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
                                for (sig = 0; sig < NO_SIGNATURES; ++sig) {
                                        if (check_signature(base + signatures[sig].offset, signatures[sig].string, strlen(signatures[sig].string))) {
                                                addr = bases[current_base].address;
-#if (DTCDEBUG & DTCDEBUG_INIT)
-                                               printk(KERN_DEBUG "scsi-dtc : detected board.\n");
-#endif
+                                               dprintk(NDEBUG_INIT, "dtc: detected board\n");
                                                goto found;
                                        }
                                }
                                iounmap(base);
                        }
 
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
-               printk(KERN_DEBUG "scsi-dtc : base = %08x\n", addr);
-#endif
+               dprintk(NDEBUG_INIT, "dtc: addr = 0x%08x\n", addr);
 
                if (!addr)
                        break;
@@ -235,12 +224,15 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
 found:
                instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
                if (instance == NULL)
-                       break;
+                       goto out_unmap;
 
                instance->base = addr;
                ((struct NCR5380_hostdata *)(instance)->hostdata)->base = base;
 
-               NCR5380_init(instance, 0);
+               if (NCR5380_init(instance, FLAG_NO_DMA_FIXUP))
+                       goto out_unregister;
+
+               NCR5380_maybe_reset_bus(instance);
 
                NCR5380_write(DTC_CONTROL_REG, CSR_5380_INTR);  /* Enable int's */
                if (overrides[current_override].irq != IRQ_AUTO)
@@ -271,14 +263,19 @@ found:
                        printk(KERN_WARNING "scsi%d : interrupts not used. Might as well not jumper it.\n", instance->host_no);
                instance->irq = NO_IRQ;
 #endif
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
-               printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+               dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+                       instance->host_no, instance->irq);
 
                ++current_override;
                ++count;
        }
        return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_unmap:
+       iounmap(base);
+       return count;
 }
 
 /*
@@ -331,12 +328,8 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
        unsigned char *d = dst;
        int i;                  /* For counting time spent in the poll-loop */
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
        i = 0;
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-       NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
        if (instance->irq == NO_IRQ)
                NCR5380_write(DTC_CONTROL_REG, CSR_DIR_READ);
        else
@@ -348,7 +341,7 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
                while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
                        ++i;
                rtrc(3);
-               memcpy_fromio(d, base + DTC_DATA_BUF, 128);
+               memcpy_fromio(d, hostdata->base + DTC_DATA_BUF, 128);
                d += 128;
                len -= 128;
                rtrc(7);
@@ -358,9 +351,7 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
        rtrc(4);
        while (!(NCR5380_read(DTC_CONTROL_REG) & D_CR_ACCESS))
                ++i;
-       NCR5380_write(MODE_REG, 0);     /* Clear the operating mode */
        rtrc(0);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        if (i > hostdata->spin_max_r)
                hostdata->spin_max_r = i;
        return (0);
@@ -383,12 +374,7 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
 {
        int i;
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-       NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
-       /* set direction (write) */
        if (instance->irq == NO_IRQ)
                NCR5380_write(DTC_CONTROL_REG, 0);
        else
@@ -400,7 +386,7 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
                while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
                        ++i;
                rtrc(3);
-               memcpy_toio(base + DTC_DATA_BUF, src, 128);
+               memcpy_toio(hostdata->base + DTC_DATA_BUF, src, 128);
                src += 128;
                len -= 128;
        }
@@ -413,47 +399,60 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
                ++i;
        rtrc(7);
        /* Check for parity error here. fixme. */
-       NCR5380_write(MODE_REG, 0);     /* Clear the operating mode */
        rtrc(0);
        if (i > hostdata->spin_max_w)
                hostdata->spin_max_w = i;
        return (0);
 }
 
+static int dtc_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+       int transfersize = cmd->transfersize;
+
+       /* Limit transfers to 32K for the xx400 and xx406, whose
+        * pseudo-DMA engine transfers in 128-byte blocks.
+        */
+       if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+           !(cmd->SCp.this_residual % transfersize))
+               transfersize = 32 * 1024;
+
+       return transfersize;
+}
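
A worked example of the clamp above, with hypothetical numbers:

    /* cmd->transfersize      = 65536   (64 KiB)
     * cmd->SCp.this_residual = 131072  (an exact multiple of 65536)
     * => dtc_dma_xfer_len() returns 32768, i.e. 256 of the controller's
     *    128-byte blocks per pseudo-DMA burst; a residual that is not a
     *    whole multiple leaves the transfer size untouched.
     */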
+
 MODULE_LICENSE("GPL");
 
 #include "NCR5380.c"
 
 static int dtc_release(struct Scsi_Host *shost)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(shost);
+       struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
-       iounmap(base);
+       iounmap(hostdata->base);
        return 0;
 }
 
 static struct scsi_host_template driver_template = {
-       .name                           = "DTC 3180/3280 ",
-       .detect                         = dtc_detect,
-       .release                        = dtc_release,
-       .proc_name                      = "dtc3x80",
-       .show_info                      = dtc_show_info,
-       .write_info                     = dtc_write_info,
-       .info                           = dtc_info,
-       .queuecommand                   = dtc_queue_command,
-       .eh_abort_handler               = dtc_abort,
-       .eh_bus_reset_handler           = dtc_bus_reset,
-       .bios_param                     = dtc_biosparam,
-       .can_queue                      = CAN_QUEUE,
-       .this_id                        = 7,
-       .sg_tablesize                   = SG_ALL,
-       .cmd_per_lun                    = CMD_PER_LUN,
-       .use_clustering                 = DISABLE_CLUSTERING,
+       .name                   = "DTC 3180/3280",
+       .detect                 = dtc_detect,
+       .release                = dtc_release,
+       .proc_name              = "dtc3x80",
+       .show_info              = dtc_show_info,
+       .write_info             = dtc_write_info,
+       .info                   = dtc_info,
+       .queuecommand           = dtc_queue_command,
+       .eh_abort_handler       = dtc_abort,
+       .eh_bus_reset_handler   = dtc_bus_reset,
+       .bios_param             = dtc_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
index 78a2332e9064ee6b58372943ec0c327d2a74b26f..56732cba8abad0110f04532239d62fa1b8163c91 100644 (file)
 #ifndef DTC3280_H
 #define DTC3280_H
 
-#define DTCDEBUG 0
-#define DTCDEBUG_INIT  0x1
-#define DTCDEBUG_TRANSFER 0x2
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32 
-#endif
-
 #define NCR5380_implementation_fields \
     void __iomem *base
 
-#define NCR5380_local_declare() \
-    void __iomem *base
-
-#define NCR5380_setup(instance) \
-    base = ((struct NCR5380_hostdata *)(instance)->hostdata)->base
+#define DTC_address(reg) \
+       (((struct NCR5380_hostdata *)shost_priv(instance))->base + DTC_5380_OFFSET + reg)
 
-#define DTC_address(reg) (base + DTC_5380_OFFSET + reg)
-
-#define dbNCR5380_read(reg)                                              \
-    (rval=readb(DTC_address(reg)), \
-     (((unsigned char) printk("DTC : read register %d at addr %p is: %02x\n"\
-    , (reg), DTC_address(reg), rval)), rval ) )
-
-#define dbNCR5380_write(reg, value) do {                                  \
-    printk("DTC : write %02x to register %d at address %p\n",         \
-            (value), (reg), DTC_address(reg));     \
-    writeb(value, DTC_address(reg));} while(0)
-
-
-#if !(DTCDEBUG & DTCDEBUG_TRANSFER) 
 #define NCR5380_read(reg) (readb(DTC_address(reg)))
 #define NCR5380_write(reg, value) (writeb(value, DTC_address(reg)))
-#else
-#define NCR5380_read(reg) (readb(DTC_address(reg)))
-#define xNCR5380_read(reg)                                             \
-    (((unsigned char) printk("DTC : read register %d at address %p\n"\
-    , (reg), DTC_address(reg))), readb(DTC_address(reg)))
 
-#define NCR5380_write(reg, value) do {                                 \
-    printk("DTC : write %02x to register %d at address %p\n",  \
-           (value), (reg), DTC_address(reg));  \
-    writeb(value, DTC_address(reg));} while(0)
-#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+        dtc_dma_xfer_len(cmd)
 
 #define NCR5380_intr                   dtc_intr
 #define NCR5380_queue_command          dtc_queue_command
index f8d2478b11cc7a53453899db7b256543c7f21a33..90091e69302025cbbaaf1d5836ebfe1952b74e6e 100644 (file)
  *     
  */
 
-/* settings for DTC3181E card with only Mustek scanner attached */
-#define USLEEP_POLL    msecs_to_jiffies(10)
-#define USLEEP_SLEEP   msecs_to_jiffies(200)
-#define USLEEP_WAITLONG        msecs_to_jiffies(5000)
-
 #define AUTOPROBE_IRQ
 
 #ifdef CONFIG_SCSI_GENERIC_NCR53C400
-#define NCR53C400_PSEUDO_DMA 1
 #define PSEUDO_DMA
-#define NCR53C400
 #endif
 
 #include <asm/io.h>
-#include <linux/signal.h>
 #include <linux/blkdev.h>
+#include <linux/module.h>
 #include <scsi/scsi_host.h>
 #include "g_NCR5380.h"
 #include "NCR5380.h"
-#include <linux/stat.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/isapnp.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
 
-#define NCR_NOT_SET 0
-static int ncr_irq = NCR_NOT_SET;
-static int ncr_dma = NCR_NOT_SET;
-static int ncr_addr = NCR_NOT_SET;
-static int ncr_5380 = NCR_NOT_SET;
-static int ncr_53c400 = NCR_NOT_SET;
-static int ncr_53c400a = NCR_NOT_SET;
-static int dtc_3181e = NCR_NOT_SET;
+static int ncr_irq;
+static int ncr_dma;
+static int ncr_addr;
+static int ncr_5380;
+static int ncr_53c400;
+static int ncr_53c400a;
+static int dtc_3181e;
+static int hp_c2502;
 
 static struct override {
        NCR5380_map_type NCR5380_map_name;
@@ -121,7 +112,7 @@ static struct override {
 
 static void __init internal_setup(int board, char *str, int *ints)
 {
-       static int commandline_current = 0;
+       static int commandline_current;
        switch (board) {
        case BOARD_NCR5380:
                if (ints[0] != 2 && ints[0] != 3) {
@@ -235,6 +226,30 @@ static int __init do_DTC3181E_setup(char *str)
 
 #endif
 
+#ifndef SCSI_G_NCR5380_MEM
+/*
+ * Configure I/O address of 53C400A or DTC436 by writing magic numbers
+ * to ports 0x779 and 0x379.
+ */
+static void magic_configure(int idx, u8 irq, u8 magic[])
+{
+       u8 cfg = 0;
+
+       outb(magic[0], 0x779);
+       outb(magic[1], 0x379);
+       outb(magic[2], 0x379);
+       outb(magic[3], 0x379);
+       outb(magic[4], 0x379);
+
+       /* allowed IRQs for HP C2502 */
+       if (irq != 2 && irq != 3 && irq != 4 && irq != 5 && irq != 7)
+               irq = 0;
+       if (idx >= 0 && idx <= 7)
+               cfg = 0x80 | idx | (irq << 4);
+       outb(cfg, 0x379);
+}
+#endif
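
Later hunks use this helper twice during probing: first with idx == -1 to
park the chip while the driver scans for a free I/O port, then with the
chosen index to bring it up (the IRQ nibble is programmed only for the
HP C2502, once the IRQ has been requested). In outline:

    magic_configure(-1, 0, magic);        /* disable the adapter */
    /* ...probe ports[i] until one responds... */
    magic_configure(i, 0, magic);         /* select the port, no IRQ yet */
    /* after request_irq(), HP C2502 only: */
    magic_configure(port_idx, instance->irq, magic);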
+
 /**
  *     generic_NCR5380_detect  -       look for NCR5380 controllers
  *     @tpnt: the scsi template
@@ -243,19 +258,18 @@ static int __init do_DTC3181E_setup(char *str)
  *     and DTC436(ISAPnP) controllers. If overrides have been set we use
  *     them.
  *
- *     The caller supplied NCR5380_init function is invoked from here, before
- *     the interrupt line is taken.
- *
  *     Locks: none
  */
 
 static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
 {
-       static int current_override = 0;
+       static int current_override;
        int count;
        unsigned int *ports;
+       u8 *magic = NULL;
 #ifndef SCSI_G_NCR5380_MEM
        int i;
+       int port_idx = -1;
        unsigned long region_size = 16;
 #endif
        static unsigned int __initdata ncr_53c400a_ports[] = {
@@ -264,27 +278,36 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
        static unsigned int __initdata dtc_3181e_ports[] = {
                0x220, 0x240, 0x280, 0x2a0, 0x2c0, 0x300, 0x320, 0x340, 0
        };
-       int flags = 0;
+       static u8 ncr_53c400a_magic[] __initdata = {    /* 53C400A & DTC436 */
+               0x59, 0xb9, 0xc5, 0xae, 0xa6
+       };
+       static u8 hp_c2502_magic[] __initdata = {       /* HP C2502 */
+               0x0f, 0x22, 0xf0, 0x20, 0x80
+       };
+       int flags;
        struct Scsi_Host *instance;
+       struct NCR5380_hostdata *hostdata;
 #ifdef SCSI_G_NCR5380_MEM
        unsigned long base;
        void __iomem *iomem;
 #endif
 
-       if (ncr_irq != NCR_NOT_SET)
+       if (ncr_irq)
                overrides[0].irq = ncr_irq;
-       if (ncr_dma != NCR_NOT_SET)
+       if (ncr_dma)
                overrides[0].dma = ncr_dma;
-       if (ncr_addr != NCR_NOT_SET)
+       if (ncr_addr)
                overrides[0].NCR5380_map_name = (NCR5380_map_type) ncr_addr;
-       if (ncr_5380 != NCR_NOT_SET)
+       if (ncr_5380)
                overrides[0].board = BOARD_NCR5380;
-       else if (ncr_53c400 != NCR_NOT_SET)
+       else if (ncr_53c400)
                overrides[0].board = BOARD_NCR53C400;
-       else if (ncr_53c400a != NCR_NOT_SET)
+       else if (ncr_53c400a)
                overrides[0].board = BOARD_NCR53C400A;
-       else if (dtc_3181e != NCR_NOT_SET)
+       else if (dtc_3181e)
                overrides[0].board = BOARD_DTC3181E;
+       else if (hp_c2502)
+               overrides[0].board = BOARD_HP_C2502;
 #ifndef SCSI_G_NCR5380_MEM
        if (!current_override && isapnp_present()) {
                struct pnp_dev *dev = NULL;
@@ -318,41 +341,45 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                }
        }
 #endif
-       tpnt->proc_name = "g_NCR5380";
 
        for (count = 0; current_override < NO_OVERRIDES; ++current_override) {
                if (!(overrides[current_override].NCR5380_map_name))
                        continue;
 
                ports = NULL;
+               flags = 0;
                switch (overrides[current_override].board) {
                case BOARD_NCR5380:
                        flags = FLAG_NO_PSEUDO_DMA;
                        break;
                case BOARD_NCR53C400:
-                       flags = FLAG_NCR53C400;
+#ifdef PSEUDO_DMA
+                       flags = FLAG_NO_DMA_FIXUP;
+#endif
                        break;
                case BOARD_NCR53C400A:
-                       flags = FLAG_NO_PSEUDO_DMA;
+                       flags = FLAG_NO_DMA_FIXUP;
+                       ports = ncr_53c400a_ports;
+                       magic = ncr_53c400a_magic;
+                       break;
+               case BOARD_HP_C2502:
+                       flags = FLAG_NO_DMA_FIXUP;
                        ports = ncr_53c400a_ports;
+                       magic = hp_c2502_magic;
                        break;
                case BOARD_DTC3181E:
-                       flags = FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E;
+                       flags = FLAG_NO_DMA_FIXUP;
                        ports = dtc_3181e_ports;
+                       magic = ncr_53c400a_magic;
                        break;
                }
 
 #ifndef SCSI_G_NCR5380_MEM
-               if (ports) {
+               if (ports && magic) {
                        /* wakeup sequence for the NCR53C400A and DTC3181E */
 
                        /* Disable the adapter and look for a free io port */
-                       outb(0x59, 0x779);
-                       outb(0xb9, 0x379);
-                       outb(0xc5, 0x379);
-                       outb(0xae, 0x379);
-                       outb(0xa6, 0x379);
-                       outb(0x00, 0x379);
+                       magic_configure(-1, 0, magic);
 
                        if (overrides[current_override].NCR5380_map_name != PORT_AUTO)
                                for (i = 0; ports[i]; i++) {
@@ -371,17 +398,12 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                                }
                        if (ports[i]) {
                                /* At this point we have our region reserved */
-                               outb(0x59, 0x779);
-                               outb(0xb9, 0x379);
-                               outb(0xc5, 0x379);
-                               outb(0xae, 0x379);
-                               outb(0xa6, 0x379);
-                               outb(0x80 | i, 0x379);  /* set io port to be used */
+                               magic_configure(i, 0, magic); /* no IRQ yet */
                                outb(0xc0, ports[i] + 9);
                                if (inb(ports[i] + 9) != 0x80)
                                        continue;
-                               else
-                                       overrides[current_override].NCR5380_map_name = ports[i];
+                               overrides[current_override].NCR5380_map_name = ports[i];
+                               port_idx = i;
                        } else
                                continue;
                }
@@ -403,24 +425,65 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                }
 #endif
                instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
-               if (instance == NULL) {
-#ifndef SCSI_G_NCR5380_MEM
-                       release_region(overrides[current_override].NCR5380_map_name, region_size);
-#else
-                       iounmap(iomem);
-                       release_mem_region(base, NCR5380_region_size);
-#endif
-                       continue;
-               }
+               if (instance == NULL)
+                       goto out_release;
+               hostdata = shost_priv(instance);
 
-               instance->NCR5380_instance_name = overrides[current_override].NCR5380_map_name;
 #ifndef SCSI_G_NCR5380_MEM
+               instance->io_port = overrides[current_override].NCR5380_map_name;
                instance->n_io_port = region_size;
+               hostdata->io_width = 1; /* 8-bit PDMA by default */
+
+               /*
+                * On NCR53C400 boards, NCR5380 registers are mapped 8 past
+                * the base address.
+                */
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+                       instance->io_port += 8;
+                       hostdata->c400_ctl_status = 0;
+                       hostdata->c400_blk_cnt = 1;
+                       hostdata->c400_host_buf = 4;
+                       break;
+               case BOARD_DTC3181E:
+                       hostdata->io_width = 2; /* 16-bit PDMA */
+                       /* fall through */
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       hostdata->c400_ctl_status = 9;
+                       hostdata->c400_blk_cnt = 10;
+                       hostdata->c400_host_buf = 8;
+                       break;
+               }
 #else
-               ((struct NCR5380_hostdata *)instance->hostdata)->iomem = iomem;
+               instance->base = overrides[current_override].NCR5380_map_name;
+               hostdata->iomem = iomem;
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+                       hostdata->c400_ctl_status = 0x100;
+                       hostdata->c400_blk_cnt = 0x101;
+                       hostdata->c400_host_buf = 0x104;
+                       break;
+               case BOARD_DTC3181E:
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       pr_err(DRV_MODULE_NAME ": unknown register offsets\n");
+                       goto out_unregister;
+               }
 #endif
 
-               NCR5380_init(instance, flags);
+               if (NCR5380_init(instance, flags))
+                       goto out_unregister;
+
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+               case BOARD_DTC3181E:
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+               }
+
+               NCR5380_maybe_reset_bus(instance);
 
                if (overrides[current_override].irq != IRQ_AUTO)
                        instance->irq = overrides[current_override].irq;
@@ -431,12 +494,18 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                if (instance->irq == 255)
                        instance->irq = NO_IRQ;
 
-               if (instance->irq != NO_IRQ)
+               if (instance->irq != NO_IRQ) {
+#ifndef SCSI_G_NCR5380_MEM
+                       /* set IRQ for HP C2502 */
+                       if (overrides[current_override].board == BOARD_HP_C2502)
+                               magic_configure(port_idx, instance->irq, magic);
+#endif
                        if (request_irq(instance->irq, generic_NCR5380_intr,
                                        0, "NCR5380", instance)) {
                                printk(KERN_WARNING "scsi%d : IRQ%d not free, interrupts disabled\n", instance->host_no, instance->irq);
                                instance->irq = NO_IRQ;
                        }
+               }
 
                if (instance->irq == NO_IRQ) {
                        printk(KERN_INFO "scsi%d : interrupts not enabled. for better interactive performance,\n", instance->host_no);
@@ -447,6 +516,17 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                ++count;
        }
        return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_release:
+#ifndef SCSI_G_NCR5380_MEM
+       release_region(overrides[current_override].NCR5380_map_name, region_size);
+#else
+       iounmap(iomem);
+       release_mem_region(base, NCR5380_region_size);
+#endif
+       return count;
 }
 
 /**
@@ -460,21 +540,15 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
  
 static int generic_NCR5380_release_resources(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       
        if (instance->irq != NO_IRQ)
                free_irq(instance->irq, instance);
        NCR5380_exit(instance);
-
 #ifndef SCSI_G_NCR5380_MEM
-       release_region(instance->NCR5380_instance_name, instance->n_io_port);
+       release_region(instance->io_port, instance->n_io_port);
 #else
        iounmap(((struct NCR5380_hostdata *)instance->hostdata)->iomem);
-       release_mem_region(instance->NCR5380_instance_name, NCR5380_region_size);
+       release_mem_region(instance->base, NCR5380_region_size);
 #endif
-
-
        return 0;
 }
 
@@ -507,7 +581,7 @@ generic_NCR5380_biosparam(struct scsi_device *sdev, struct block_device *bdev,
 }
 #endif
 
-#ifdef NCR53C400_PSEUDO_DMA
+#ifdef PSEUDO_DMA
 
 /**
  *     NCR5380_pread           -       pseudo DMA read
@@ -521,75 +595,68 @@ generic_NCR5380_biosparam(struct scsi_device *sdev, struct block_device *bdev,
  
 static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int blocks = len / 128;
        int start = 0;
-       int bl;
-
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
-       NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE | CSR_TRANS_DIR);
-       NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE | CSR_TRANS_DIR);
+       NCR5380_write(hostdata->c400_blk_cnt, blocks);
        while (1) {
-               if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+               if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
                        break;
-               }
-               if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+               if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
                        printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
                        return -1;
                }
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY);
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+                       ; /* FIXME - no timeout */
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       int i;
-                       for (i = 0; i < 128; i++)
-                               dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
-               }
+               if (hostdata->io_width == 2)
+                       insw(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 64);
+               else
+                       insb(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+               memcpy_fromio(dst + start,
+                             hostdata->iomem + NCR53C400_host_buffer, 128);
 #endif
                start += 128;
                blocks--;
        }
 
        if (blocks) {
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
-               {
-                       // FIXME - no timeout
-               }
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+                       ; /* FIXME - no timeout */
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       int i;  
-                       for (i = 0; i < 128; i++)
-                               dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
-               }
+               if (hostdata->io_width == 2)
+                       insw(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 64);
+               else
+                       insb(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+               memcpy_fromio(dst + start,
+                             hostdata->iomem + NCR53C400_host_buffer, 128);
 #endif
                start += 128;
                blocks--;
        }
 
-       if (!(NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
+       if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
                printk("53C400r: no 53C80 gated irq after transfer");
 
-#if 0
-       /*
-        *      DON'T DO THIS - THEY NEVER ARRIVE!
-        */
-       printk("53C400r: Waiting for 53C80 registers\n");
-       while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG)
+       /* wait for 53C80 registers to be available */
+       while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
                ;
-#endif
+
        if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
                printk(KERN_ERR "53C400r: no end dma signal\n");
                
-       NCR5380_write(MODE_REG, MR_BASE);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        return 0;
 }
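The busy-waits above are all flagged "FIXME - no timeout". A bounded poll in the same style might look like the sketch below; the helper name, iteration limit, and udelay() pacing are assumptions, not part of this patch:

/* Hypothetical bounded wait: spin until (reg & mask) == want, giving
 * up after 'tries' iterations instead of hanging the machine.
 */
static int generic_NCR5380_poll(struct Scsi_Host *instance, int reg,
                                u8 mask, u8 want, unsigned long tries)
{
        while (tries--) {
                if ((NCR5380_read(reg) & mask) == want)
                        return 0;
                udelay(1);      /* assumed pacing; tune to the hardware */
        }
        return -ETIMEDOUT;      /* caller can abort or fall back to PIO */
}

The host-buffer wait would then become a call like generic_NCR5380_poll(instance, hostdata->c400_ctl_status, CSR_HOST_BUF_NOT_RDY, 0, 10000).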
 
@@ -605,89 +672,91 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
 
 static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int blocks = len / 128;
        int start = 0;
-       int bl;
-       int i;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
-       NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
-       NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+       NCR5380_write(hostdata->c400_blk_cnt, blocks);
        while (1) {
-               if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+               if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
                        printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
                        return -1;
                }
 
-               if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+               if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
                        break;
-               }
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
                        ; // FIXME - timeout
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       for (i = 0; i < 128; i++)
-                               NCR5380_write(C400_HOST_BUFFER, src[start + i]);
-               }
+               if (hostdata->io_width == 2)
+                       outsw(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 64);
+               else
+                       outsb(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+               memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+                           src + start, 128);
 #endif
                start += 128;
                blocks--;
        }
        if (blocks) {
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
                        ; // FIXME - no timeout
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       for (i = 0; i < 128; i++)
-                               NCR5380_write(C400_HOST_BUFFER, src[start + i]);
-               }
+               if (hostdata->io_width == 2)
+                       outsw(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 64);
+               else
+                       outsb(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+               memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+                           src + start, 128);
 #endif
                start += 128;
                blocks--;
        }
 
-#if 0
-       printk("53C400w: waiting for registers to be available\n");
-       THEY NEVER DO ! while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG);
-       printk("53C400w: Got em\n");
-#endif
-
-       /* Let's wait for this instead - could be ugly */
-       /* All documentation says to check for this. Maybe my hardware is too
-        * fast. Waiting for it seems to work fine! KLL
-        */
-       while (!(i = NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
-               ;       // FIXME - no timeout
-
-       /*
-        * I know. i is certainly != 0 here but the loop is new. See previous
-        * comment.
-        */
-       if (i) {
-               if (!((i = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_END_DMA_TRANSFER))
-                       printk(KERN_ERR "53C400w: No END OF DMA bit - WHOOPS! BASR=%0x\n", i);
-       } else
-               printk(KERN_ERR "53C400w: no 53C80 gated irq after transfer (last block)\n");
+       /* wait for 53C80 registers to be available */
+       while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
+               udelay(4); /* DTC436 chip hangs without this */
+               /* FIXME - no timeout */
+       }
 
-#if 0
        if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
                printk(KERN_ERR "53C400w: no end dma signal\n");
        }
-#endif
+
        while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
                ;       // TIMEOUT
        return 0;
 }
-#endif                         /* PSEUDO_DMA */
+
+static int generic_NCR5380_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+       int transfersize = cmd->transfersize;
+
+       /* Limit transfers to 32K for the xx400 and xx406 boards, whose
+        * pseudo-DMA engine transfers in 128-byte blocks.
+        */
+       if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+           !(cmd->SCp.this_residual % transfersize))
+               transfersize = 32 * 1024;
+
+       /* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
+       if (transfersize % 128)
+               transfersize = 0;
+
+       return transfersize;
+}
+
+#endif /* PSEUDO_DMA */
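To make the clamping rules above concrete, here is a stand-alone illustration of the same policy (plain C; the input values are examples only):

#include <stdio.h>

static int dma_xfer_len(int transfersize, int this_residual)
{
        if (transfersize > 32 * 1024 && this_residual &&
            !(this_residual % transfersize))
                transfersize = 32 * 1024;
        if (transfersize % 128)         /* non-modulo-128: force PIO */
                transfersize = 0;
        return transfersize;
}

int main(void)
{
        printf("%d\n", dma_xfer_len(512, 4096));        /* 512: kept as-is */
        printf("%d\n", dma_xfer_len(510, 4096));        /* 0: falls back to PIO */
        printf("%d\n", dma_xfer_len(65536, 65536));     /* 32768: clamped */
        return 0;
}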
 
 /*
  *     Include the NCR5380 core code that we build our driver around   
@@ -696,22 +765,24 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
-       .show_info              = generic_NCR5380_show_info,
-       .name                   = "Generic NCR5380/NCR53C400 SCSI",
-       .detect                 = generic_NCR5380_detect,
-       .release                = generic_NCR5380_release_resources,
-       .info                   = generic_NCR5380_info,
-       .queuecommand           = generic_NCR5380_queue_command,
+       .proc_name              = DRV_MODULE_NAME,
+       .name                   = "Generic NCR5380/NCR53C400 SCSI",
+       .detect                 = generic_NCR5380_detect,
+       .release                = generic_NCR5380_release_resources,
+       .info                   = generic_NCR5380_info,
+       .queuecommand           = generic_NCR5380_queue_command,
        .eh_abort_handler       = generic_NCR5380_abort,
        .eh_bus_reset_handler   = generic_NCR5380_bus_reset,
-       .bios_param             = NCR5380_BIOSPARAM,
-       .can_queue              = CAN_QUEUE,
-        .this_id               = 7,
-        .sg_tablesize          = SG_ALL,
-       .cmd_per_lun            = CMD_PER_LUN,
-        .use_clustering                = DISABLE_CLUSTERING,
+       .bios_param             = NCR5380_BIOSPARAM,
+       .can_queue              = 16,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
-#include <linux/module.h>
+
 #include "scsi_module.c"
 
 module_param(ncr_irq, int, 0);
@@ -721,6 +792,7 @@ module_param(ncr_5380, int, 0);
 module_param(ncr_53c400, int, 0);
 module_param(ncr_53c400a, int, 0);
 module_param(dtc_3181e, int, 0);
+module_param(hp_c2502, int, 0);
 MODULE_LICENSE("GPL");
 
 #if !defined(SCSI_G_NCR5380_MEM) && defined(MODULE)
index bea1a3b9b862dfd4b944072af3fbab22344bd66c..6f3d2ac4f185c2ff64f6c6a41721f604f08edab7 100644 (file)
 #ifndef GENERIC_NCR5380_H
 #define GENERIC_NCR5380_H
 
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
 #define BIOSPARAM
 #define NCR5380_BIOSPARAM generic_NCR5380_biosparam
 #else
 #define NCR5380_BIOSPARAM NULL
 #endif
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 16
-#endif
-
 #define __STRVAL(x) #x
 #define STRVAL(x) __STRVAL(x)
 
 #ifndef SCSI_G_NCR5380_MEM
+#define DRV_MODULE_NAME "g_NCR5380"
 
-#define NCR5380_map_config port
 #define NCR5380_map_type int
 #define NCR5380_map_name port
-#define NCR5380_instance_name io_port
-#define NCR53C400_register_offset 0
-#define NCR53C400_address_adjust 8
 
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
 #define NCR5380_region_size 16
 #else
 #define NCR5380_region_size 8
 #endif
 
-#define NCR5380_read(reg) (inb(NCR5380_map_name + (reg)))
-#define NCR5380_write(reg, value) (outb((value), (NCR5380_map_name + (reg))))
+#define NCR5380_read(reg) \
+       inb(instance->io_port + (reg))
+#define NCR5380_write(reg, value) \
+       outb(value, instance->io_port + (reg))
 
 #define NCR5380_implementation_fields \
-    NCR5380_map_type NCR5380_map_name
-
-#define NCR5380_local_declare() \
-    register NCR5380_implementation_fields
-
-#define NCR5380_setup(instance) \
-    NCR5380_map_name = (NCR5380_map_type)((instance)->NCR5380_instance_name)
+       int c400_ctl_status; \
+       int c400_blk_cnt; \
+       int c400_host_buf; \
+       int io_width;
 
 #else 
 /* therefore SCSI_G_NCR5380_MEM */
+#define DRV_MODULE_NAME "g_NCR5380_mmio"
 
-#define NCR5380_map_config memory
 #define NCR5380_map_type unsigned long
 #define NCR5380_map_name base
-#define NCR5380_instance_name base
-#define NCR53C400_register_offset 0x108
-#define NCR53C400_address_adjust 0
 #define NCR53C400_mem_base 0x3880
 #define NCR53C400_host_buffer 0x3900
 #define NCR5380_region_size 0x3a00
 
-#define NCR5380_read(reg) readb(iomem + NCR53C400_mem_base + (reg))
-#define NCR5380_write(reg, value) writeb(value, iomem + NCR53C400_mem_base + (reg))
+#define NCR5380_read(reg) \
+       readb(((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+             NCR53C400_mem_base + (reg))
+#define NCR5380_write(reg, value) \
+       writeb(value, ((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+              NCR53C400_mem_base + (reg))
 
 #define NCR5380_implementation_fields \
-    NCR5380_map_type NCR5380_map_name; \
-    void __iomem *iomem;
-
-#define NCR5380_local_declare() \
-    register void __iomem *iomem
-
-#define NCR5380_setup(instance) \
-    iomem = (((struct NCR5380_hostdata *)(instance)->hostdata)->iomem)
+       void __iomem *iomem; \
+       int c400_ctl_status; \
+       int c400_blk_cnt; \
+       int c400_host_buf;
 
 #endif
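Both variants of NCR5380_read()/NCR5380_write() now expand against a local variable named instance in the caller's scope, which is what allows the NCR5380_local_declare()/NCR5380_setup() boilerplate to go away. A hypothetical caller (sketch only; the function itself is illustrative):

static void example_clear_mode(struct Scsi_Host *instance)
{
        /* 'instance' is picked up implicitly by the macros */
        if (NCR5380_read(STATUS_REG) & SR_BSY)
                NCR5380_write(MODE_REG, MR_BASE);
}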
 
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+        generic_NCR5380_dma_xfer_len(cmd)
+
 #define NCR5380_intr generic_NCR5380_intr
 #define NCR5380_queue_command generic_NCR5380_queue_command
 #define NCR5380_abort generic_NCR5380_abort
 #define BOARD_NCR53C400        1
 #define BOARD_NCR53C400A 2
 #define BOARD_DTC3181E 3
+#define BOARD_HP_C2502 4
 
-#endif /* ndef ASM */
 #endif /* GENERIC_NCR5380_H */
 
index 37a0c7156087905b8cb749103e71d92547091e2e..d1dd1616f983bb7a567d4eb465e09a5c9cd611c5 100644 (file)
@@ -1,5 +1,7 @@
 config SCSI_HISI_SAS
        tristate "HiSilicon SAS"
+       depends on HAS_DMA && HAS_IOMEM
+       depends on ARM64 || COMPILE_TEST
        select SCSI_SAS_LIBSAS
        select BLK_DEV_INTEGRITY
        help
index d54381149c0d513b16cb355b5ec632636073321c..eea24d7531cf51eec585afbe0575a0dc6d804279 100644 (file)
 /* ITCT header */
 /* qw0 */
 #define ITCT_HDR_DEV_TYPE_OFF          0
-#define ITCT_HDR_DEV_TYPE_MSK          (0x3 << ITCT_HDR_DEV_TYPE_OFF)
+#define ITCT_HDR_DEV_TYPE_MSK          (0x3ULL << ITCT_HDR_DEV_TYPE_OFF)
 #define ITCT_HDR_VALID_OFF             2
-#define ITCT_HDR_VALID_MSK             (0x1 << ITCT_HDR_VALID_OFF)
-#define ITCT_HDR_BREAK_REPLY_ENA_OFF   3
-#define ITCT_HDR_BREAK_REPLY_ENA_MSK   (0x1 << ITCT_HDR_BREAK_REPLY_ENA_OFF)
+#define ITCT_HDR_VALID_MSK             (0x1ULL << ITCT_HDR_VALID_OFF)
 #define ITCT_HDR_AWT_CONTROL_OFF       4
-#define ITCT_HDR_AWT_CONTROL_MSK       (0x1 << ITCT_HDR_AWT_CONTROL_OFF)
+#define ITCT_HDR_AWT_CONTROL_MSK       (0x1ULL << ITCT_HDR_AWT_CONTROL_OFF)
 #define ITCT_HDR_MAX_CONN_RATE_OFF     5
-#define ITCT_HDR_MAX_CONN_RATE_MSK     (0xf << ITCT_HDR_MAX_CONN_RATE_OFF)
+#define ITCT_HDR_MAX_CONN_RATE_MSK     (0xfULL << ITCT_HDR_MAX_CONN_RATE_OFF)
 #define ITCT_HDR_VALID_LINK_NUM_OFF    9
-#define ITCT_HDR_VALID_LINK_NUM_MSK    (0xf << ITCT_HDR_VALID_LINK_NUM_OFF)
+#define ITCT_HDR_VALID_LINK_NUM_MSK    (0xfULL << ITCT_HDR_VALID_LINK_NUM_OFF)
 #define ITCT_HDR_PORT_ID_OFF           13
-#define ITCT_HDR_PORT_ID_MSK           (0x7 << ITCT_HDR_PORT_ID_OFF)
+#define ITCT_HDR_PORT_ID_MSK           (0x7ULL << ITCT_HDR_PORT_ID_OFF)
 #define ITCT_HDR_SMP_TIMEOUT_OFF       16
-#define ITCT_HDR_SMP_TIMEOUT_MSK       (0xffff << ITCT_HDR_SMP_TIMEOUT_OFF)
-#define ITCT_HDR_MAX_BURST_BYTES_OFF   16
-#define ITCT_HDR_MAX_BURST_BYTES_MSK   (0xffffffff << \
-                                       ITCT_MAX_BURST_BYTES_OFF)
+#define ITCT_HDR_SMP_TIMEOUT_MSK       (0xffffULL << ITCT_HDR_SMP_TIMEOUT_OFF)
 /* qw1 */
 #define ITCT_HDR_MAX_SAS_ADDR_OFF      0
 #define ITCT_HDR_MAX_SAS_ADDR_MSK      (0xffffffffffffffff << \
                                        ITCT_HDR_MAX_SAS_ADDR_OFF)
 /* qw2 */
 #define ITCT_HDR_IT_NEXUS_LOSS_TL_OFF  0
-#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK  (0xffff << \
+#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK  (0xffffULL << \
                                        ITCT_HDR_IT_NEXUS_LOSS_TL_OFF)
 #define ITCT_HDR_BUS_INACTIVE_TL_OFF   16
-#define ITCT_HDR_BUS_INACTIVE_TL_MSK   (0xffff << \
+#define ITCT_HDR_BUS_INACTIVE_TL_MSK   (0xffffULL << \
                                        ITCT_HDR_BUS_INACTIVE_TL_OFF)
 #define ITCT_HDR_MAX_CONN_TL_OFF       32
-#define ITCT_HDR_MAX_CONN_TL_MSK       (0xffff << \
+#define ITCT_HDR_MAX_CONN_TL_MSK       (0xffffULL << \
                                        ITCT_HDR_MAX_CONN_TL_OFF)
 #define ITCT_HDR_REJ_OPEN_TL_OFF       48
-#define ITCT_HDR_REJ_OPEN_TL_MSK       (0xffff << \
-                                       ITCT_REJ_OPEN_TL_OFF)
+#define ITCT_HDR_REJ_OPEN_TL_MSK       (0xffffULL << \
+                                       ITCT_HDR_REJ_OPEN_TL_OFF)
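The ULL suffixes matter because these masks are applied to 64-bit descriptor words: in plain int arithmetic the wide masks lose their high bits, negative intermediates sign-extend on widening, and fields at offset 32 and above (MAX_CONN_TL, REJ_OPEN_TL) cannot be expressed at all. A small stand-alone illustration (not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* What '0xffff << 16' yields in 32-bit int arithmetic on typical
         * compilers (the shift is formally undefined once the sign bit is
         * reached); widening the negative int then sign-extends the mask.
         */
        int32_t  as_int = (int32_t)0xffff0000u;
        uint64_t bad    = (uint64_t)(int64_t)as_int;    /* ffffffffffff0000 */
        uint64_t good   = 0xffffULL << 16;              /* 00000000ffff0000 */

        printf("bad  = %016llx\n", (unsigned long long)bad);
        printf("good = %016llx\n", (unsigned long long)good);
        return 0;
}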
 
 /* Err record header */
 #define ERR_HDR_DMA_TX_ERR_TYPE_OFF    0
@@ -533,10 +528,10 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba,
        itct->sas_addr = __swab64(itct->sas_addr);
 
        /* qw2 */
-       itct->qw2 = cpu_to_le64((500 < ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
-                               (0xff00 < ITCT_HDR_BUS_INACTIVE_TL_OFF) |
-                               (0xff00 < ITCT_HDR_MAX_CONN_TL_OFF) |
-                               (0xff00 < ITCT_HDR_REJ_OPEN_TL_OFF));
+       itct->qw2 = cpu_to_le64((500ULL << ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_BUS_INACTIVE_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_MAX_CONN_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_REJ_OPEN_TL_OFF));
 }
 
 static void free_device_v1_hw(struct hisi_hba *hisi_hba,
@@ -544,7 +539,8 @@ static void free_device_v1_hw(struct hisi_hba *hisi_hba,
 {
        u64 dev_id = sas_dev->device_id;
        struct hisi_sas_itct *itct = &hisi_hba->itct[dev_id];
-       u32 qw0, reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
+       u64 qw0;
+       u32 reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
 
        reg_val |= CFG_AGING_TIME_ITCT_REL_MSK;
        hisi_sas_write32(hisi_hba, CFG_AGING_TIME, reg_val);
@@ -1293,13 +1289,10 @@ static int slot_complete_v1_hw(struct hisi_hba *hisi_hba,
                goto out;
        }
 
-       if (cmplt_hdr_data & CMPLT_HDR_ERR_RCRD_XFRD_MSK) {
-               if (!(cmplt_hdr_data & CMPLT_HDR_CMD_CMPLT_MSK) ||
-                   !(cmplt_hdr_data & CMPLT_HDR_RSPNS_XFRD_MSK))
-                       ts->stat = SAS_DATA_OVERRUN;
-               else
-                       slot_err_v1_hw(hisi_hba, task, slot);
+       if (cmplt_hdr_data & CMPLT_HDR_ERR_RCRD_XFRD_MSK &&
+               !(cmplt_hdr_data & CMPLT_HDR_RSPNS_XFRD_MSK)) {
 
+               slot_err_v1_hw(hisi_hba, task, slot);
                goto out;
        }
 
index 4e1a632ccf162ea52365b84e71aab19b44619280..f8b88fa78e6274f8b5956d6ea6a3f057881132f5 100644 (file)
@@ -43,6 +43,7 @@ typedef struct {
        unsigned dp:1;          /* Data phase present           */
        unsigned rd:1;          /* Read data in data phase      */
        unsigned wanted:1;      /* Parport sharing busy flag    */
+       unsigned int dev_no;    /* Device number                */
        wait_queue_head_t *waiting;
        struct Scsi_Host *host;
        struct list_head list;
@@ -1120,15 +1121,40 @@ static struct scsi_host_template imm_template = {
 
 static LIST_HEAD(imm_hosts);
 
+/*
+ * Find the first available device number that can be allotted to the
+ * new imm device and return the address of the previous node, so that
+ * we can insert after it and keep the list in ascending order.
+ */
+
+static inline imm_struct *find_parent(void)
+{
+       imm_struct *dev, *par = NULL;
+       unsigned int cnt = 0;
+
+       if (list_empty(&imm_hosts))
+               return NULL;
+
+       list_for_each_entry(dev, &imm_hosts, list) {
+               if (dev->dev_no != cnt)
+                       return par;
+               cnt++;
+               par = dev;
+       }
+
+       return par;
+}
+
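A stand-alone sketch of the dev_no allocation above: walk an ascending list, stop at the first numbering gap, and return the node to insert after (plain C; the kernel code uses list_head instead):

#include <stdio.h>
#include <stddef.h>

struct node { unsigned int no; struct node *next; };

static struct node *find_gap_parent(struct node *head)
{
        struct node *par = NULL;
        unsigned int cnt = 0;

        for (struct node *n = head; n; n = n->next) {
                if (n->no != cnt)
                        return par;     /* new dev_no == cnt; insert after par */
                cnt++;
                par = n;
        }
        return par;                     /* no gap: append at the tail */
}

int main(void)
{
        struct node c = { 3, NULL }, b = { 1, &c }, a = { 0, &b };

        /* Numbers 0, 1, 3 are taken; the gap is 2, after the node numbered 1. */
        printf("insert after dev_no %u\n", find_gap_parent(&a)->no);
        return 0;
}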
 static int __imm_attach(struct parport *pb)
 {
        struct Scsi_Host *host;
-       imm_struct *dev;
+       imm_struct *dev, *temp;
        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waiting);
        DEFINE_WAIT(wait);
        int ports;
        int modes, ppb;
        int err = -ENOMEM;
+       struct pardev_cb imm_cb;
 
        init_waitqueue_head(&waiting);
 
@@ -1141,9 +1167,15 @@ static int __imm_attach(struct parport *pb)
        dev->mode = IMM_AUTODETECT;
        INIT_LIST_HEAD(&dev->list);
 
-       dev->dev = parport_register_device(pb, "imm", NULL, imm_wakeup,
-                                               NULL, 0, dev);
+       temp = find_parent();
+       if (temp)
+               dev->dev_no = temp->dev_no + 1;
+
+       memset(&imm_cb, 0, sizeof(imm_cb));
+       imm_cb.private = dev;
+       imm_cb.wakeup = imm_wakeup;
 
+       dev->dev = parport_register_dev_model(pb, "imm", &imm_cb, dev->dev_no);
        if (!dev->dev)
                goto out;
 
@@ -1207,7 +1239,10 @@ static int __imm_attach(struct parport *pb)
        host->unique_id = pb->number;
        *(imm_struct **)&host->hostdata = dev;
        dev->host = host;
-       list_add_tail(&dev->list, &imm_hosts);
+       if (!temp)
+               list_add_tail(&dev->list, &imm_hosts);
+       else
+               list_add_tail(&dev->list, &temp->list);
        err = scsi_add_host(host, NULL);
        if (err)
                goto out2;
@@ -1245,9 +1280,10 @@ static void imm_detach(struct parport *pb)
 }
 
 static struct parport_driver imm_driver = {
-       .name   = "imm",
-       .attach = imm_attach,
-       .detach = imm_detach,
+       .name           = "imm",
+       .match_port     = imm_attach,
+       .detach         = imm_detach,
+       .devmodel       = true,
 };
 
 static int __init imm_driver_init(void)
index 536cd5a8042245f4ad46929a256098e047d745dd..3b3e0998fa6e72b71f5417c4f942ff9bb67caee0 100644 (file)
@@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = {
        .store = ipr_store_reset_adapter
 };
 
-static int ipr_iopoll(struct blk_iopoll *iop, int budget);
+static int ipr_iopoll(struct irq_poll *iop, int budget);
  /**
  * ipr_show_iopoll_weight - Show ipr polling mode
  * @dev:       class device struct
@@ -3681,34 +3681,33 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev,
        int i;
 
        if (!ioa_cfg->sis64) {
-               dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n");
+               dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n");
                return -EINVAL;
        }
        if (kstrtoul(buf, 10, &user_iopoll_weight))
                return -EINVAL;
 
        if (user_iopoll_weight > 256) {
-               dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n");
+               dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be 256 or less\n");
                return -EINVAL;
        }
 
        if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
-               dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n");
+               dev_info(&ioa_cfg->pdev->dev, "Requested irq_poll weight is unchanged from the current value\n");
                return strlen(buf);
        }
 
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++)
-                       blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+                       irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
        }
 
        spin_lock_irqsave(shost->host_lock, lock_flags);
        ioa_cfg->iopoll_weight = user_iopoll_weight;
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++) {
-                       blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+                       irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
                                        ioa_cfg->iopoll_weight, ipr_iopoll);
-                       blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
                }
        }
        spin_unlock_irqrestore(shost->host_lock, lock_flags);
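For reference, the shape of the replacement API as it is used above (a sketch; struct my_queue and my_queue_drain() are placeholders, not ipr code):

#include <linux/irq_poll.h>
#include <linux/kernel.h>

struct my_queue {
        struct irq_poll iopoll;
        /* ... */
};

static int my_queue_drain(struct my_queue *q, int budget); /* placeholder */

static int my_poll(struct irq_poll *iop, int budget)
{
        struct my_queue *q = container_of(iop, struct my_queue, iopoll);
        int done = my_queue_drain(q, budget);

        if (done < budget)
                irq_poll_complete(iop); /* done; the next IRQ re-schedules */
        return done;
}

/* setup:    irq_poll_init(&q->iopoll, weight, my_poll);
 *           (there is no separate enable step, unlike blk_iopoll)
 * hard IRQ: irq_poll_sched(&q->iopoll);
 *           (replaces the blk_iopoll_sched_prep()/blk_iopoll_sched() pair)
 * teardown: irq_poll_disable(&q->iopoll);
 */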
@@ -4003,13 +4002,12 @@ static ssize_t ipr_store_update_fw(struct device *dev,
        struct ipr_sglist *sglist;
        char fname[100];
        char *src;
-       int len, result, dnld_size;
+       int result, dnld_size;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
 
-       len = snprintf(fname, 99, "%s", buf);
-       fname[len-1] = '\0';
+       snprintf(fname, sizeof(fname), "%s", buf);
 
        if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
                dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
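The dropped two-step was subtly unsafe: snprintf() returns the length the untruncated string would have had, so an oversized sysfs write makes fname[len - 1] index past the buffer, and a name with no trailing newline loses its final character. A user-space illustration (a guard is added so the demo itself stays in bounds):

#include <stdio.h>

static void old_way(const char *buf)
{
        char fname[100];
        int len = snprintf(fname, 99, "%s", buf);       /* len == strlen(buf) */

        if (len > 0 && len - 1 < (int)sizeof(fname))    /* guard for the demo only */
                fname[len - 1] = '\0';
        printf("[%s]\n", fname);
}

int main(void)
{
        old_way("firmware.bin\n");      /* newline stripped: the intended case */
        old_way("firmware.bin");        /* prints [firmware.bi]: the bug */
        return 0;
}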
@@ -5569,7 +5567,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
        return num_hrrq;
 }
 
-static int ipr_iopoll(struct blk_iopoll *iop, int budget)
+static int ipr_iopoll(struct irq_poll *iop, int budget)
 {
        struct ipr_ioa_cfg *ioa_cfg;
        struct ipr_hrr_queue *hrrq;
@@ -5585,7 +5583,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget)
        completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
 
        if (completed_ops < budget)
-               blk_iopoll_complete(iop);
+               irq_poll_complete(iop);
        spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 
        list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
@@ -5693,8 +5691,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
                       hrrq->toggle_bit) {
-                       if (!blk_iopoll_sched_prep(&hrrq->iopoll))
-                               blk_iopoll_sched(&hrrq->iopoll);
+                       irq_poll_sched(&hrrq->iopoll);
                        spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
                        return IRQ_HANDLED;
                }
@@ -10405,9 +10402,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
 
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++) {
-                       blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+                       irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
                                        ioa_cfg->iopoll_weight, ipr_iopoll);
-                       blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
                }
        }
 
@@ -10436,7 +10432,7 @@ static void ipr_shutdown(struct pci_dev *pdev)
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                ioa_cfg->iopoll_weight = 0;
                for (i = 1; i < ioa_cfg->hrrq_num; i++)
-                       blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+                       irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
        }
 
        while (ioa_cfg->in_reset_reload) {
index a34c7a5a995e4bcbf61b1d859fb784b60a9cdbee..56c57068300a1fb947221f8bc32f467aa115ae80 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/libata.h>
 #include <linux/list.h>
 #include <linux/kref.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 
@@ -517,7 +517,7 @@ struct ipr_hrr_queue {
        u8 allow_cmds:1;
        u8 removing_ioa:1;
 
-       struct blk_iopoll iopoll;
+       struct irq_poll iopoll;
 };
 
 /* Command packet structure */
index d64a769b8155ccda7ab0d0368c9bb84eb717e6a6..bb2381314a2bfe91c1552f4f257dc41919014aac 100644 (file)
@@ -12,7 +12,6 @@
  */
 
 #include <linux/types.h>
-#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
 #define PSEUDO_DMA
 
 #define NCR5380_implementation_fields   unsigned char *pdma_base
-#define NCR5380_local_declare()         struct Scsi_Host *_instance
-#define NCR5380_setup(instance)         _instance = instance
 
-#define NCR5380_read(reg)               macscsi_read(_instance, reg)
-#define NCR5380_write(reg, value)       macscsi_write(_instance, reg, value)
+#define NCR5380_read(reg)               macscsi_read(instance, reg)
+#define NCR5380_write(reg, value)       macscsi_write(instance, reg, value)
 
 #define NCR5380_pread                   macscsi_pread
 #define NCR5380_pwrite                  macscsi_pwrite
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
 
 #define NCR5380_intr                    macscsi_intr
 #define NCR5380_queue_command           macscsi_queue_command
@@ -51,8 +49,6 @@
 
 #include "NCR5380.h"
 
-#define RESET_BOOT
-
 static int setup_can_queue = -1;
 module_param(setup_can_queue, int, 0);
 static int setup_cmd_per_lun = -1;
@@ -65,17 +61,8 @@ static int setup_use_tagged_queuing = -1;
 module_param(setup_use_tagged_queuing, int, 0);
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#define TOSHIBA_DELAY
-
-#ifdef TOSHIBA_DELAY
-#define        AFTER_RESET_DELAY       (5*HZ/2)
-#else
-#define        AFTER_RESET_DELAY       (HZ/2)
-#endif
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
 
 /*
  * NCR 5380 register access functions
@@ -94,12 +81,12 @@ static inline void macscsi_write(struct Scsi_Host *instance, int reg, int value)
 #ifndef MODULE
 static int __init mac_scsi_setup(char *str)
 {
-       int ints[7];
+       int ints[8];
 
        (void)get_options(str, ARRAY_SIZE(ints), ints);
 
-       if (ints[0] < 1 || ints[0] > 6) {
-               pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>]]]]]\n");
+       if (ints[0] < 1) {
+               pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>[,<toshiba_delay>]]]]]]\n");
                return 0;
        }
        if (ints[0] >= 1)
@@ -114,50 +101,14 @@ static int __init mac_scsi_setup(char *str)
                setup_use_tagged_queuing = ints[5];
        if (ints[0] >= 6)
                setup_use_pdma = ints[6];
+       if (ints[0] >= 7)
+               setup_toshiba_delay = ints[7];
        return 1;
 }
 
 __setup("mac5380=", mac_scsi_setup);
 #endif /* !MODULE */
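With the seventh option in place, a boot argument exercising every field might look like this (values purely illustrative):

/* mac5380=16,2,128,7,0,1,1
 *   -> can_queue=16, cmd_per_lun=2, sg_tablesize=128, hostid=7,
 *      use_tags=0, use_pdma=1, toshiba_delay=1
 */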
 
-#ifdef RESET_BOOT
-/*
- * Our 'bus reset on boot' function
- */
-
-static void mac_scsi_reset_boot(struct Scsi_Host *instance)
-{
-       unsigned long end;
-
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No messing
-        * with the queues, interrupts, or locks necessary here.
-        */
-
-       printk(KERN_INFO "Macintosh SCSI: resetting the SCSI bus..." );
-
-       /* get in phase */
-       NCR5380_write( TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
-       /* assert RST */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay( 50 );
-       /* reset RST and interrupt */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
-       for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
-               barrier();
-
-       printk(KERN_INFO " done\n" );
-}
-#endif
-
 #ifdef PSEUDO_DMA
 /* 
    Pseudo-DMA: (Ove Edlund)
@@ -235,9 +186,6 @@ static int macscsi_pread(struct Scsi_Host *instance,
        unsigned char *d;
        unsigned char *s;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
        s = hostdata->pdma_base + (INPUT_DATA_REG << 4);
        d = dst;
 
@@ -329,9 +277,6 @@ static int macscsi_pwrite(struct Scsi_Host *instance,
        unsigned char *s;
        unsigned char *d;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
        s = src;
        d = hostdata->pdma_base + (OUTPUT_DATA_REG << 4);
 
@@ -364,20 +309,22 @@ static int macscsi_pwrite(struct Scsi_Host *instance,
 #define PFX                     DRV_MODULE_NAME ": "
 
 static struct scsi_host_template mac_scsi_template = {
-       .module                         = THIS_MODULE,
-       .proc_name                      = DRV_MODULE_NAME,
-       .show_info                      = macscsi_show_info,
-       .write_info                     = macscsi_write_info,
-       .name                           = "Macintosh NCR5380 SCSI",
-       .info                           = macscsi_info,
-       .queuecommand                   = macscsi_queue_command,
-       .eh_abort_handler               = macscsi_abort,
-       .eh_bus_reset_handler           = macscsi_bus_reset,
-       .can_queue                      = 16,
-       .this_id                        = 7,
-       .sg_tablesize                   = SG_ALL,
-       .cmd_per_lun                    = 2,
-       .use_clustering                 = DISABLE_CLUSTERING
+       .module                 = THIS_MODULE,
+       .proc_name              = DRV_MODULE_NAME,
+       .show_info              = macscsi_show_info,
+       .write_info             = macscsi_write_info,
+       .name                   = "Macintosh NCR5380 SCSI",
+       .info                   = macscsi_info,
+       .queuecommand           = macscsi_queue_command,
+       .eh_abort_handler       = macscsi_abort,
+       .eh_bus_reset_handler   = macscsi_bus_reset,
+       .can_queue              = 16,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int __init mac_scsi_probe(struct platform_device *pdev)
@@ -432,15 +379,14 @@ static int __init mac_scsi_probe(struct platform_device *pdev)
        } else
                host_flags |= FLAG_NO_PSEUDO_DMA;
 
-#ifdef RESET_BOOT
-       mac_scsi_reset_boot(instance);
-#endif
-
 #ifdef SUPPORT_TAGS
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
+       host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        if (instance->irq != NO_IRQ) {
                error = request_irq(instance->irq, macscsi_intr, IRQF_SHARED,
@@ -449,6 +395,8 @@ static int __init mac_scsi_probe(struct platform_device *pdev)
                        goto fail_irq;
        }
 
+       NCR5380_maybe_reset_bus(instance);
+
        error = scsi_add_host(instance, NULL);
        if (error)
                goto fail_host;
@@ -463,6 +411,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
        return error;
 }
index a70692779a16c770300b410eda56a9f1823e9239..4cf9ed96414f05c881221dabcc18ca5717bd2c11 100644 (file)
@@ -179,8 +179,12 @@ mraid_mm_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 
        /*
         * The following call will block till a kioc is available
+        * or return NULL if the free kioc list of the adapter
+        * (mraid_mmadp_t) passed to mraid_mm_alloc_kioc is empty
         */
        kioc = mraid_mm_alloc_kioc(adp);
+       if (!kioc)
+               return -ENXIO;
 
        /*
         * User sent the old mimd_t ioctl packet. Convert it to uioc_t.
index e81eadd08afc776d6be23b2e2af8f550841da1b4..512037e27783d6192f1cea65b41eefb5fbaa9db4 100644 (file)
@@ -1,6 +1,4 @@
 #define PSEUDO_DMA
-#define UNSAFE  /* Not unsafe for PAS16 -- use it */
-#define PDEBUG 0
 
 /*
  * This driver adapted from Drew Eckhardt's Trantor T128 driver
  
 #include <linux/module.h>
 
-#include <linux/signal.h>
-#include <linux/proc_fs.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/stat.h>
 #include <linux/init.h>
 
 #include <scsi/scsi_host.h>
@@ -87,8 +81,8 @@
 #include "NCR5380.h"
 
 
-static unsigned short pas16_addr = 0;
-static int pas16_irq = 0;
+static unsigned short pas16_addr;
+static int pas16_irq;
  
 
 static const int scsi_irq_translate[] =
@@ -146,22 +140,6 @@ static const unsigned short  pas16_offset[ 8 ] =
                    * START_DMA_INITIATOR_RECEIVE_REG wo
                    */
     };
-/*----------------------------------------------------------------*/
-/* the following will set the monitor border color (useful to find
- where something crashed or gets stuck at */
-/* 1 = blue
- 2 = green
- 3 = cyan
- 4 = red
- 5 = magenta
- 6 = yellow
- 7 = white
-*/
-#if 1
-#define rtrc(i) {inb(0x3da); outb(0x31, 0x3c0); outb((i), 0x3c0);}
-#else
-#define rtrc(i) {}
-#endif
 
 
 /*
@@ -205,7 +183,7 @@ static void __init
        outb( 0x01, io_port + P_TIMEOUT_STATUS_REG_OFFSET );   /* Reset TC */
        outb( 0x01, io_port + WAIT_STATE );   /* 1 Wait state */
 
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
+       inb(io_port + pas16_offset[RESET_PARITY_INTERRUPT_REG]);
 
        /* Set the SCSI interrupt pointer without mucking up the sound
         * interrupt pointer in the same byte.
@@ -280,13 +258,13 @@ static int __init
      * put in an additional test to try to weed them out.
      */
 
-    outb( 0x01, io_port + WAIT_STATE );        /* 1 Wait state */
-    NCR5380_write( MODE_REG, 0x20 );           /* Is it really SCSI? */
-    if( NCR5380_read( MODE_REG ) != 0x20 )     /* Write to a reg.    */
-       return 0;                               /* and try to read    */
-    NCR5380_write( MODE_REG, 0x00 );           /* it back.           */
-    if( NCR5380_read( MODE_REG ) != 0x00 )
-       return 0;
+       outb(0x01, io_port + WAIT_STATE);             /* 1 Wait state */
+       outb(0x20, io_port + pas16_offset[MODE_REG]); /* Is it really SCSI? */
+       if (inb(io_port + pas16_offset[MODE_REG]) != 0x20) /* Write to a reg. */
+               return 0;                                  /* and try to read */
+       outb(0x00, io_port + pas16_offset[MODE_REG]);      /* it back. */
+       if (inb(io_port + pas16_offset[MODE_REG]) != 0x00)
+               return 0;
 
     return 1;
 }
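The read-back probe above, in isolation (a sketch; 'port' stands for io_port + pas16_offset[MODE_REG]):

static int mode_reg_probe(unsigned int port)
{
        outb(0x20, port);       /* write a test pattern; absent hardware */
        if (inb(port) != 0x20)  /* or a sound register won't echo it back */
                return 0;
        outb(0x00, port);       /* and verify it clears again, so a bus */
        if (inb(port) != 0x00)  /* floating high doesn't fake a match */
                return 0;
        return 1;               /* plausibly a live 5380 MODE_REG */
}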
@@ -305,7 +283,7 @@ static int __init
 
 static int __init pas16_setup(char *str)
 {
-    static int commandline_current = 0;
+       static int commandline_current;
     int i;
     int ints[10];
 
@@ -344,8 +322,8 @@ __setup("pas16=", pas16_setup);
 
 static int __init pas16_detect(struct scsi_host_template *tpnt)
 {
-    static int current_override = 0;
-    static unsigned short current_base = 0;
+       static int current_override;
+       static unsigned short current_base;
     struct Scsi_Host *instance;
     unsigned short io_port;
     int  count;
@@ -377,34 +355,32 @@ static int __init pas16_detect(struct scsi_host_template *tpnt)
        }
        else
            for (; !io_port && (current_base < NO_BASES); ++current_base) {
-#if (PDEBUG & PDEBUG_INIT)
-    printk("scsi-pas16 : probing io_port %04x\n", (unsigned int) bases[current_base].io_port);
-#endif
+               dprintk(NDEBUG_INIT, "pas16: probing io_port 0x%04x\n",
+                       (unsigned int)bases[current_base].io_port);
                if ( !bases[current_base].noauto &&
                     pas16_hw_detect( current_base ) ){
                        io_port = bases[current_base].io_port;
                        init_board( io_port, default_irqs[ current_base ], 0 ); 
-#if (PDEBUG & PDEBUG_INIT)
-                       printk("scsi-pas16 : detected board.\n");
-#endif
+                       dprintk(NDEBUG_INIT, "pas16: detected board\n");
                }
     }
 
-
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
-       printk("scsi-pas16 : io_port = %04x\n", (unsigned int) io_port);
-#endif
+       dprintk(NDEBUG_INIT, "pas16: io_port = 0x%04x\n",
+               (unsigned int)io_port);
 
        if (!io_port)
            break;
 
        instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
        if(instance == NULL)
-               break;
+               goto out;
                
        instance->io_port = io_port;
 
-       NCR5380_init(instance, 0);
+       if (NCR5380_init(instance, 0))
+               goto out_unregister;
+
+       NCR5380_maybe_reset_bus(instance);
 
        if (overrides[current_override].irq != IRQ_AUTO)
            instance->irq = overrides[current_override].irq;
@@ -431,14 +407,18 @@ static int __init pas16_detect(struct scsi_host_template *tpnt)
            outb( (inb(io_port + IO_CONFIG_3) & 0x0f), io_port + IO_CONFIG_3 );
        }
 
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
-       printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+       dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+               instance->host_no, instance->irq);
 
        ++current_override;
        ++count;
     }
     return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out:
+       return count;
 }
 
 /*
@@ -561,29 +541,29 @@ static int pas16_release(struct Scsi_Host *shost)
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
        return 0;
 }
 
 static struct scsi_host_template driver_template = {
-       .name           = "Pro Audio Spectrum-16 SCSI",
-       .detect         = pas16_detect,
-       .release        = pas16_release,
-       .proc_name      = "pas16",
-       .show_info      = pas16_show_info,
-       .write_info     = pas16_write_info,
-       .info           = pas16_info,
-       .queuecommand   = pas16_queue_command,
-       .eh_abort_handler = pas16_abort,
-       .eh_bus_reset_handler = pas16_bus_reset,
-       .bios_param     = pas16_biosparam, 
-       .can_queue      = CAN_QUEUE,
-       .this_id        = 7,
-       .sg_tablesize   = SG_ALL,
-       .cmd_per_lun    = CMD_PER_LUN,
-       .use_clustering = DISABLE_CLUSTERING,
+       .name                   = "Pro Audio Spectrum-16 SCSI",
+       .detect                 = pas16_detect,
+       .release                = pas16_release,
+       .proc_name              = "pas16",
+       .show_info              = pas16_show_info,
+       .write_info             = pas16_write_info,
+       .info                   = pas16_info,
+       .queuecommand           = pas16_queue_command,
+       .eh_abort_handler       = pas16_abort,
+       .eh_bus_reset_handler   = pas16_bus_reset,
+       .bios_param             = pas16_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
 
index c6109c80050bdf59d2b6933ba43579f8afa896ed..d375277172258391ed84ff1627c060f007e1f188 100644 (file)
@@ -24,9 +24,6 @@
 #ifndef PAS16_H
 #define PAS16_H
 
-#define PDEBUG_INIT    0x1
-#define PDEBUG_TRANSFER 0x2
-
 #define PAS16_DEFAULT_BASE_1  0x388
 #define PAS16_DEFAULT_BASE_2  0x384
 #define PAS16_DEFAULT_BASE_3  0x38c
 #define OPERATION_MODE_1 0xec03
 #define IO_CONFIG_3 0xf002
 
+#define NCR5380_implementation_fields /* none */
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32 
-#endif
-
-#define NCR5380_implementation_fields \
-    volatile unsigned short io_port
-
-#define NCR5380_local_declare() \
-    volatile unsigned short io_port
+#define PAS16_io_port(reg) (instance->io_port + pas16_offset[(reg)])
 
-#define NCR5380_setup(instance) \
-    io_port = (instance)->io_port
-
-#define PAS16_io_port(reg) ( io_port + pas16_offset[(reg)] )
-
-#if !(PDEBUG & PDEBUG_TRANSFER) 
 #define NCR5380_read(reg) ( inb(PAS16_io_port(reg)) )
 #define NCR5380_write(reg, value) ( outb((value),PAS16_io_port(reg)) )
-#else
-#define NCR5380_read(reg)                                              \
-    (((unsigned char) printk("scsi%d : read register %d at io_port %04x\n"\
-    , instance->hostno, (reg), PAS16_io_port(reg))), inb( PAS16_io_port(reg)) )
-
-#define NCR5380_write(reg, value)                                      \
-    (printk("scsi%d : write %02x to register %d at io_port %04x\n",    \
-           instance->hostno, (value), (reg), PAS16_io_port(reg)),      \
-    outb( (value),PAS16_io_port(reg) ) )
-
-#endif
 
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
 
 #define NCR5380_intr pas16_intr
-#define do_NCR5380_intr do_pas16_intr
 #define NCR5380_queue_command pas16_queue_command
 #define NCR5380_abort pas16_abort
 #define NCR5380_bus_reset pas16_bus_reset
    
 #define PAS16_IRQS 0xd4a8 
 
-#endif /* ndef ASM */
 #endif /* PAS16_H */
index 52a87657c7dd3e4945aad29e9b3cbf983b94efad..692a7570b5e1475da8371f1d08939b8898912ebe 100644 (file)
@@ -2204,7 +2204,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
        /* Clear outstanding commands array. */
        for (que = 0; que < ha->max_req_queues; que++) {
                req = ha->req_q_map[que];
-               if (!req)
+               if (!req || !test_bit(que, ha->req_qid_map))
                        continue;
                req->out_ptr = (void *)(req->ring + req->length);
                *req->out_ptr = 0;
@@ -2221,7 +2221,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
 
        for (que = 0; que < ha->max_rsp_queues; que++) {
                rsp = ha->rsp_q_map[que];
-               if (!rsp)
+               if (!rsp || !test_bit(que, ha->rsp_qid_map))
                        continue;
                rsp->in_ptr = (void *)(rsp->ring + rsp->length);
                *rsp->in_ptr = 0;
@@ -4981,7 +4981,7 @@ qla25xx_init_queues(struct qla_hw_data *ha)
 
        for (i = 1; i < ha->max_rsp_queues; i++) {
                rsp = ha->rsp_q_map[i];
-               if (rsp) {
+               if (rsp && test_bit(i, ha->rsp_qid_map)) {
                        rsp->options &= ~BIT_0;
                        ret = qla25xx_init_rsp_que(base_vha, rsp);
                        if (ret != QLA_SUCCESS)
@@ -4996,8 +4996,8 @@ qla25xx_init_queues(struct qla_hw_data *ha)
        }
        for (i = 1; i < ha->max_req_queues; i++) {
                req = ha->req_q_map[i];
-               if (req) {
-               /* Clear outstanding commands array. */
+               if (req && test_bit(i, ha->req_qid_map)) {
+                       /* Clear outstanding commands array. */
                        req->options &= ~BIT_0;
                        ret = qla25xx_init_req_que(base_vha, req);
                        if (ret != QLA_SUCCESS)
index d4d65eb0e9b41264eb2d86e4d4af4997e0268c5c..4af95479a9db9325d8bb42fff0dc0d0f5ec6b682 100644 (file)
@@ -3063,9 +3063,9 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
                    "MSI-X: Failed to enable support "
                    "-- %d/%d\n Retry with %d vectors.\n",
                    ha->msix_count, ret, ret);
+               ha->msix_count = ret;
+               ha->max_rsp_queues = ha->msix_count - 1;
        }
-       ha->msix_count = ret;
-       ha->max_rsp_queues = ha->msix_count - 1;
        ha->msix_entries = kzalloc(sizeof(struct qla_msix_entry) *
                                ha->msix_count, GFP_KERNEL);
        if (!ha->msix_entries) {
index c5dd594f6c316185b203f3e25c1595b4c320da81..cf7ba52bae665fa482b8535f92e5fcb1dbe25244 100644 (file)
@@ -600,7 +600,7 @@ qla25xx_delete_queues(struct scsi_qla_host *vha)
        /* Delete request queues */
        for (cnt = 1; cnt < ha->max_req_queues; cnt++) {
                req = ha->req_q_map[cnt];
-               if (req) {
+               if (req && test_bit(cnt, ha->req_qid_map)) {
                        ret = qla25xx_delete_req_que(vha, req);
                        if (ret != QLA_SUCCESS) {
                                ql_log(ql_log_warn, vha, 0x00ea,
@@ -614,7 +614,7 @@ qla25xx_delete_queues(struct scsi_qla_host *vha)
        /* Delete response queues */
        for (cnt = 1; cnt < ha->max_rsp_queues; cnt++) {
                rsp = ha->rsp_q_map[cnt];
-               if (rsp) {
+               if (rsp && test_bit(cnt, ha->rsp_qid_map)) {
                        ret = qla25xx_delete_rsp_que(vha, rsp);
                        if (ret != QLA_SUCCESS) {
                                ql_log(ql_log_warn, vha, 0x00eb,
index f1788db4319560c8210eda5a0fc4f576d5ec321f..f6c7ce35b542ecadfe0305095e300bfabe95950c 100644 (file)
@@ -409,6 +409,9 @@ static void qla2x00_free_queues(struct qla_hw_data *ha)
        int cnt;
 
        for (cnt = 0; cnt < ha->max_req_queues; cnt++) {
+               if (!test_bit(cnt, ha->req_qid_map))
+                       continue;
+
                req = ha->req_q_map[cnt];
                qla2x00_free_req_que(ha, req);
        }
@@ -416,6 +419,9 @@ static void qla2x00_free_queues(struct qla_hw_data *ha)
        ha->req_q_map = NULL;
 
        for (cnt = 0; cnt < ha->max_rsp_queues; cnt++) {
+               if (!test_bit(cnt, ha->rsp_qid_map))
+                       continue;
+
                rsp = ha->rsp_q_map[cnt];
                qla2x00_free_rsp_que(ha, rsp);
        }
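The guard applied throughout the qla2xxx hunks above follows one pattern: a queue slot is live only when both its pointer and its qid-map bit are set, so slots that were never created or were already deleted are skipped. Schematically (a sketch, not a literal excerpt):

for (cnt = 0; cnt < ha->max_req_queues; cnt++) {
        req = ha->req_q_map[cnt];
        if (!req || !test_bit(cnt, ha->req_qid_map))
                continue;       /* slot unused or already torn down */
        /* ... init / delete / free req ... */
}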
index 8075a4cdb45cf85aaa09c1e26fef1bdd8dc82b55..ee967becd257e1b5a5724aed241a9537fc0f3bc3 100644 (file)
@@ -105,7 +105,7 @@ static void qlt_response_pkt(struct scsi_qla_host *ha, response_t *pkt);
 static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun,
        int fn, void *iocb, int flags);
 static void qlt_send_term_exchange(struct scsi_qla_host *ha, struct qla_tgt_cmd
-       *cmd, struct atio_from_isp *atio, int ha_locked);
+       *cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort);
 static void qlt_reject_free_srr_imm(struct scsi_qla_host *ha,
        struct qla_tgt_srr_imm *imm, int ha_lock);
 static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha,
@@ -1756,7 +1756,7 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd)
                qlt_send_notify_ack(vha, &mcmd->orig_iocb.imm_ntfy,
                    0, 0, 0, 0, 0, 0);
        else {
-               if (mcmd->se_cmd.se_tmr_req->function == TMR_ABORT_TASK)
+               if (mcmd->orig_iocb.atio.u.raw.entry_type == ABTS_RECV_24XX)
                        qlt_24xx_send_abts_resp(vha, &mcmd->orig_iocb.abts,
                            mcmd->fc_tm_rsp, false);
                else
@@ -2665,7 +2665,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
                        /* no need to terminate. FW already freed exchange. */
                        qlt_abort_cmd_on_host_reset(cmd->vha, cmd);
                else
-                       qlt_send_term_exchange(vha, cmd, &cmd->atio, 1);
+                       qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0);
                spin_unlock_irqrestore(&ha->hardware_lock, flags);
                return 0;
        }
@@ -3173,7 +3173,8 @@ static int __qlt_send_term_exchange(struct scsi_qla_host *vha,
 }
 
 static void qlt_send_term_exchange(struct scsi_qla_host *vha,
-       struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked)
+       struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked,
+       int ul_abort)
 {
        unsigned long flags = 0;
        int rc;
@@ -3193,8 +3194,7 @@ static void qlt_send_term_exchange(struct scsi_qla_host *vha,
                qlt_alloc_qfull_cmd(vha, atio, 0, 0);
 
 done:
-       if (cmd && (!cmd->aborted ||
-           !cmd->cmd_sent_to_fw)) {
+       if (cmd && !ul_abort && !cmd->aborted) {
                if (cmd->sg_mapped)
                        qlt_unmap_sg(vha, cmd);
                vha->hw->tgt.tgt_ops->free_cmd(cmd);
@@ -3253,21 +3253,38 @@ static void qlt_chk_exch_leak_thresh_hold(struct scsi_qla_host *vha)
 
 }
 
-void qlt_abort_cmd(struct qla_tgt_cmd *cmd)
+int qlt_abort_cmd(struct qla_tgt_cmd *cmd)
 {
        struct qla_tgt *tgt = cmd->tgt;
        struct scsi_qla_host *vha = tgt->vha;
        struct se_cmd *se_cmd = &cmd->se_cmd;
+       unsigned long flags;
 
        ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014,
            "qla_target(%d): terminating exchange for aborted cmd=%p "
            "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd,
            se_cmd->tag);
 
+       spin_lock_irqsave(&cmd->cmd_lock, flags);
+       if (cmd->aborted) {
+               spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+               /*
+                * It's normal to see 2 calls in this path:
+                *  1) XFER Rdy completion + CMD_T_ABORT
+                *  2) TCM TMR - drain_state_list
+                */
+               ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                       "multiple abort. %p transport_state %x, t_state %x,"
+                       " se_cmd_flags %x\n", cmd, cmd->se_cmd.transport_state,
+                       cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags);
+               return EIO;
+       }
        cmd->aborted = 1;
        cmd->cmd_flags |= BIT_6;
+       spin_unlock_irqrestore(&cmd->cmd_lock, flags);
 
-       qlt_send_term_exchange(vha, cmd, &cmd->atio, 0);
+       qlt_send_term_exchange(vha, cmd, &cmd->atio, 0, 1);
+       return 0;
 }
 EXPORT_SYMBOL(qlt_abort_cmd);
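
The hunk above turns qlt_abort_cmd() into a "first caller wins" gate. A minimal
sketch of the pattern under the same assumptions (a per-command spinlock plus an
'aborted' flag, as in qla_tgt_cmd; condensed, not the full driver logic):

    static int abort_once(struct qla_tgt_cmd *cmd)      /* illustrative only */
    {
            unsigned long flags;

            spin_lock_irqsave(&cmd->cmd_lock, flags);
            if (cmd->aborted) {
                    /* a concurrent caller already owns the termination */
                    spin_unlock_irqrestore(&cmd->cmd_lock, flags);
                    return EIO;
            }
            cmd->aborted = 1;       /* this caller owns the termination */
            spin_unlock_irqrestore(&cmd->cmd_lock, flags);
            return 0;
    }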
 
@@ -3282,6 +3299,9 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd)
 
        BUG_ON(cmd->cmd_in_wq);
 
+       if (cmd->sg_mapped)
+               qlt_unmap_sg(cmd->vha, cmd);
+
        if (!cmd->q_full)
                qlt_decr_num_pend_cmds(cmd->vha);
 
@@ -3399,7 +3419,7 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio,
                term = 1;
 
        if (term)
-               qlt_send_term_exchange(vha, cmd, &cmd->atio, 1);
+               qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0);
 
        return term;
 }
@@ -3580,12 +3600,13 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
                case CTIO_PORT_LOGGED_OUT:
                case CTIO_PORT_UNAVAILABLE:
                {
-                       int logged_out = (status & 0xFFFF);
+                       int logged_out =
+                               (status & 0xFFFF) == CTIO_PORT_LOGGED_OUT;
+
                        ql_dbg(ql_dbg_tgt_mgt, vha, 0xf059,
                            "qla_target(%d): CTIO with %s status %x "
                            "received (state %x, se_cmd %p)\n", vha->vp_idx,
-                           (logged_out == CTIO_PORT_LOGGED_OUT) ?
-                           "PORT LOGGED OUT" : "PORT UNAVAILABLE",
+                           logged_out ? "PORT LOGGED OUT" : "PORT UNAVAILABLE",
                            status, cmd->state, se_cmd);
 
                        if (logged_out && cmd->sess) {
@@ -3754,6 +3775,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd)
                goto out_term;
        }
 
+       spin_lock_init(&cmd->cmd_lock);
        cdb = &atio->u.isp24.fcp_cmnd.cdb[0];
        cmd->se_cmd.tag = atio->u.isp24.exchange_addr;
        cmd->unpacked_lun = scsilun_to_int(
@@ -3796,7 +3818,7 @@ out_term:
         */
        cmd->cmd_flags |= BIT_2;
        spin_lock_irqsave(&ha->hardware_lock, flags);
-       qlt_send_term_exchange(vha, NULL, &cmd->atio, 1);
+       qlt_send_term_exchange(vha, NULL, &cmd->atio, 1, 0);
 
        qlt_decr_num_pend_cmds(vha);
        percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
@@ -3918,7 +3940,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work)
 
 out_term:
        spin_lock_irqsave(&ha->hardware_lock, flags);
-       qlt_send_term_exchange(vha, NULL, &op->atio, 1);
+       qlt_send_term_exchange(vha, NULL, &op->atio, 1, 0);
        spin_unlock_irqrestore(&ha->hardware_lock, flags);
        kfree(op);
 
@@ -3982,7 +4004,8 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
 
        cmd->cmd_in_wq = 1;
        cmd->cmd_flags |= BIT_0;
-       cmd->se_cmd.cpuid = -1;
+       cmd->se_cmd.cpuid = ha->msix_count ?
+               ha->tgt.rspq_vector_cpuid : WORK_CPU_UNBOUND;
 
        spin_lock(&vha->cmd_list_lock);
        list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list);
@@ -3990,7 +4013,6 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
 
        INIT_WORK(&cmd->work, qlt_do_work);
        if (ha->msix_count) {
-               cmd->se_cmd.cpuid = ha->tgt.rspq_vector_cpuid;
                if (cmd->atio.u.isp24.fcp_cmnd.rddata)
                        queue_work_on(smp_processor_id(), qla_tgt_wq,
                            &cmd->work);
@@ -4771,7 +4793,7 @@ out_reject:
                dump_stack();
        } else {
                cmd->cmd_flags |= BIT_9;
-               qlt_send_term_exchange(vha, cmd, &cmd->atio, 1);
+               qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0);
        }
        spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
@@ -4950,7 +4972,7 @@ static void qlt_prepare_srr_imm(struct scsi_qla_host *vha,
                                    sctio, sctio->srr_id);
                                list_del(&sctio->srr_list_entry);
                                qlt_send_term_exchange(vha, sctio->cmd,
-                                   &sctio->cmd->atio, 1);
+                                   &sctio->cmd->atio, 1, 0);
                                kfree(sctio);
                        }
                }
@@ -5123,7 +5145,7 @@ static int __qlt_send_busy(struct scsi_qla_host *vha,
            atio->u.isp24.fcp_hdr.s_id);
        spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
        if (!sess) {
-               qlt_send_term_exchange(vha, NULL, atio, 1);
+               qlt_send_term_exchange(vha, NULL, atio, 1, 0);
                return 0;
        }
        /* Sending marker isn't necessary, since we called from ISR */
@@ -5406,7 +5428,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
 #if 1 /* With TERM EXCHANGE some FC cards refuse to boot */
                                qlt_send_busy(vha, atio, SAM_STAT_BUSY);
 #else
-                               qlt_send_term_exchange(vha, NULL, atio, 1);
+                               qlt_send_term_exchange(vha, NULL, atio, 1, 0);
 #endif
 
                                if (!ha_locked)
@@ -5523,7 +5545,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt)
 #if 1 /* With TERM EXCHANGE some FC cards refuse to boot */
                                qlt_send_busy(vha, atio, 0);
 #else
-                               qlt_send_term_exchange(vha, NULL, atio, 1);
+                               qlt_send_term_exchange(vha, NULL, atio, 1, 0);
 #endif
                        } else {
                                if (tgt->tgt_stop) {
@@ -5532,7 +5554,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt)
                                            "command to target, sending TERM "
                                            "EXCHANGE for rsp\n");
                                        qlt_send_term_exchange(vha, NULL,
-                                           atio, 1);
+                                           atio, 1, 0);
                                } else {
                                        ql_dbg(ql_dbg_tgt, vha, 0xe060,
                                            "qla_target(%d): Unable to send "
@@ -5960,7 +5982,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
        return;
 
 out_term:
-       qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 0);
+       qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0);
        if (sess)
                ha->tgt.tgt_ops->put_sess(sess);
        spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
index 71b2865ba3c8416d6ce583df64125dfb7071ca2b..22a6a767fe07b1eba54e23007606bb3c39226b0f 100644 (file)
@@ -943,6 +943,36 @@ struct qla_tgt_sess {
        qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX];
 };
 
+typedef enum {
+       /*
+        * BIT_0 - Atio Arrival / schedule to work
+        * BIT_1 - qlt_do_work
+        * BIT_2 - qlt_do_work failed
+        * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending
+        * BIT_4 - read response/tcm_qla2xxx_queue_data_in
+        * BIT_5 - status response/tcm_qla2xxx_queue_status
+        * BIT_6 - tcm request to abort/Term exchange.
+        *      pre_xmit_response->qlt_send_term_exchange
+        * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response)
+        * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer)
+        * BIT_9 - SRR received (qlt_handle_srr->qlt_send_term_exchange)
+        * BIT_10 - Data in - handle_data->tcm_qla2xxx_handle_data
+        *
+        * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd
+        * BIT_13 - Bad completion -
+        *      qlt_ctio_do_completion --> qlt_term_ctio_exchange
+        * BIT_14 - Back end data received/sent.
+        * BIT_15 - SRR prepare ctio
+        * BIT_16 - complete free
+        * BIT_17 - flush - qlt_abort_cmd_on_host_reset
+        * BIT_18 - completion w/abort status
+        * BIT_19 - completion w/unknown status
+        * BIT_20 - tcm_qla2xxx_free_cmd
+        */
+       CMD_FLAG_DATA_WORK = BIT_11,
+       CMD_FLAG_DATA_WORK_FREE = BIT_21,
+} cmd_flags_t;
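
The two named flags are meant to be decoded as a pair; a sketch of the intended
reading (the helper below is hypothetical and mirrors the DATA_WORK_NOT_FREE()
macro added later in this series):

    /*
     *   DATA_WORK  DATA_WORK_FREE   meaning
     *       0            0          data work has not run yet
     *       1            0          data work ran but did not free the cmd
     *       1            1          data work ran and freed the cmd
     */
    static inline bool data_work_ran_not_freed(u32 flags)   /* hypothetical */
    {
            return (flags & (CMD_FLAG_DATA_WORK | CMD_FLAG_DATA_WORK_FREE)) ==
                    CMD_FLAG_DATA_WORK;
    }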
+
 struct qla_tgt_cmd {
        struct se_cmd se_cmd;
        struct qla_tgt_sess *sess;
@@ -952,6 +982,7 @@ struct qla_tgt_cmd {
        /* Sense buffer that will be mapped into outgoing status */
        unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER];
 
+       spinlock_t cmd_lock;
        /* to save extra sess dereferences */
        unsigned int conf_compl_supported:1;
        unsigned int sg_mapped:1;
@@ -986,30 +1017,8 @@ struct qla_tgt_cmd {
 
        uint64_t jiffies_at_alloc;
        uint64_t jiffies_at_free;
-       /* BIT_0 - Atio Arrival / schedule to work
-        * BIT_1 - qlt_do_work
-        * BIT_2 - qlt_do work failed
-        * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending
-        * BIT_4 - read respond/tcm_qla2xx_queue_data_in
-        * BIT_5 - status respond / tcm_qla2xx_queue_status
-        * BIT_6 - tcm request to abort/Term exchange.
-        *      pre_xmit_response->qlt_send_term_exchange
-        * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response)
-        * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer)
-        * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange)
-        * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data
-        * BIT_11 - Data actually going to TCM : tcm_qla2xx_handle_data_work
-        * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd
-        * BIT_13 - Bad completion -
-        *      qlt_ctio_do_completion --> qlt_term_ctio_exchange
-        * BIT_14 - Back end data received/sent.
-        * BIT_15 - SRR prepare ctio
-        * BIT_16 - complete free
-        * BIT_17 - flush - qlt_abort_cmd_on_host_reset
-        * BIT_18 - completion w/abort status
-        * BIT_19 - completion w/unknown status
-        */
-       uint32_t cmd_flags;
+
+       cmd_flags_t cmd_flags;
 };
 
 struct qla_tgt_sess_work_param {
@@ -1148,7 +1157,7 @@ static inline void sid_to_portid(const uint8_t *s_id, port_id_t *p)
 extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *);
 extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *);
 extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t);
-extern void qlt_abort_cmd(struct qla_tgt_cmd *);
+extern int qlt_abort_cmd(struct qla_tgt_cmd *);
 extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *);
 extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *);
 extern void qlt_free_cmd(struct qla_tgt_cmd *cmd);
index ddbe2e7ac14d095129cc9cbd90ab87cbd414bc36..c3e62252460452aac6507db5ddc7ad576f969ffa 100644 (file)
@@ -395,6 +395,10 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha,
        if (ent->t263.queue_type == T263_QUEUE_TYPE_REQ) {
                for (i = 0; i < vha->hw->max_req_queues; i++) {
                        struct req_que *req = vha->hw->req_q_map[i];
+
+                       if (!test_bit(i, vha->hw->req_qid_map))
+                               continue;
+
                        if (req || !buf) {
                                length = req ?
                                    req->length : REQUEST_ENTRY_CNT_24XX;
@@ -408,6 +412,10 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha,
        } else if (ent->t263.queue_type == T263_QUEUE_TYPE_RSP) {
                for (i = 0; i < vha->hw->max_rsp_queues; i++) {
                        struct rsp_que *rsp = vha->hw->rsp_q_map[i];
+
+                       if (!test_bit(i, vha->hw->rsp_qid_map))
+                               continue;
+
                        if (rsp || !buf) {
                                length = rsp ?
                                    rsp->length : RESPONSE_ENTRY_CNT_MQ;
@@ -634,6 +642,10 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha,
        if (ent->t274.queue_type == T274_QUEUE_TYPE_REQ_SHAD) {
                for (i = 0; i < vha->hw->max_req_queues; i++) {
                        struct req_que *req = vha->hw->req_q_map[i];
+
+                       if (!test_bit(i, vha->hw->req_qid_map))
+                               continue;
+
                        if (req || !buf) {
                                qla27xx_insert16(i, buf, len);
                                qla27xx_insert16(1, buf, len);
@@ -645,6 +657,10 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha,
        } else if (ent->t274.queue_type == T274_QUEUE_TYPE_RSP_SHAD) {
                for (i = 0; i < vha->hw->max_rsp_queues; i++) {
                        struct rsp_que *rsp = vha->hw->rsp_q_map[i];
+
+                       if (!test_bit(i, vha->hw->rsp_qid_map))
+                               continue;
+
                        if (rsp || !buf) {
                                qla27xx_insert16(i, buf, len);
                                qla27xx_insert16(1, buf, len);
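
All four loops in this file gain the same guard: the queue arrays can be sparse,
so the qid bitmaps are consulted before a slot is dereferenced. In miniature
(dump_one_queue() is a hypothetical consumer standing in for the dump logic):

    static void dump_req_queues(struct scsi_qla_host *vha)   /* illustrative */
    {
            int i;

            for (i = 0; i < vha->hw->max_req_queues; i++) {
                    if (!test_bit(i, vha->hw->req_qid_map))
                            continue;       /* slot never allocated; skip it */
                    dump_one_queue(vha->hw->req_q_map[i]);  /* hypothetical */
            }
    }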
index faf0a126627f65d6cbe275c1ea4b34062a579a7a..1808a01cfb7e79fe9aa34d341bfcd8fc3603c898 100644 (file)
@@ -298,6 +298,10 @@ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd)
 {
        cmd->vha->tgt_counters.core_qla_free_cmd++;
        cmd->cmd_in_wq = 1;
+
+       BUG_ON(cmd->cmd_flags & BIT_20);
+       cmd->cmd_flags |= BIT_20;
+
        INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free);
        queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
 }
@@ -374,6 +378,20 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd)
 {
        struct qla_tgt_cmd *cmd = container_of(se_cmd,
                                struct qla_tgt_cmd, se_cmd);
+
+       if (cmd->aborted) {
+               /* The cmd can loop during Q-full. tcm_qla2xxx_aborted_task
+                * can get ahead of this cmd and has already kick-started
+                * the free.
+                */
+               pr_debug("write_pending aborted cmd[%p] refcount %d "
+                       "transport_state %x, t_state %x, se_cmd_flags %x\n",
+                       cmd, cmd->se_cmd.cmd_kref.refcount.counter,
+                       cmd->se_cmd.transport_state,
+                       cmd->se_cmd.t_state,
+                       cmd->se_cmd.se_cmd_flags);
+               return 0;
+       }
        cmd->cmd_flags |= BIT_3;
        cmd->bufflen = se_cmd->data_length;
        cmd->dma_data_direction = target_reverse_dma_direction(se_cmd);
@@ -405,7 +423,7 @@ static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd)
            se_cmd->t_state == TRANSPORT_COMPLETE_QF_WP) {
                spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
                wait_for_completion_timeout(&se_cmd->t_transport_stop_comp,
-                                           3 * HZ);
+                                               50);
                return 0;
        }
        spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
@@ -444,6 +462,9 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
        if (bidi)
                flags |= TARGET_SCF_BIDI_OP;
 
+       if (se_cmd->cpuid != WORK_CPU_UNBOUND)
+               flags |= TARGET_SCF_USE_CPUID;
+
        sess = cmd->sess;
        if (!sess) {
                pr_err("Unable to locate struct qla_tgt_sess from qla_tgt_cmd\n");
@@ -465,13 +486,25 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
 static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 {
        struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
+       unsigned long flags;
 
        /*
         * Ensure that the complete FCP WRITE payload has been received.
         * Otherwise return an exception via CHECK_CONDITION status.
         */
        cmd->cmd_in_wq = 0;
-       cmd->cmd_flags |= BIT_11;
+
+       spin_lock_irqsave(&cmd->cmd_lock, flags);
+       cmd->cmd_flags |= CMD_FLAG_DATA_WORK;
+       if (cmd->aborted) {
+               cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE;
+               spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+
+               tcm_qla2xxx_free_cmd(cmd);
+               return;
+       }
+       spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+
        cmd->vha->tgt_counters.qla_core_ret_ctio++;
        if (!cmd->write_data_transferred) {
                /*
@@ -546,6 +579,20 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)
        struct qla_tgt_cmd *cmd = container_of(se_cmd,
                                struct qla_tgt_cmd, se_cmd);
 
+       if (cmd->aborted) {
+               /* The cmd can loop during Q-full. tcm_qla2xxx_aborted_task
+                * can get ahead of this cmd and has already kick-started
+                * the free.
+                */
+               pr_debug("queue_data_in aborted cmd[%p] refcount %d "
+                       "transport_state %x, t_state %x, se_cmd_flags %x\n",
+                       cmd, cmd->se_cmd.cmd_kref.refcount.counter,
+                       cmd->se_cmd.transport_state,
+                       cmd->se_cmd.t_state,
+                       cmd->se_cmd.se_cmd_flags);
+               return 0;
+       }
+
        cmd->cmd_flags |= BIT_4;
        cmd->bufflen = se_cmd->data_length;
        cmd->dma_data_direction = target_reverse_dma_direction(se_cmd);
@@ -637,11 +684,34 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd)
        qlt_xmit_tm_rsp(mcmd);
 }
 
+#define DATA_WORK_NOT_FREE(_flags) \
+       (((_flags) & (CMD_FLAG_DATA_WORK | CMD_FLAG_DATA_WORK_FREE)) == \
+        CMD_FLAG_DATA_WORK)
+
 static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd)
 {
        struct qla_tgt_cmd *cmd = container_of(se_cmd,
                                struct qla_tgt_cmd, se_cmd);
-       qlt_abort_cmd(cmd);
+       unsigned long flags;
+
+       if (qlt_abort_cmd(cmd))
+               return;
+
+       spin_lock_irqsave(&cmd->cmd_lock, flags);
+       if (cmd->state == QLA_TGT_STATE_NEW ||
+           (cmd->state == QLA_TGT_STATE_DATA_IN &&
+            DATA_WORK_NOT_FREE(cmd->cmd_flags))) {
+
+               cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE;
+               spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+               /* The cmd has not reached the firmware;
+                * use this trigger to free it. */
+               tcm_qla2xxx_free_cmd(cmd);
+               return;
+       }
+       spin_unlock_irqrestore(&cmd->cmd_lock, flags);
 }
 
 static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *,
index 2c1160c7ec92fdd48c078c4862a2dac7904edc31..bbfbfd9e5aa391ff65d5e31859f31765bb5b77c3 100644 (file)
@@ -205,6 +205,8 @@ static struct {
        {"Intel", "Multi-Flex", NULL, BLIST_NO_RSOC},
        {"iRiver", "iFP Mass Driver", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
        {"LASOUND", "CDX7405", "3.10", BLIST_MAX5LUN | BLIST_SINGLELUN},
+       {"Marvell", "Console", NULL, BLIST_SKIP_VPD_PAGES},
+       {"Marvell", "91xx Config", "1.01", BLIST_SKIP_VPD_PAGES},
        {"MATSHITA", "PD-1", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
        {"MATSHITA", "DMC-LC5", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
        {"MATSHITA", "DMC-LC40", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
@@ -227,6 +229,7 @@ static struct {
        {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
        {"Promise", "", NULL, BLIST_SPARSELUN},
        {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
+       {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
        {"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
        {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN},
        {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN},
index 4f18a851e2c7f7de25e590e9a6f93750150e5f37..00bc7218a7f80f06a139a7743b5a2c51331f68f0 100644 (file)
@@ -1272,16 +1272,18 @@ static void __scsi_remove_target(struct scsi_target *starget)
 void scsi_remove_target(struct device *dev)
 {
        struct Scsi_Host *shost = dev_to_shost(dev->parent);
-       struct scsi_target *starget;
+       struct scsi_target *starget, *last_target = NULL;
        unsigned long flags;
 
 restart:
        spin_lock_irqsave(shost->host_lock, flags);
        list_for_each_entry(starget, &shost->__targets, siblings) {
-               if (starget->state == STARGET_DEL)
+               if (starget->state == STARGET_DEL ||
+                   starget == last_target)
                        continue;
                if (starget->dev.parent == dev || &starget->dev == dev) {
                        kref_get(&starget->reap_ref);
+                       last_target = starget;
                        spin_unlock_irqrestore(shost->host_lock, flags);
                        __scsi_remove_target(starget);
                        scsi_target_reap(starget);
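
The fix above keeps the restart loop from spinning forever on a target that
stays visible after being reaped: the last processed entry is remembered and
skipped on the next pass. The shape of the pattern, condensed (do_remove() is a
hypothetical stand-in for the unlock/remove/reap sequence):

    static void remove_matching_targets(struct Scsi_Host *shost,
                                        struct device *dev)  /* illustrative */
    {
            struct scsi_target *starget, *last_target = NULL;
            unsigned long flags;

    restart:
            spin_lock_irqsave(shost->host_lock, flags);
            list_for_each_entry(starget, &shost->__targets, siblings) {
                    if (starget->state == STARGET_DEL || starget == last_target)
                            continue;       /* already dying, or just handled */
                    if (starget->dev.parent == dev || &starget->dev == dev) {
                            last_target = starget;  /* never revisit this one */
                            spin_unlock_irqrestore(shost->host_lock, flags);
                            do_remove(starget);     /* hypothetical; may sleep */
                            goto restart;
                    }
            }
            spin_unlock_irqrestore(shost->host_lock, flags);
    }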
index 4e08d1cd704d1c261c82a98067649154f433ecd1..d749da765df141d7aa66ef2a31e591f6cbc94fed 100644 (file)
@@ -761,7 +761,7 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
                break;
 
        default:
-               ret = BLKPREP_KILL;
+               ret = BLKPREP_INVALID;
                goto out;
        }
 
@@ -839,7 +839,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
        int ret;
 
        if (sdkp->device->no_write_same)
-               return BLKPREP_KILL;
+               return BLKPREP_INVALID;
 
        BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size);
 
@@ -2893,7 +2893,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
            sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
            sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
                rw_max = q->limits.io_opt =
-                       logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+                       sdkp->opt_xfer_blocks * sdp->sector_size;
        else
                rw_max = BLK_DEF_MAX_SECTORS;
 
@@ -3268,8 +3268,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
        int ret = 0;
 
-       if (!sdkp)
-               return 0;       /* this can happen */
+       if (!sdkp)      /* E.g.: runtime suspend following sd_remove() */
+               return 0;
 
        if (sdkp->WCE && sdkp->media_present) {
                sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
@@ -3308,6 +3308,9 @@ static int sd_resume(struct device *dev)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
+
        if (!sdkp->device->manage_start_stop)
                return 0;
 
index 503ab8b46c0b4e8a73d7e826ba1aaf3be1abd6b0..5e820674432ca38c46ad154b831c8eb19e974d34 100644 (file)
@@ -1261,7 +1261,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
        }
 
        sfp->mmap_called = 1;
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+       vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_private_data = sfp;
        vma->vm_ops = &sg_mmap_vm_ops;
        return 0;
index 8bd54a64efd6a52aef163f0cbe4f34f5ef9ac71c..64c867405ad4ff2d5e53972a8ba34f785cc81962 100644 (file)
@@ -144,6 +144,9 @@ static int sr_runtime_suspend(struct device *dev)
 {
        struct scsi_cd *cd = dev_get_drvdata(dev);
 
+       if (!cd)        /* E.g.: runtime suspend following sr_remove() */
+               return 0;
+
        if (cd->media_present)
                return -EBUSY;
        else
@@ -985,6 +988,7 @@ static int sr_remove(struct device *dev)
        scsi_autopm_get_device(cd->device);
 
        del_gendisk(cd->disk);
+       dev_set_drvdata(dev, NULL);
 
        mutex_lock(&sr_ref_mutex);
        kref_put(&cd->kref, sr_kref_release);
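
The sd and sr hunks above implement one pattern: remove() clears drvdata, and
every runtime-PM callback bails out on NULL instead of dereferencing a freed
object. A condensed sketch (the callback name is illustrative, modeled on
sr_runtime_suspend() above):

    static int demo_runtime_suspend(struct device *dev)     /* illustrative */
    {
            struct scsi_cd *cd = dev_get_drvdata(dev);

            if (!cd)        /* remove() already ran for this device */
                    return 0;
            return cd->media_present ? -EBUSY : 0;
    }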
index 41c115c230d9bf66da94693597967d037fdefaf6..292c04eec9ad4b56518afc243d92b37602a3f456 100644 (file)
@@ -42,6 +42,7 @@
 #include <scsi/scsi_devinfo.h>
 #include <scsi/scsi_dbg.h>
 #include <scsi/scsi_transport_fc.h>
+#include <scsi/scsi_transport.h>
 
 /*
  * All wire protocol details (storage protocol between the guest and the host)
@@ -390,7 +391,7 @@ module_param(storvsc_ringbuffer_size, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");
 
 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
-MODULE_PARM_DESC(vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
@@ -477,19 +478,18 @@ struct hv_host_device {
 struct storvsc_scan_work {
        struct work_struct work;
        struct Scsi_Host *host;
-       uint lun;
+       u8 lun;
+       u8 tgt_id;
 };
 
 static void storvsc_device_scan(struct work_struct *work)
 {
        struct storvsc_scan_work *wrk;
-       uint lun;
        struct scsi_device *sdev;
 
        wrk = container_of(work, struct storvsc_scan_work, work);
-       lun = wrk->lun;
 
-       sdev = scsi_device_lookup(wrk->host, 0, 0, lun);
+       sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
        if (!sdev)
                goto done;
        scsi_rescan_device(&sdev->sdev_gendev);
@@ -540,7 +540,7 @@ static void storvsc_remove_lun(struct work_struct *work)
        if (!scsi_host_get(wrk->host))
                goto done;
 
-       sdev = scsi_device_lookup(wrk->host, 0, 0, wrk->lun);
+       sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
 
        if (sdev) {
                scsi_remove_device(sdev);
@@ -940,6 +940,7 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
 
        wrk->host = host;
        wrk->lun = vm_srb->lun;
+       wrk->tgt_id = vm_srb->target_id;
        INIT_WORK(&wrk->work, process_err_fn);
        schedule_work(&wrk->work);
 }
@@ -1770,6 +1771,11 @@ static int __init storvsc_drv_init(void)
        fc_transport_template = fc_attach_transport(&fc_transport_functions);
        if (!fc_transport_template)
                return -ENODEV;
+
+       /*
+        * Install Hyper-V specific timeout handler.
+        */
+       fc_transport_template->eh_timed_out = storvsc_eh_timed_out;
 #endif
 
        ret = vmbus_driver_register(&storvsc_drv);
index 22a42836d193a3723c4cb1193a0c72aeb9d99ffe..b9de487bbd311d4aeec4e215cf698c959f4b1db6 100644 (file)
 #define NCR5380_queue_command           sun3scsi_queue_command
 #define NCR5380_bus_reset               sun3scsi_bus_reset
 #define NCR5380_abort                   sun3scsi_abort
-#define NCR5380_show_info               sun3scsi_show_info
 #define NCR5380_info                    sun3scsi_info
 
 #define NCR5380_dma_read_setup(instance, data, count) \
-        sun3scsi_dma_setup(data, count, 0)
+        sun3scsi_dma_setup(instance, data, count, 0)
 #define NCR5380_dma_write_setup(instance, data, count) \
-        sun3scsi_dma_setup(data, count, 1)
+        sun3scsi_dma_setup(instance, data, count, 1)
 #define NCR5380_dma_residual(instance) \
         sun3scsi_dma_residual(instance)
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
@@ -86,10 +85,6 @@ module_param(setup_use_tagged_queuing, int, 0);
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
 
-/* #define RESET_BOOT */
-
-#define        AFTER_RESET_DELAY       (HZ/2)
-
 /* ms to wait after hitting dma regs */
 #define SUN3_DMA_DELAY 10
 
@@ -100,11 +95,10 @@ static struct scsi_cmnd *sun3_dma_setup_done;
 static unsigned char *sun3_scsi_regp;
 static volatile struct sun3_dma_regs *dregs;
 static struct sun3_udc_regs *udc_regs;
-static unsigned char *sun3_dma_orig_addr = NULL;
-static unsigned long sun3_dma_orig_count = 0;
-static int sun3_dma_active = 0;
-static unsigned long last_residual = 0;
-static struct Scsi_Host *default_instance;
+static unsigned char *sun3_dma_orig_addr;
+static unsigned long sun3_dma_orig_count;
+static int sun3_dma_active;
+static unsigned long last_residual;
 
 /*
  * NCR 5380 register access functions
@@ -144,50 +138,12 @@ static inline void sun3_udc_write(unsigned short val, unsigned char reg)
 }
 #endif
 
-#ifdef RESET_BOOT
-static void sun3_scsi_reset_boot(struct Scsi_Host *instance)
-{
-       unsigned long end;
-       
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No
-        * messing with the queues, interrupts, or locks necessary here.
-        */
-
-       printk( "Sun3 SCSI: resetting the SCSI bus..." );
-
-       /* switch off SCSI IRQ - catch an interrupt without IRQ bit set else */
-//             sun3_disable_irq( IRQ_SUN3_SCSI );
-
-       /* get in phase */
-       NCR5380_write( TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
-       /* assert RST */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay( 50 );
-
-       /* reset RST and interrupt */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
-       for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
-               barrier();
-
-       /* switch on SCSI IRQ again */
-//             sun3_enable_irq( IRQ_SUN3_SCSI );
-
-       printk( " done\n" );
-}
-#endif
-
 // safe bits for the CSR
 #define CSR_GOOD 0x060f
 
-static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
+static irqreturn_t scsi_sun3_intr(int irq, void *dev)
 {
+       struct Scsi_Host *instance = dev;
        unsigned short csr = dregs->csr;
        int handled = 0;
 
@@ -196,46 +152,24 @@ static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
 #endif
 
        if(csr & ~CSR_GOOD) {
-               if(csr & CSR_DMA_BUSERR) {
-                       printk("scsi%d: bus error in dma\n", default_instance->host_no);
-               }
-
-               if(csr & CSR_DMA_CONFLICT) {
-                       printk("scsi%d: dma conflict\n", default_instance->host_no);
-               }
+               if (csr & CSR_DMA_BUSERR)
+                       shost_printk(KERN_ERR, instance, "bus error in DMA\n");
+               if (csr & CSR_DMA_CONFLICT)
+                       shost_printk(KERN_ERR, instance, "DMA conflict\n");
                handled = 1;
        }
 
        if(csr & (CSR_SDB_INT | CSR_DMA_INT)) {
-               NCR5380_intr(irq, dummy);
+               NCR5380_intr(irq, dev);
                handled = 1;
        }
 
        return IRQ_RETVAL(handled);
 }
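
The handler change above follows the standard dev_id convention: the pointer
registered with request_irq() comes back as the handler's second argument,
which removes the need for the file-scope default_instance. Sketch (the handler
name is illustrative):

    static irqreturn_t demo_intr(int irq, void *dev_id)
    {
            struct Scsi_Host *instance = dev_id;  /* as passed to request_irq */

            shost_printk(KERN_DEBUG, instance, "IRQ %d\n", irq);
            return IRQ_HANDLED;
    }

    /* registration, as in the probe path below: */
    error = request_irq(instance->irq, demo_intr, 0, "NCR5380", instance);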
 
-/*
- * Debug stuff - to be called on NMI, or sysrq key. Use at your own risk; 
- * reentering NCR5380_print_status seems to have ugly side effects
- */
-
-/* this doesn't seem to get used at all -- sam */
-#if 0
-void sun3_sun3_debug (void)
-{
-       unsigned long flags;
-
-       if (default_instance) {
-                       local_irq_save(flags);
-                       NCR5380_print_status(default_instance);
-                       local_irq_restore(flags);
-       }
-}
-#endif
-
-
 /* sun3scsi_dma_setup() -- initialize the dma controller for a read/write */
-static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int write_flag)
+static unsigned long sun3scsi_dma_setup(struct Scsi_Host *instance,
+                                void *data, unsigned long count, int write_flag)
 {
        void *addr;
 
@@ -287,10 +221,9 @@ static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int wri
        dregs->csr |= CSR_FIFO;
        
        if(dregs->fifo_count != count) { 
-               printk("scsi%d: fifo_mismatch %04x not %04x\n",
-                      default_instance->host_no, dregs->fifo_count,
-                      (unsigned int) count);
-               NCR5380_dprint(NDEBUG_DMA, default_instance);
+               shost_printk(KERN_ERR, instance, "FIFO mismatch %04x not %04x\n",
+                            dregs->fifo_count, (unsigned int) count);
+               NCR5380_dprint(NDEBUG_DMA, instance);
        }
 
        /* setup udc */
@@ -325,21 +258,6 @@ static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int wri
 
 }
 
-#ifndef SUN3_SCSI_VME
-static inline unsigned long sun3scsi_dma_count(struct Scsi_Host *instance)
-{
-       unsigned short resid;
-
-       dregs->udc_addr = 0x32; 
-       udelay(SUN3_DMA_DELAY);
-       resid = dregs->udc_data;
-       udelay(SUN3_DMA_DELAY);
-       resid *= 2;
-
-       return (unsigned long) resid;
-}
-#endif
-
 static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 {
        return last_residual;
@@ -437,7 +355,10 @@ static int sun3scsi_dma_finish(int write_flag)
                }
        }
 
-       count = sun3scsi_dma_count(default_instance);
+       dregs->udc_addr = 0x32;
+       udelay(SUN3_DMA_DELAY);
+       count = 2 * dregs->udc_data;
+       udelay(SUN3_DMA_DELAY);
 
        fifo = dregs->fifo_count;
        last_residual = fifo;
@@ -502,17 +423,17 @@ static int sun3scsi_dma_finish(int write_flag)
 static struct scsi_host_template sun3_scsi_template = {
        .module                 = THIS_MODULE,
        .proc_name              = DRV_MODULE_NAME,
-       .show_info              = sun3scsi_show_info,
        .name                   = SUN3_SCSI_NAME,
        .info                   = sun3scsi_info,
        .queuecommand           = sun3scsi_queue_command,
-       .eh_abort_handler       = sun3scsi_abort,
-       .eh_bus_reset_handler   = sun3scsi_bus_reset,
+       .eh_abort_handler       = sun3scsi_abort,
+       .eh_bus_reset_handler   = sun3scsi_bus_reset,
        .can_queue              = 16,
        .this_id                = 7,
        .sg_tablesize           = SG_NONE,
        .cmd_per_lun            = 2,
-       .use_clustering         = DISABLE_CLUSTERING
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
 };
 
 static int __init sun3_scsi_probe(struct platform_device *pdev)
@@ -591,7 +512,6 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
                error = -ENOMEM;
                goto fail_alloc;
        }
-       default_instance = instance;
 
        instance->io_port = (unsigned long)ioaddr;
        instance->irq = irq->start;
@@ -600,7 +520,9 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        error = request_irq(instance->irq, scsi_sun3_intr, 0,
                            "NCR5380", instance);
@@ -631,9 +553,7 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
        dregs->ivect = VME_DATA24 | (instance->irq & 0xff);
 #endif
 
-#ifdef RESET_BOOT
-       sun3_scsi_reset_boot(instance);
-#endif
+       NCR5380_maybe_reset_bus(instance);
 
        error = scsi_add_host(instance, NULL);
        if (error)
@@ -649,6 +569,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
 fail_alloc:
        if (udc_regs)
index 87828acbf7c63601d618909a59028a3b4ffa6372..4615fda60dbdf6477ff28566740f59f6f51975ea 100644 (file)
  * 15 9-11
  */
  
-#include <linux/signal.h>
 #include <linux/io.h>
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
-#include <linux/stat.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/delay.h>
 
 #include <scsi/scsi_host.h>
 #include "t128.h"
@@ -126,7 +123,7 @@ static struct signature {
 
 static int __init t128_setup(char *str)
 {
-    static int commandline_current = 0;
+       static int commandline_current;
     int i;
     int ints[10];
 
@@ -165,7 +162,7 @@ __setup("t128=", t128_setup);
 
 static int __init t128_detect(struct scsi_host_template *tpnt)
 {
-    static int current_override = 0, current_base = 0;
+       static int current_override, current_base;
     struct Scsi_Host *instance;
     unsigned long base;
     void __iomem *p;
@@ -182,9 +179,8 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
                base = 0;
        } else 
            for (; !base && (current_base < NO_BASES); ++current_base) {
-#if (TDEBUG & TDEBUG_INIT)
-    printk("scsi-t128 : probing address %08x\n", bases[current_base].address);
-#endif
+               dprintk(NDEBUG_INIT, "t128: probing address 0x%08x\n",
+                       bases[current_base].address);
                if (bases[current_base].noauto)
                        continue;
                p = ioremap(bases[current_base].address, 0x2000);
@@ -195,17 +191,13 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
                                        signatures[sig].string,
                                        strlen(signatures[sig].string))) {
                        base = bases[current_base].address;
-#if (TDEBUG & TDEBUG_INIT)
-                       printk("scsi-t128 : detected board.\n");
-#endif
+                       dprintk(NDEBUG_INIT, "t128: detected board\n");
                        goto found;
                    }
                iounmap(p);
            }
 
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
-       printk("scsi-t128 : base = %08x\n", (unsigned int) base);
-#endif
+       dprintk(NDEBUG_INIT, "t128: base = 0x%08x\n", (unsigned int)base);
 
        if (!base)
            break;
@@ -213,12 +205,15 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
 found:
        instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
        if(instance == NULL)
-               break;
-               
+               goto out_unmap;
+
        instance->base = base;
        ((struct NCR5380_hostdata *)instance->hostdata)->base = p;
 
-       NCR5380_init(instance, 0);
+       if (NCR5380_init(instance, 0))
+               goto out_unregister;
+
+       NCR5380_maybe_reset_bus(instance);
 
        if (overrides[current_override].irq != IRQ_AUTO)
            instance->irq = overrides[current_override].irq;
@@ -242,27 +237,30 @@ found:
            printk("scsi%d : please jumper the board for a free IRQ.\n", instance->host_no);
        }
 
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
-       printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+       dprintk(NDEBUG_INIT, "scsi%d: irq = %d\n",
+               instance->host_no, instance->irq);
 
        ++current_override;
        ++count;
     }
     return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_unmap:
+       iounmap(p);
+       return count;
 }
 
 static int t128_release(struct Scsi_Host *shost)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(shost);
+       struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
-       iounmap(base);
+       iounmap(hostdata->base);
        return 0;
 }
 
@@ -308,14 +306,14 @@ static int t128_biosparam(struct scsi_device *sdev, struct block_device *bdev,
  *     timeout.
  */
 
-static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
-    int len) {
-    NCR5380_local_declare();
-    void __iomem *reg;
+static inline int
+NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       void __iomem *reg, *base = hostdata->base;
     unsigned char *d = dst;
     register int i = len;
 
-    NCR5380_setup(instance);
     reg = base + T_DATA_REG_OFFSET;
 
 #if 0
@@ -354,14 +352,14 @@ static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
  *     timeout.
  */
 
-static inline int NCR5380_pwrite (struct Scsi_Host *instance, unsigned char *src,
-    int len) {
-    NCR5380_local_declare();
-    void __iomem *reg;
+static inline int
+NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       void __iomem *reg, *base = hostdata->base;
     unsigned char *s = src;
     register int i = len;
 
-    NCR5380_setup(instance);
     reg = base + T_DATA_REG_OFFSET;
 
 #if 0
@@ -392,21 +390,23 @@ MODULE_LICENSE("GPL");
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
-       .name           = "Trantor T128/T128F/T228",
-       .detect         = t128_detect,
-       .release        = t128_release,
-       .proc_name      = "t128",
-       .show_info      = t128_show_info,
-       .write_info     = t128_write_info,
-       .info           = t128_info,
-       .queuecommand   = t128_queue_command,
-       .eh_abort_handler = t128_abort,
-       .eh_bus_reset_handler    = t128_bus_reset,
-       .bios_param     = t128_biosparam,
-       .can_queue      = CAN_QUEUE,
-        .this_id        = 7,
-       .sg_tablesize   = SG_ALL,
-       .cmd_per_lun    = CMD_PER_LUN,
-       .use_clustering = DISABLE_CLUSTERING,
+       .name                   = "Trantor T128/T128F/T228",
+       .detect                 = t128_detect,
+       .release                = t128_release,
+       .proc_name              = "t128",
+       .show_info              = t128_show_info,
+       .write_info             = t128_write_info,
+       .info                   = t128_info,
+       .queuecommand           = t128_queue_command,
+       .eh_abort_handler       = t128_abort,
+       .eh_bus_reset_handler   = t128_bus_reset,
+       .bios_param             = t128_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
index 2c7371454dfd40cf5d398f64c9072d5f11f9e03d..dd16d85497e168680a5d3467f24baf0cbfe1e05a 100644 (file)
 #ifndef T128_H
 #define T128_H
 
-#define TDEBUG         0
-#define TDEBUG_INIT    0x1
-#define TDEBUG_TRANSFER 0x2
-
 /*
  * The trantor boards are memory mapped. They use an NCR5380 or
  * equivalent (my sample board had part second sourced from ZILOG).
 
 #define T_DATA_REG_OFFSET      0x1e00  /* rw 512 bytes long */
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
 #define NCR5380_implementation_fields \
     void __iomem *base
 
-#define NCR5380_local_declare() \
-    void __iomem *base
-
-#define NCR5380_setup(instance) \
-    base = ((struct NCR5380_hostdata *)(instance->hostdata))->base
+#define T128_address(reg) \
+       (((struct NCR5380_hostdata *)shost_priv(instance))->base + T_5380_OFFSET + ((reg) * 0x20))
 
-#define T128_address(reg) (base + T_5380_OFFSET + ((reg) * 0x20))
-
-#if !(TDEBUG & TDEBUG_TRANSFER)
 #define NCR5380_read(reg) readb(T128_address(reg))
 #define NCR5380_write(reg, value) writeb((value),(T128_address(reg)))
-#else
-#define NCR5380_read(reg)                                              \
-    (((unsigned char) printk("scsi%d : read register %d at address %08x\n"\
-    , instance->hostno, (reg), T128_address(reg))), readb(T128_address(reg)))
-
-#define NCR5380_write(reg, value) {                                    \
-    printk("scsi%d : write %02x to register %d at address %08x\n",     \
-           instance->hostno, (value), (reg), T128_address(reg));       \
-    writeb((value), (T128_address(reg)));                              \
-}
-#endif
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     ((cmd)->transfersize)
 
 #define NCR5380_intr t128_intr
-#define do_NCR5380_intr do_t128_intr
 #define NCR5380_queue_command t128_queue_command
 #define NCR5380_abort t128_abort
 #define NCR5380_bus_reset t128_bus_reset
 
 #define T128_IRQS 0xc4a8
 
-#endif /* ndef ASM */
 #endif /* T128_H */
index 91a003011acfacb277e892b47c99771ceae80ffa..a9bac3bf20de14e541a22608fcb1542cad0f6d5f 100644 (file)
@@ -34,7 +34,7 @@ static struct pm_clk_notifier_block platform_bus_notifier = {
 
 static int __init sh_pm_runtime_init(void)
 {
-       if (IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
+       if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
                if (!of_find_compatible_node(NULL, NULL,
                                             "renesas,cpg-mstp-clocks"))
                        return 0;
index fb2b3935b658f5bae0286b8153bafe7753ef6c8c..88260205a2614c84e8293bf82ca47c49abb2e29d 100644 (file)
@@ -7,6 +7,7 @@ source "drivers/soc/mediatek/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
 source "drivers/soc/sunxi/Kconfig"
+source "drivers/soc/tegra/Kconfig"
 source "drivers/soc/ti/Kconfig"
 source "drivers/soc/versatile/Kconfig"
 
index 0ad66fa9bb1aa47c97a7a5c1cae80f7bbc945473..5548a31e1a39a100142b45841cbe38e1aa007e38 100644 (file)
@@ -288,7 +288,7 @@ static struct spm_driver_data *spm_get_drv(struct platform_device *pdev,
        struct spm_driver_data *drv = NULL;
        struct device_node *cpu_node, *saw_node;
        int cpu;
-       bool found;
+       bool found = false;
 
        for_each_possible_cpu(cpu) {
                cpu_node = of_cpu_device_node_get(cpu);
diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
new file mode 100644 (file)
index 0000000..d0c3c3e
--- /dev/null
@@ -0,0 +1,83 @@
+if ARCH_TEGRA
+
+# 32-bit ARM SoCs
+if ARM
+
+config ARCH_TEGRA_2x_SOC
+       bool "Enable support for Tegra20 family"
+       select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
+       select ARM_ERRATA_720789
+       select ARM_ERRATA_754327 if SMP
+       select ARM_ERRATA_764369 if SMP
+       select PINCTRL_TEGRA20
+       select PL310_ERRATA_727915 if CACHE_L2X0
+       select PL310_ERRATA_769419 if CACHE_L2X0
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra AP20 and T20 processors, based on the
+         ARM Cortex-A9 MPCore CPU and the ARM PL310 L2 cache controller.
+
+config ARCH_TEGRA_3x_SOC
+       bool "Enable support for Tegra30 family"
+       select ARM_ERRATA_754322
+       select ARM_ERRATA_764369 if SMP
+       select PINCTRL_TEGRA30
+       select PL310_ERRATA_769419 if CACHE_L2X0
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T30 processor family, based on the
+         ARM Cortex-A9 MPCore CPU and the ARM PL310 L2 cache controller.
+
+config ARCH_TEGRA_114_SOC
+       bool "Enable support for Tegra114 family"
+       select ARM_ERRATA_798181 if SMP
+       select ARM_L1_CACHE_SHIFT_6
+       select HAVE_ARM_ARCH_TIMER
+       select PINCTRL_TEGRA114
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T114 processor family, based on the
+         ARM Cortex-A15 MPCore CPU.
+
+config ARCH_TEGRA_124_SOC
+       bool "Enable support for Tegra124 family"
+       select ARM_L1_CACHE_SHIFT_6
+       select HAVE_ARM_ARCH_TIMER
+       select PINCTRL_TEGRA124
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T124 processor family, based on the
+         ARM Cortex-A15 MPCore CPU.
+
+endif
+
+# 64-bit ARM SoCs
+if ARM64
+
+config ARCH_TEGRA_132_SOC
+       bool "NVIDIA Tegra132 SoC"
+       select PINCTRL_TEGRA124
+       help
+         Enable support for NVIDIA Tegra132 SoC, based on the Denver
+         ARMv8 CPU.  The Tegra132 SoC is similar to the Tegra124 SoC,
+         but contains an NVIDIA Denver CPU complex in place of
+         Tegra124's "4+1" Cortex-A15 CPU complex.
+
+config ARCH_TEGRA_210_SOC
+       bool "NVIDIA Tegra210 SoC"
+       select PINCTRL_TEGRA210
+       help
+         Enable support for the NVIDIA Tegra210 SoC. Also known as Tegra X1,
+         the Tegra210 has four Cortex-A57 cores paired with four Cortex-A53
+         cores in a switched configuration. It features a GPU of the Maxwell
+         architecture with support for DX11, SM4, OpenGL 4.5 and OpenGL ES
+         3.1, and provides 256 CUDA cores. It supports hardware-accelerated
+         encoding and decoding of various video standards, including H.265,
+         H.264 and VP8, at 4K resolution and up to 60 fps.
+
+         Besides the multimedia features it also comes with a variety of I/O
+         controllers, such as GPIO, I2C, SPI, SDHCI, PCIe, SATA and XHCI, to
+         name only a few.
+
+endif
+endif
index aebad36391c93b36658c52222e761366c111216f..8feac599e9ab4bfe886b8da89960f04ed32c1aba 100644 (file)
@@ -1571,6 +1571,7 @@ static int atmel_spi_probe(struct platform_device *pdev)
 
        as->use_cs_gpios = true;
        if (atmel_spi_is_v2(as) &&
+           pdev->dev.of_node &&
            !of_get_property(pdev->dev.of_node, "cs-gpios", NULL)) {
                as->use_cs_gpios = false;
                master->num_chipselect = 4;
index 7de6f8472a8100656884647ad5447be94604e7bb..ecc73c0a97cf5ebf0d2a6c623cf3ce1ac311e62b 100644 (file)
@@ -73,8 +73,8 @@
 
 /* Bitfields in CNTL1 */
 #define BCM2835_AUX_SPI_CNTL1_CSHIGH   0x00000700
-#define BCM2835_AUX_SPI_CNTL1_IDLE     0x00000080
-#define BCM2835_AUX_SPI_CNTL1_TXEMPTY  0x00000040
+#define BCM2835_AUX_SPI_CNTL1_TXEMPTY  0x00000080
+#define BCM2835_AUX_SPI_CNTL1_IDLE     0x00000040
 #define BCM2835_AUX_SPI_CNTL1_MSBF_IN  0x00000002
 #define BCM2835_AUX_SPI_CNTL1_KEEP_IN  0x00000001
 
index 7fd6a4c009d25d8ebfd3f739bd72220f1701320a..7cb0c1921495959dcb6c919c3cb9f1811a5e90bb 100644 (file)
@@ -84,7 +84,7 @@ struct fsl_espi_transfer {
 /* SPCOM register values */
 #define SPCOM_CS(x)            ((x) << 30)
 #define SPCOM_TRANLEN(x)       ((x) << 0)
-#define        SPCOM_TRANLEN_MAX       0xFFFF  /* Max transaction length */
+#define        SPCOM_TRANLEN_MAX       0x10000 /* Max transaction length */
 
 #define AUTOSUSPEND_TIMEOUT 2000
 
@@ -233,7 +233,7 @@ static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t)
        reinit_completion(&mpc8xxx_spi->done);
 
        /* Set SPCOM[CS] and SPCOM[TRANLEN] field */
-       if ((t->len - 1) > SPCOM_TRANLEN_MAX) {
+       if (t->len > SPCOM_TRANLEN_MAX) {
                dev_err(mpc8xxx_spi->dev, "Transaction length (%d)"
                                " beyond the SPCOM[TRANLEN] field\n", t->len);
                return -EINVAL;
index d98c33cb64f9b4825998eb1faa2316b1e4796a7d..6a4ff27f4357eb229815c18b535327487f2df580 100644 (file)
@@ -929,7 +929,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
                                        tx->sgl, tx->nents, DMA_MEM_TO_DEV,
                                        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
                if (!desc_tx)
-                       goto no_dma;
+                       goto tx_nodma;
 
                desc_tx->callback = spi_imx_dma_tx_callback;
                desc_tx->callback_param = (void *)spi_imx;
@@ -941,7 +941,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
                                        rx->sgl, rx->nents, DMA_DEV_TO_MEM,
                                        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
                if (!desc_rx)
-                       goto no_dma;
+                       goto rx_nodma;
 
                desc_rx->callback = spi_imx_dma_rx_callback;
                desc_rx->callback_param = (void *)spi_imx;
@@ -1008,7 +1008,9 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 
        return ret;
 
-no_dma:
+rx_nodma:
+       dmaengine_terminate_all(master->dma_tx);
+tx_nodma:
        pr_warn_once("%s %s: DMA not available, falling back to PIO\n",
                     dev_driver_string(&master->dev),
                     dev_name(&master->dev));
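
The relabeled unwind above follows the usual staged-goto shape: each failure
label undoes only what was set up before it, so an RX prep failure also tears
down the TX descriptor that was already prepared. A self-contained sketch
(demo_dma_prep() and its arguments are hypothetical):

    static int demo_dma_prep(struct dma_chan *tx_chan, struct dma_chan *rx_chan,
                             struct scatterlist *sgl, unsigned int nents)
    {
            struct dma_async_tx_descriptor *desc_tx, *desc_rx;

            desc_tx = dmaengine_prep_slave_sg(tx_chan, sgl, nents,
                                              DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
            if (!desc_tx)
                    goto tx_nodma;

            desc_rx = dmaengine_prep_slave_sg(rx_chan, sgl, nents,
                                              DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
            if (!desc_rx)
                    goto rx_nodma;

            return 0;

    rx_nodma:
            dmaengine_terminate_all(tx_chan);       /* undo the TX half */
    tx_nodma:
            return -EAGAIN;                         /* caller falls back to PIO */
    }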
index 894616f687b0f314168bc17e77d14b7e477a4ec8..cf4bb36bee25c9bdf8ad39a570ceb61fda3a7421 100644 (file)
@@ -761,6 +761,7 @@ static int spi_test_run_iter(struct spi_device *spi,
                test.iterate_transfer_mask = 1;
 
        /* count number of transfers with tx/rx_buf != NULL */
+       rx_count = tx_count = 0;
        for (i = 0; i < test.transfer_count; i++) {
                if (test.transfers[i].tx_buf)
                        tx_count++;
index 7273820275e90c86ddee32b796187d2b940a5c27..0caa3c8bef46c46e0ed66bf89f518cc5c5236449 100644 (file)
@@ -1490,6 +1490,8 @@ static int omap2_mcspi_probe(struct platform_device *pdev)
        return status;
 
 disable_pm:
+       pm_runtime_dont_use_autosuspend(&pdev->dev);
+       pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
 free_master:
        spi_master_put(master);
@@ -1501,6 +1503,7 @@ static int omap2_mcspi_remove(struct platform_device *pdev)
        struct spi_master *master = platform_get_drvdata(pdev);
        struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
 
+       pm_runtime_dont_use_autosuspend(mcspi->dev);
        pm_runtime_put_sync(mcspi->dev);
        pm_runtime_disable(&pdev->dev);
 
index cde5ff7529eb28b26e684eea53f4e168361937ff..d1a750760cf30b270b44929ac91eb22971d15b22 100644 (file)
@@ -613,9 +613,10 @@ out:
        return err;
 }
 
-static int ssb_bus_register(struct ssb_bus *bus,
-                           ssb_invariants_func_t get_invariants,
-                           unsigned long baseaddr)
+static int __maybe_unused
+ssb_bus_register(struct ssb_bus *bus,
+                ssb_invariants_func_t get_invariants,
+                unsigned long baseaddr)
 {
        int err;
 
index 58d4517e18369f984fd1fac9bfe53b9cc9ba4a3a..b9519be90fdae94ad5dbd278a336216de2971ce8 100644 (file)
@@ -6,6 +6,7 @@ menu "Analog to digital converters"
 config AD7606
        tristate "Analog Devices AD7606 ADC driver"
        depends on GPIOLIB || COMPILE_TEST
+       depends on HAS_IOMEM
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        help
index f129039bece3c74b96fb78e70996ea65279502ea..69287108f793bcb81bcf91451b64f1f4f86c2257 100644 (file)
@@ -217,8 +217,12 @@ error_ret:
 static int ade7753_reset(struct device *dev)
 {
        u16 val;
+       int ret;
+
+       ret = ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+       if (ret)
+               return ret;
 
-       ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
        val |= BIT(6); /* Software Chip Reset */
 
        return ade7753_spi_write_reg_16(dev, ADE7753_MODE, val);
@@ -343,8 +347,12 @@ error_ret:
 static int ade7753_stop_device(struct device *dev)
 {
        u16 val;
+       int ret;
+
+       ret = ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+       if (ret)
+               return ret;
 
-       ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
        val |= BIT(4);  /* AD converters can be turned off */
 
        return ade7753_spi_write_reg_16(dev, ADE7753_MODE, val);
index d6273e143324e196b4ef2dbd9af967f3516235fa..a80d993b882eb1b3a76cab8ac6ce5ec0814a2e9f 100644 (file)
@@ -151,16 +151,12 @@ do {                                                                          \
 
 #define LIBCFS_FREE(ptr, size)                                   \
 do {                                                               \
-       int s = (size);                                          \
        if (unlikely((ptr) == NULL)) {                            \
                CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at "    \
-                      "%s:%d\n", s, __FILE__, __LINE__);              \
+                      "%s:%d\n", (int)(size), __FILE__, __LINE__);     \
                break;                                            \
        }                                                              \
-       if (unlikely(s > LIBCFS_VMALLOC_SIZE))                    \
-               vfree(ptr);                                 \
-       else                                                        \
-               kfree(ptr);                                       \
+       kvfree(ptr);                                      \
 } while (0)
 
 /******************************************************************************/
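
The LIBCFS_FREE change leans on kvfree(), which determines from the pointer
itself whether kmalloc or vmalloc memory is being returned, so the free side no
longer needs the allocation size. Sketch of the alloc/free pairing (helper
names illustrative; LIBCFS_VMALLOC_SIZE as in the libcfs headers):

    static void *demo_alloc(size_t size)
    {
            if (size <= LIBCFS_VMALLOC_SIZE)        /* small: slab */
                    return kmalloc(size, GFP_KERNEL);
            return vmalloc(size);                   /* large: vmalloc */
    }

    static void demo_free(void *ptr)
    {
            kvfree(ptr);    /* correct for either allocation path */
    }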
index 72af486b65df70ee2b011ee745bef74c33beec5c..cb74ae731b955d623b9d7cce3c3cb3441229365e 100644 (file)
@@ -2070,32 +2070,13 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
 
 static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
 {
-       struct ib_device_attr *attr;
-       int rc;
-
        /* It's safe to assume a HCA can handle a page size
         * matching that of the native system */
        hdev->ibh_page_shift = PAGE_SHIFT;
        hdev->ibh_page_size  = 1 << PAGE_SHIFT;
        hdev->ibh_page_mask  = ~((__u64)hdev->ibh_page_size - 1);
 
-       LIBCFS_ALLOC(attr, sizeof(*attr));
-       if (attr == NULL) {
-               CERROR("Out of memory\n");
-               return -ENOMEM;
-       }
-
-       rc = ib_query_device(hdev->ibh_ibdev, attr);
-       if (rc == 0)
-               hdev->ibh_mr_size = attr->max_mr_size;
-
-       LIBCFS_FREE(attr, sizeof(*attr));
-
-       if (rc != 0) {
-               CERROR("Failed to query IB device: %d\n", rc);
-               return rc;
-       }
-
+       hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
        if (hdev->ibh_mr_size == ~0ULL) {
                hdev->ibh_mr_shift = 64;
                return 0;
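
The o2iblnd change rides a 4.5-era IB core cleanup: device attributes are now queried once at registration time and cached in ib_device->attrs, so per-consumer ib_query_device() calls — and the temporary ib_device_attr allocation being deleted here — are dead weight. In sketch form:

    /* attrs is populated by the IB core before the device is handed to
     * upper layers, so they can read it directly (given a valid dev): */
    u64 max_mr_size = dev->attrs.max_mr_size;
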
index 7b355319079c3f2b4ca41f944929411747b30b5c..8982f7d1b374823eb8dec18f98b0b1ef167e8f26 100644 (file)
@@ -1858,7 +1858,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
        int api32 = ll_need_32bit_api(sbi);
        loff_t ret = -EINVAL;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (origin) {
        case SEEK_SET:
                break;
@@ -1896,7 +1896,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
        goto out;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
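
These Lustre hunks, and the long run that follows, are one mechanical conversion: open-coded mutex_lock(&inode->i_mutex) becomes the inode_lock() family of wrappers that fs.h grew in 4.5, hiding the lock's implementation so the VFS could later swap i_mutex for a different primitive without touching every filesystem. At this point the wrappers are thin (paraphrased from include/linux/fs.h of that era):

    static inline void inode_lock(struct inode *inode)
    {
            mutex_lock(&inode->i_mutex);
    }

    static inline void inode_unlock(struct inode *inode)
    {
            mutex_unlock(&inode->i_mutex);
    }

    static inline int inode_trylock(struct inode *inode)
    {
            return mutex_trylock(&inode->i_mutex);
    }
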
 
index c92d58b770ec2bbd3b56c0600a0c2a6aeb8b015e..39e2ffd5f97f1addb2d0b0067285935b30f07185 100644 (file)
@@ -2082,17 +2082,17 @@ putgl:
        /* update time if requested */
        rc = 0;
        if (llss->ia2.ia_valid != 0) {
-               mutex_lock(&llss->inode1->i_mutex);
+               inode_lock(llss->inode1);
                rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
-               mutex_unlock(&llss->inode1->i_mutex);
+               inode_unlock(llss->inode1);
        }
 
        if (llss->ia1.ia_valid != 0) {
                int rc1;
 
-               mutex_lock(&llss->inode2->i_mutex);
+               inode_lock(llss->inode2);
                rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
-               mutex_unlock(&llss->inode2->i_mutex);
+               inode_unlock(llss->inode2);
                if (rc == 0)
                        rc = rc1;
        }
@@ -2179,13 +2179,13 @@ static int ll_hsm_import(struct inode *inode, struct file *file,
                         ATTR_MTIME | ATTR_MTIME_SET |
                         ATTR_ATIME | ATTR_ATIME_SET;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        rc = ll_setattr_raw(file->f_path.dentry, attr, true);
        if (rc == -ENODATA)
                rc = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        kfree(attr);
 free_hss:
@@ -2609,7 +2609,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
 
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* catch async errors that were recorded back when async writeback
         * failed for pages in this mapping. */
@@ -2641,7 +2641,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                        fd->fd_write_failed = false;
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
index ee8a1d67d19119ee034ed5ffa7d64829042aa20d..845e992ca5fcb9898abb4887f70c9165818d4079 100644 (file)
@@ -631,8 +631,6 @@ struct ll_file_data {
 
 struct lov_stripe_md;
 
-extern spinlock_t inode_lock;
-
 extern struct dentry *llite_root;
 extern struct kset *llite_kset;
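
Dropping the extern spinlock_t inode_lock declaration is not just dead-code removal: the global inode_lock spinlock left the VFS around 2.6.39, and the identifier now names the fs.h helper function, so keeping the stale extern would break the build. Roughly:

    #include <linux/fs.h>

    extern spinlock_t inode_lock;   /* error: 'inode_lock' redeclared as a
                                     * different kind of symbol (fs.h now
                                     * defines a function with this name) */
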
 
index 1db93af62badd52ecf551ce6b5b3f2d050efd678..b2fc5b3786ee62890956c25f6972ee329489346f 100644 (file)
@@ -1277,7 +1277,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
                return -ENOMEM;
 
        if (!S_ISDIR(inode->i_mode))
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        memcpy(&op_data->op_attr, attr, sizeof(*attr));
 
@@ -1358,7 +1358,7 @@ out:
        ll_finish_md_op_data(op_data);
 
        if (!S_ISDIR(inode->i_mode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
                        inode_dio_wait(inode);
        }
index e578a1130ad1bfd007ef8c498b06fe4917d07efe..18aab25f9cd9440680c216fc14b340cfc00e5cec 100644 (file)
@@ -245,9 +245,9 @@ static int ll_get_name(struct dentry *dentry, char *name,
                goto out;
        }
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        rc = ll_dir_read(dir, &lgd.ctx);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        if (!rc && !lgd.lgd_found)
                rc = -ENOENT;
 out:
index 420d39123877b61c92c24d40dbe4ac37e5e0fc82..871924b3f2e72f44a8360cb1e6638d1c13b0eee3 100644 (file)
@@ -257,9 +257,9 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
         *    be asked to write less pages once, this purely depends on
         *    implementation. Anyway, we should be careful to avoid deadlocking.
         */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        cl_io_fini(env, io);
        return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
 }
index 95cdb0c58b04dffaa784cd04ef07f6f36fad004e..f355474967d62617319859e8df2830795a0a5458 100644 (file)
@@ -115,8 +115,8 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
                struct inode *inode = vmpage->mapping->host;
                loff_t pos;
 
-               if (mutex_trylock(&inode->i_mutex)) {
-                       mutex_unlock(&(inode)->i_mutex);
+               if (inode_trylock(inode)) {
+                       inode_unlock((inode));
 
                        /* this is too bad. Someone is trying to write the
                         * page w/o holding inode mutex. This means we can
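
The trylock conversion here preserves a probing idiom rather than real locking: the code wants to know whether someone else currently holds the inode lock. If inode_trylock() succeeds, nobody did, and the lock is dropped again at once. Distilled into a sketch (inherently racy, so only useful for diagnostics and for assertions like the LASSERTs converted further down):

    static bool inode_held_by_another(struct inode *inode)
    {
            if (inode_trylock(inode)) {
                    inode_unlock(inode);    /* we only probed; release at once */
                    return false;
            }
            return true;                    /* someone else holds it (racy) */
    }
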
index 39fa13b74cbdac3f82f128aee9837063d253183a..711fda93a58df7af9a6dbfe7e0403c0ef9d0700f 100644 (file)
@@ -403,7 +403,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
         * 1. Need inode mutex to operate transient pages.
         */
        if (iov_iter_rw(iter) == READ)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        LASSERT(obj->cob_transient_pages == 0);
        while (iov_iter_count(iter)) {
@@ -454,7 +454,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 out:
        LASSERT(obj->cob_transient_pages == 0);
        if (iov_iter_rw(iter) == READ)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        if (tot_bytes > 0) {
                if (iov_iter_rw(iter) == WRITE) {
index f68e972886caeb510b53f7704dc7ca85a8f1df90..0920ac6b3003afcf0a973a8a25f91c87d3900513 100644 (file)
@@ -439,7 +439,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
        struct inode    *inode = ccc_object_inode(io->ci_obj);
        int result = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (cl_io_is_trunc(io))
                result = vvp_io_setattr_trunc(env, ios, inode,
                                        io->u.ci_setattr.sa_attr.lvb_size);
@@ -459,7 +459,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
                 * because osc has already notified to destroy osc_extents. */
                vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 static void vvp_io_setattr_fini(const struct lu_env *env,
index 99c0d7aee921d884d7ebc81e2d2658b978c88930..a133475a7c74af7a93fd0ffe2fac4294db284f0c 100644 (file)
@@ -428,7 +428,7 @@ static void vvp_transient_page_verify(const struct cl_page *page)
 {
        struct inode *inode = ccc_object_inode(page->cp_obj);
 
-       LASSERT(!mutex_trylock(&inode->i_mutex));
+       LASSERT(!inode_trylock(inode));
 }
 
 static int vvp_transient_page_own(const struct lu_env *env,
@@ -480,9 +480,9 @@ static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
        struct inode    *inode = ccc_object_inode(slice->cpl_obj);
        int     locked;
 
-       locked = !mutex_trylock(&inode->i_mutex);
+       locked = !inode_trylock(inode);
        if (!locked)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        return locked ? -EBUSY : -ENODATA;
 }
 
@@ -502,7 +502,7 @@ static void vvp_transient_page_fini(const struct lu_env *env,
        struct ccc_object *clobj = cl2ccc(clp->cp_obj);
 
        vvp_page_fini_common(cp);
-       LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+       LASSERT(!inode_trylock(clobj->cob_inode));
        clobj->cob_transient_pages--;
 }
 
@@ -548,7 +548,7 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
        } else {
                struct ccc_object *clobj = cl2ccc(obj);
 
-               LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+               LASSERT(!inode_trylock(clobj->cob_inode));
                cl_page_slice_add(page, &cpg->cpg_cl, obj,
                                &vvp_transient_page_ops);
                clobj->cob_transient_pages++;
index 79ac19246548afb5a6e5574a97d09dd7cb8ca110..70b8f4fabfad31af474fae8b7943f7a1404cbb8f 100644 (file)
@@ -825,8 +825,7 @@ static void lcd_write_cmd_s(int cmd)
        lcd_send_serial(0x1F);  /* R/W=W, RS=0 */
        lcd_send_serial(cmd & 0x0F);
        lcd_send_serial((cmd >> 4) & 0x0F);
-       /* the shortest command takes at least 40 us */
-       usleep_range(40, 100);
+       udelay(40);             /* the shortest command takes at least 40 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -837,8 +836,7 @@ static void lcd_write_data_s(int data)
        lcd_send_serial(0x5F);  /* R/W=W, RS=1 */
        lcd_send_serial(data & 0x0F);
        lcd_send_serial((data >> 4) & 0x0F);
-       /* the shortest data takes at least 40 us */
-       usleep_range(40, 100);
+       udelay(40);             /* the shortest data takes at least 40 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -848,20 +846,19 @@ static void lcd_write_cmd_p8(int cmd)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, cmd);
-       /* maintain the data during 20 us before the strobe */
-       usleep_range(20, 100);
+       udelay(20);     /* maintain the data during 20 us before the strobe */
 
        bits.e = BIT_SET;
        bits.rs = BIT_CLR;
        bits.rw = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(40, 100);  /* maintain the strobe during 40 us */
+       udelay(40);     /* maintain the strobe during 40 us */
 
        bits.e = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(120, 500); /* the shortest command takes at least 120 us */
+       udelay(120);    /* the shortest command takes at least 120 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -871,20 +868,19 @@ static void lcd_write_data_p8(int data)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, data);
-       /* maintain the data during 20 us before the strobe */
-       usleep_range(20, 100);
+       udelay(20);     /* maintain the data during 20 us before the strobe */
 
        bits.e = BIT_SET;
        bits.rs = BIT_SET;
        bits.rw = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(40, 100);  /* maintain the strobe during 40 us */
+       udelay(40);     /* maintain the strobe during 40 us */
 
        bits.e = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(45, 100);  /* the shortest data takes at least 45 us */
+       udelay(45);     /* the shortest data takes at least 45 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -894,7 +890,7 @@ static void lcd_write_cmd_tilcd(int cmd)
        spin_lock_irq(&pprt_lock);
        /* present the data to the control port */
        w_ctr(pprt, cmd);
-       usleep_range(60, 120);
+       udelay(60);
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -904,7 +900,7 @@ static void lcd_write_data_tilcd(int data)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, data);
-       usleep_range(60, 120);
+       udelay(60);
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -947,7 +943,7 @@ static void lcd_clear_fast_s(void)
                lcd_send_serial(0x5F);  /* R/W=W, RS=1 */
                lcd_send_serial(' ' & 0x0F);
                lcd_send_serial((' ' >> 4) & 0x0F);
-               usleep_range(40, 100);  /* the shortest data takes at least 40 us */
+               udelay(40);     /* the shortest data takes at least 40 us */
        }
        spin_unlock_irq(&pprt_lock);
 
@@ -971,7 +967,7 @@ static void lcd_clear_fast_p8(void)
                w_dtr(pprt, ' ');
 
                /* maintain the data during 20 us before the strobe */
-               usleep_range(20, 100);
+               udelay(20);
 
                bits.e = BIT_SET;
                bits.rs = BIT_SET;
@@ -979,13 +975,13 @@ static void lcd_clear_fast_p8(void)
                set_ctrl_bits();
 
                /* maintain the strobe during 40 us */
-               usleep_range(40, 100);
+               udelay(40);
 
                bits.e = BIT_CLR;
                set_ctrl_bits();
 
                /* the shortest data takes at least 45 us */
-               usleep_range(45, 100);
+               udelay(45);
        }
        spin_unlock_irq(&pprt_lock);
 
@@ -1007,7 +1003,7 @@ static void lcd_clear_fast_tilcd(void)
        for (pos = 0; pos < lcd.height * lcd.hwidth; pos++) {
                /* present the data to the data port */
                w_dtr(pprt, ' ');
-               usleep_range(60, 120);
+               udelay(60);
        }
 
        spin_unlock_irq(&pprt_lock);
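
All of the panel.c hunks make the same change: usleep_range() goes back to udelay(), because every one of these delays executes between spin_lock_irq(&pprt_lock) and spin_unlock_irq(&pprt_lock). usleep_range() sleeps on an hrtimer, and sleeping with a spinlock held and interrupts disabled is invalid atomic context ("BUG: scheduling while atomic"); udelay() busy-waits and is the only legal choice there. A sketch of the constraint:

    #include <linux/delay.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(bus_lock);

    static void strobe_byte(void)
    {
            spin_lock_irq(&bus_lock);       /* atomic context begins here */
            /* usleep_range(40, 100) here would try to schedule and splat */
            udelay(40);                     /* busy-wait: safe under the lock */
            spin_unlock_irq(&bus_lock);
    }
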
index ba8765063174c4e05b517cdf45d33c6c5f57cf25..f1f3ecadf0fb0d0e1a0bb11d014d766e1187e55c 100644 (file)
@@ -22,12 +22,6 @@ menuconfig STAGING_RDMA
 # Please keep entries in alphabetic order
 if STAGING_RDMA
 
-source "drivers/staging/rdma/amso1100/Kconfig"
-
-source "drivers/staging/rdma/ehca/Kconfig"
-
 source "drivers/staging/rdma/hfi1/Kconfig"
 
-source "drivers/staging/rdma/ipath/Kconfig"
-
 endif
index 139d78ef2c24388b93ed6a008090190cff6de663..8c7fc1de48a7bf6f86f103ad120f9052ea399c58 100644 (file)
@@ -1,5 +1,2 @@
 # Entries for RDMA_STAGING tree
-obj-$(CONFIG_INFINIBAND_AMSO1100)      += amso1100/
-obj-$(CONFIG_INFINIBAND_EHCA)  += ehca/
 obj-$(CONFIG_INFINIBAND_HFI1)  += hfi1/
-obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
diff --git a/drivers/staging/rdma/amso1100/Kbuild b/drivers/staging/rdma/amso1100/Kbuild
deleted file mode 100644 (file)
index 950dfab..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG
-
-obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
-
-iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
-       c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/staging/rdma/amso1100/Kconfig b/drivers/staging/rdma/amso1100/Kconfig
deleted file mode 100644 (file)
index e6ce5f2..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-config INFINIBAND_AMSO1100
-       tristate "Ammasso 1100 HCA support"
-       depends on PCI && INET
-       ---help---
-         This is a low-level driver for the Ammasso 1100 host
-         channel adapter (HCA).
-
-config INFINIBAND_AMSO1100_DEBUG
-       bool "Verbose debugging output"
-       depends on INFINIBAND_AMSO1100
-       default n
-       ---help---
-         This option causes the amso1100 driver to produce a bunch of
-         debug messages.  Select this if you are developing the driver
-         or trying to diagnose a problem.
diff --git a/drivers/staging/rdma/amso1100/TODO b/drivers/staging/rdma/amso1100/TODO
deleted file mode 100644 (file)
index 18b00a5..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-7/2015
-
-The amso1100 driver has been deprecated and moved to drivers/staging.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/amso1100/c2.c b/drivers/staging/rdma/amso1100/c2.c
deleted file mode 100644 (file)
index b46ebd1..0000000
+++ /dev/null
@@ -1,1240 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/prefetch.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_provider.h"
-
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
-    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
-
-static int debug = -1;         /* defaults above */
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-
-static int c2_up(struct net_device *netdev);
-static int c2_down(struct net_device *netdev);
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
-static void c2_tx_interrupt(struct net_device *netdev);
-static void c2_rx_interrupt(struct net_device *netdev);
-static irqreturn_t c2_interrupt(int irq, void *dev_id);
-static void c2_tx_timeout(struct net_device *netdev);
-static int c2_change_mtu(struct net_device *netdev, int new_mtu);
-static void c2_reset(struct c2_port *c2_port);
-
-static struct pci_device_id c2_pci_table[] = {
-       { PCI_DEVICE(0x18b8, 0xb001) },
-       { 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, c2_pci_table);
-
-static void c2_set_rxbufsize(struct c2_port *c2_port)
-{
-       struct net_device *netdev = c2_port->netdev;
-
-       if (netdev->mtu > RX_BUF_SIZE)
-               c2_port->rx_buf_size =
-                   netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
-                   NET_IP_ALIGN;
-       else
-               c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
-}
-
-/*
- * Allocate TX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
-                           dma_addr_t base, void __iomem * mmio_txp_ring)
-{
-       struct c2_tx_desc *tx_desc;
-       struct c2_txp_desc __iomem *txp_desc;
-       struct c2_element *elem;
-       int i;
-
-       tx_ring->start = kmalloc_array(tx_ring->count, sizeof(*elem),
-                                      GFP_KERNEL);
-       if (!tx_ring->start)
-               return -ENOMEM;
-
-       elem = tx_ring->start;
-       tx_desc = vaddr;
-       txp_desc = mmio_txp_ring;
-       for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
-               tx_desc->len = 0;
-               tx_desc->status = 0;
-
-               /* Set TXP_HTXD_UNINIT */
-               __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-                            (void __iomem *) txp_desc + C2_TXP_ADDR);
-               __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
-               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-                            (void __iomem *) txp_desc + C2_TXP_FLAGS);
-
-               elem->skb = NULL;
-               elem->ht_desc = tx_desc;
-               elem->hw_desc = txp_desc;
-
-               if (i == tx_ring->count - 1) {
-                       elem->next = tx_ring->start;
-                       tx_desc->next_offset = base;
-               } else {
-                       elem->next = elem + 1;
-                       tx_desc->next_offset =
-                           base + (i + 1) * sizeof(*tx_desc);
-               }
-       }
-
-       tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
-
-       return 0;
-}
-
-/*
- * Allocate RX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
-                           dma_addr_t base, void __iomem * mmio_rxp_ring)
-{
-       struct c2_rx_desc *rx_desc;
-       struct c2_rxp_desc __iomem *rxp_desc;
-       struct c2_element *elem;
-       int i;
-
-       rx_ring->start = kmalloc_array(rx_ring->count, sizeof(*elem),
-                                      GFP_KERNEL);
-       if (!rx_ring->start)
-               return -ENOMEM;
-
-       elem = rx_ring->start;
-       rx_desc = vaddr;
-       rxp_desc = mmio_rxp_ring;
-       for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
-               rx_desc->len = 0;
-               rx_desc->status = 0;
-
-               /* Set RXP_HRXD_UNINIT */
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK),
-                      (void __iomem *) rxp_desc + C2_RXP_STATUS);
-               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
-               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
-               __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-                            (void __iomem *) rxp_desc + C2_RXP_ADDR);
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-                            (void __iomem *) rxp_desc + C2_RXP_FLAGS);
-
-               elem->skb = NULL;
-               elem->ht_desc = rx_desc;
-               elem->hw_desc = rxp_desc;
-
-               if (i == rx_ring->count - 1) {
-                       elem->next = rx_ring->start;
-                       rx_desc->next_offset = base;
-               } else {
-                       elem->next = elem + 1;
-                       rx_desc->next_offset =
-                           base + (i + 1) * sizeof(*rx_desc);
-               }
-       }
-
-       rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
-
-       return 0;
-}
-
-/* Setup buffer for receiving */
-static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_rx_desc *rx_desc = elem->ht_desc;
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen;
-       struct c2_rxp_hdr *rxp_hdr;
-
-       skb = dev_alloc_skb(c2_port->rx_buf_size);
-       if (unlikely(!skb)) {
-               pr_debug("%s: out of memory for receive\n",
-                       c2_port->netdev->name);
-               return -ENOMEM;
-       }
-
-       /* Zero out the rxp hdr in the sk_buff */
-       memset(skb->data, 0, sizeof(*rxp_hdr));
-
-       skb->dev = c2_port->netdev;
-
-       maplen = c2_port->rx_buf_size;
-       mapaddr =
-           pci_map_single(c2dev->pcidev, skb->data, maplen,
-                          PCI_DMA_FROMDEVICE);
-
-       /* Set the sk_buff RXP_header to RXP_HRXD_READY */
-       rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-       rxp_hdr->flags = RXP_HRXD_READY;
-
-       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-       __raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
-                    elem->hw_desc + C2_RXP_LEN);
-       __raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                    elem->hw_desc + C2_RXP_FLAGS);
-
-       elem->skb = skb;
-       elem->mapaddr = mapaddr;
-       elem->maplen = maplen;
-       rx_desc->len = maplen;
-
-       return 0;
-}
-
-/*
- * Allocate buffers for the Rx ring
- * For receive:  rx_ring.to_clean is next received frame
- */
-static int c2_rx_fill(struct c2_port *c2_port)
-{
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       int ret = 0;
-
-       elem = rx_ring->start;
-       do {
-               if (c2_rx_alloc(c2_port, elem)) {
-                       ret = 1;
-                       break;
-               }
-       } while ((elem = elem->next) != rx_ring->start);
-
-       rx_ring->to_clean = rx_ring->start;
-       return ret;
-}
-
-/* Free all buffers in RX ring, assumes receiver stopped */
-static void c2_rx_clean(struct c2_port *c2_port)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       struct c2_rx_desc *rx_desc;
-
-       elem = rx_ring->start;
-       do {
-               rx_desc = elem->ht_desc;
-               rx_desc->len = 0;
-
-               __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-               __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-               __raw_writew(0, elem->hw_desc + C2_RXP_LEN);
-               __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-                            elem->hw_desc + C2_RXP_ADDR);
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-                            elem->hw_desc + C2_RXP_FLAGS);
-
-               if (elem->skb) {
-                       pci_unmap_single(c2dev->pcidev, elem->mapaddr,
-                                        elem->maplen, PCI_DMA_FROMDEVICE);
-                       dev_kfree_skb(elem->skb);
-                       elem->skb = NULL;
-               }
-       } while ((elem = elem->next) != rx_ring->start);
-}
-
-static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
-{
-       struct c2_tx_desc *tx_desc = elem->ht_desc;
-
-       tx_desc->len = 0;
-
-       pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
-                        PCI_DMA_TODEVICE);
-
-       if (elem->skb) {
-               dev_kfree_skb_any(elem->skb);
-               elem->skb = NULL;
-       }
-
-       return 0;
-}
-
-/* Free all buffers in TX ring, assumes transmitter stopped */
-static void c2_tx_clean(struct c2_port *c2_port)
-{
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       struct c2_txp_desc txp_htxd;
-       int retry;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-       elem = tx_ring->start;
-
-       do {
-               retry = 0;
-               do {
-                       txp_htxd.flags =
-                           readw(elem->hw_desc + C2_TXP_FLAGS);
-
-                       if (txp_htxd.flags == TXP_HTXD_READY) {
-                               retry = 1;
-                               __raw_writew(0,
-                                            elem->hw_desc + C2_TXP_LEN);
-                               __raw_writeq(0,
-                                            elem->hw_desc + C2_TXP_ADDR);
-                               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
-                                            elem->hw_desc + C2_TXP_FLAGS);
-                               c2_port->netdev->stats.tx_dropped++;
-                               break;
-                       } else {
-                               __raw_writew(0,
-                                            elem->hw_desc + C2_TXP_LEN);
-                               __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-                                            elem->hw_desc + C2_TXP_ADDR);
-                               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-                                            elem->hw_desc + C2_TXP_FLAGS);
-                       }
-
-                       c2_tx_free(c2_port->c2dev, elem);
-
-               } while ((elem = elem->next) != tx_ring->start);
-       } while (retry);
-
-       c2_port->tx_avail = c2_port->tx_ring.count - 1;
-       c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
-
-       if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-               netif_wake_queue(c2_port->netdev);
-
-       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-}
-
-/*
- * Process transmit descriptors marked 'DONE' by the firmware,
- * freeing up their unneeded sk_buffs.
- */
-static void c2_tx_interrupt(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       struct c2_txp_desc txp_htxd;
-
-       spin_lock(&c2_port->tx_lock);
-
-       for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
-            elem = elem->next) {
-               txp_htxd.flags =
-                   be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS));
-
-               if (txp_htxd.flags != TXP_HTXD_DONE)
-                       break;
-
-               if (netif_msg_tx_done(c2_port)) {
-                       /* PCI reads are expensive in fast path */
-                       txp_htxd.len =
-                           be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN));
-                       pr_debug("%s: tx done slot %3Zu status 0x%x len "
-                               "%5u bytes\n",
-                               netdev->name, elem - tx_ring->start,
-                               txp_htxd.flags, txp_htxd.len);
-               }
-
-               c2_tx_free(c2dev, elem);
-               ++(c2_port->tx_avail);
-       }
-
-       tx_ring->to_clean = elem;
-
-       if (netif_queue_stopped(netdev)
-           && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-               netif_wake_queue(netdev);
-
-       spin_unlock(&c2_port->tx_lock);
-}
-
-static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
-{
-       struct c2_rx_desc *rx_desc = elem->ht_desc;
-       struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-
-       if (rxp_hdr->status != RXP_HRXD_OK ||
-           rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
-               pr_debug("BAD RXP_HRXD\n");
-               pr_debug("  rx_desc : %p\n", rx_desc);
-               pr_debug("    index : %Zu\n",
-                       elem - c2_port->rx_ring.start);
-               pr_debug("    len   : %u\n", rx_desc->len);
-               pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
-                       (void *) __pa((unsigned long) rxp_hdr));
-               pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
-               pr_debug("    status: 0x%x\n", rxp_hdr->status);
-               pr_debug("    len   : %u\n", rxp_hdr->len);
-               pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
-       }
-
-       /* Setup the skb for reuse since we're dropping this pkt */
-       elem->skb->data = elem->skb->head;
-       skb_reset_tail_pointer(elem->skb);
-
-       /* Zero out the rxp hdr in the sk_buff */
-       memset(elem->skb->data, 0, sizeof(*rxp_hdr));
-
-       /* Write the descriptor to the adapter's rx ring */
-       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-       __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-       __raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
-                    elem->hw_desc + C2_RXP_LEN);
-       __raw_writeq((__force u64) cpu_to_be64(elem->mapaddr),
-                    elem->hw_desc + C2_RXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                    elem->hw_desc + C2_RXP_FLAGS);
-
-       pr_debug("packet dropped\n");
-       c2_port->netdev->stats.rx_dropped++;
-}
-
-static void c2_rx_interrupt(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       struct c2_rx_desc *rx_desc;
-       struct c2_rxp_hdr *rxp_hdr;
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen, buflen;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2dev->lock, flags);
-
-       /* Begin where we left off */
-       rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
-
-       for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
-            elem = elem->next) {
-               rx_desc = elem->ht_desc;
-               mapaddr = elem->mapaddr;
-               maplen = elem->maplen;
-               skb = elem->skb;
-               rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-
-               if (rxp_hdr->flags != RXP_HRXD_DONE)
-                       break;
-               buflen = rxp_hdr->len;
-
-               /* Sanity check the RXP header */
-               if (rxp_hdr->status != RXP_HRXD_OK ||
-                   buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
-                       c2_rx_error(c2_port, elem);
-                       continue;
-               }
-
-               /*
-                * Allocate and map a new skb for replenishing the host
-                * RX desc
-                */
-               if (c2_rx_alloc(c2_port, elem)) {
-                       c2_rx_error(c2_port, elem);
-                       continue;
-               }
-
-               /* Unmap the old skb */
-               pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
-                                PCI_DMA_FROMDEVICE);
-
-               prefetch(skb->data);
-
-               /*
-                * Skip past the leading 8 bytes comprising of the
-                * "struct c2_rxp_hdr", prepended by the adapter
-                * to the usual Ethernet header ("struct ethhdr"),
-                * to the start of the raw Ethernet packet.
-                *
-                * Fix up the various fields in the sk_buff before
-                * passing it up to netif_rx(). The transfer size
-                * (in bytes) specified by the adapter len field of
-                * the "struct rxp_hdr_t" does NOT include the
-                * "sizeof(struct c2_rxp_hdr)".
-                */
-               skb->data += sizeof(*rxp_hdr);
-               skb_set_tail_pointer(skb, buflen);
-               skb->len = buflen;
-               skb->protocol = eth_type_trans(skb, netdev);
-
-               netif_rx(skb);
-
-               netdev->stats.rx_packets++;
-               netdev->stats.rx_bytes += buflen;
-       }
-
-       /* Save where we left off */
-       rx_ring->to_clean = elem;
-       c2dev->cur_rx = elem - rx_ring->start;
-       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-       spin_unlock_irqrestore(&c2dev->lock, flags);
-}
-
-/*
- * Handle netisr0 TX & RX interrupts.
- */
-static irqreturn_t c2_interrupt(int irq, void *dev_id)
-{
-       unsigned int netisr0, dmaisr;
-       int handled = 0;
-       struct c2_dev *c2dev = dev_id;
-
-       /* Process CCILNET interrupts */
-       netisr0 = readl(c2dev->regs + C2_NISR0);
-       if (netisr0) {
-
-               /*
-                * There is an issue with the firmware that always
-                * provides the status of RX for both TX & RX
-                * interrupts.  So process both queues here.
-                */
-               c2_rx_interrupt(c2dev->netdev);
-               c2_tx_interrupt(c2dev->netdev);
-
-               /* Clear the interrupt */
-               writel(netisr0, c2dev->regs + C2_NISR0);
-               handled++;
-       }
-
-       /* Process RNIC interrupts */
-       dmaisr = readl(c2dev->regs + C2_DISR);
-       if (dmaisr) {
-               writel(dmaisr, c2dev->regs + C2_DISR);
-               c2_rnic_interrupt(c2dev);
-               handled++;
-       }
-
-       if (handled) {
-               return IRQ_HANDLED;
-       } else {
-               return IRQ_NONE;
-       }
-}
-
-static int c2_up(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_element *elem;
-       struct c2_rxp_hdr *rxp_hdr;
-       struct in_device *in_dev;
-       size_t rx_size, tx_size;
-       int ret, i;
-       unsigned int netimr0;
-
-       if (netif_msg_ifup(c2_port))
-               pr_debug("%s: enabling interface\n", netdev->name);
-
-       /* Set the Rx buffer size based on MTU */
-       c2_set_rxbufsize(c2_port);
-
-       /* Allocate DMA'able memory for Tx/Rx host descriptor rings */
-       rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
-       tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
-
-       c2_port->mem_size = tx_size + rx_size;
-       c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
-                                            &c2_port->dma);
-       if (c2_port->mem == NULL) {
-               pr_debug("Unable to allocate memory for "
-                       "host descriptor rings\n");
-               return -ENOMEM;
-       }
-
-       /* Create the Rx host descriptor ring */
-       if ((ret =
-            c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
-                             c2dev->mmio_rxp_ring))) {
-               pr_debug("Unable to create RX ring\n");
-               goto bail0;
-       }
-
-       /* Allocate Rx buffers for the host descriptor ring */
-       if (c2_rx_fill(c2_port)) {
-               pr_debug("Unable to fill RX ring\n");
-               goto bail1;
-       }
-
-       /* Create the Tx host descriptor ring */
-       if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
-                                   c2_port->dma + rx_size,
-                                   c2dev->mmio_txp_ring))) {
-               pr_debug("Unable to create TX ring\n");
-               goto bail1;
-       }
-
-       /* Set the TX pointer to where we left off */
-       c2_port->tx_avail = c2_port->tx_ring.count - 1;
-       c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
-           c2_port->tx_ring.start + c2dev->cur_tx;
-
-       /* missing: Initialize MAC */
-
-       BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
-
-       /* Reset the adapter, ensures the driver is in sync with the RXP */
-       c2_reset(c2_port);
-
-       /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
-       for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
-            i++, elem++) {
-               rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-               rxp_hdr->flags = 0;
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                            elem->hw_desc + C2_RXP_FLAGS);
-       }
-
-       /* Enable network packets */
-       netif_start_queue(netdev);
-
-       /* Enable IRQ */
-       writel(0, c2dev->regs + C2_IDIS);
-       netimr0 = readl(c2dev->regs + C2_NIMR0);
-       netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
-       writel(netimr0, c2dev->regs + C2_NIMR0);
-
-       /* Tell the stack to ignore arp requests for ipaddrs bound to
-        * other interfaces.  This is needed to prevent the host stack
-        * from responding to arp requests to the ipaddr bound on the
-        * rdma interface.
-        */
-       in_dev = in_dev_get(netdev);
-       IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1);
-       in_dev_put(in_dev);
-
-       return 0;
-
-bail1:
-       c2_rx_clean(c2_port);
-       kfree(c2_port->rx_ring.start);
-
-bail0:
-       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-                           c2_port->dma);
-
-       return ret;
-}
-
-static int c2_down(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-
-       if (netif_msg_ifdown(c2_port))
-               pr_debug("%s: disabling interface\n",
-                       netdev->name);
-
-       /* Wait for all the queued packets to get sent */
-       c2_tx_interrupt(netdev);
-
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-
-       /* Disable IRQs by clearing the interrupt mask */
-       writel(1, c2dev->regs + C2_IDIS);
-       writel(0, c2dev->regs + C2_NIMR0);
-
-       /* missing: Stop transmitter */
-
-       /* missing: Stop receiver */
-
-       /* Reset the adapter, ensures the driver is in sync with the RXP */
-       c2_reset(c2_port);
-
-       /* missing: Turn off LEDs here */
-
-       /* Free all buffers in the host descriptor rings */
-       c2_tx_clean(c2_port);
-       c2_rx_clean(c2_port);
-
-       /* Free the host descriptor rings */
-       kfree(c2_port->rx_ring.start);
-       kfree(c2_port->tx_ring.start);
-       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-                           c2_port->dma);
-
-       return 0;
-}
-
-static void c2_reset(struct c2_port *c2_port)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       unsigned int cur_rx = c2dev->cur_rx;
-
-       /* Tell the hardware to quiesce */
-       C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
-
-       /*
-        * The hardware will reset the C2_PCI_HRX_QUI bit once
-        * the RXP is quiesced.  Wait 2 seconds for this.
-        */
-       ssleep(2);
-
-       cur_rx = C2_GET_CUR_RX(c2dev);
-
-       if (cur_rx & C2_PCI_HRX_QUI)
-               pr_debug("c2_reset: failed to quiesce the hardware!\n");
-
-       cur_rx &= ~C2_PCI_HRX_QUI;
-
-       c2dev->cur_rx = cur_rx;
-
-       pr_debug("Current RX: %u\n", c2dev->cur_rx);
-}
-
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       dma_addr_t mapaddr;
-       u32 maplen;
-       unsigned long flags;
-       unsigned int i;
-
-       spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-       if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
-               netif_stop_queue(netdev);
-               spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-               pr_debug("%s: Tx ring full when queue awake!\n",
-                       netdev->name);
-               return NETDEV_TX_BUSY;
-       }
-
-       maplen = skb_headlen(skb);
-       mapaddr =
-           pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
-
-       elem = tx_ring->to_use;
-       elem->skb = skb;
-       elem->mapaddr = mapaddr;
-       elem->maplen = maplen;
-
-       /* Tell HW to xmit */
-       __raw_writeq((__force u64) cpu_to_be64(mapaddr),
-                    elem->hw_desc + C2_TXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(maplen),
-                    elem->hw_desc + C2_TXP_LEN);
-       __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-                    elem->hw_desc + C2_TXP_FLAGS);
-
-       netdev->stats.tx_packets++;
-       netdev->stats.tx_bytes += maplen;
-
-       /* Loop thru additional data fragments and queue them */
-       if (skb_shinfo(skb)->nr_frags) {
-               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-                       const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-                       maplen = skb_frag_size(frag);
-                       mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag,
-                                                  0, maplen, DMA_TO_DEVICE);
-                       elem = elem->next;
-                       elem->skb = NULL;
-                       elem->mapaddr = mapaddr;
-                       elem->maplen = maplen;
-
-                       /* Tell HW to xmit */
-                       __raw_writeq((__force u64) cpu_to_be64(mapaddr),
-                                    elem->hw_desc + C2_TXP_ADDR);
-                       __raw_writew((__force u16) cpu_to_be16(maplen),
-                                    elem->hw_desc + C2_TXP_LEN);
-                       __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-                                    elem->hw_desc + C2_TXP_FLAGS);
-
-                       netdev->stats.tx_packets++;
-                       netdev->stats.tx_bytes += maplen;
-               }
-       }
-
-       tx_ring->to_use = elem->next;
-       c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
-
-       if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
-               netif_stop_queue(netdev);
-               if (netif_msg_tx_queued(c2_port))
-                       pr_debug("%s: transmit queue full\n",
-                               netdev->name);
-       }
-
-       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-       netdev->trans_start = jiffies;
-
-       return NETDEV_TX_OK;
-}
-
-static void c2_tx_timeout(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-
-       if (netif_msg_timer(c2_port))
-               pr_debug("%s: tx timeout\n", netdev->name);
-
-       c2_tx_clean(c2_port);
-}
-
-static int c2_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       int ret = 0;
-
-       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-               return -EINVAL;
-
-       netdev->mtu = new_mtu;
-
-       if (netif_running(netdev)) {
-               c2_down(netdev);
-
-               c2_up(netdev);
-       }
-
-       return ret;
-}
-
-static const struct net_device_ops c2_netdev = {
-       .ndo_open               = c2_up,
-       .ndo_stop               = c2_down,
-       .ndo_start_xmit         = c2_xmit_frame,
-       .ndo_tx_timeout         = c2_tx_timeout,
-       .ndo_change_mtu         = c2_change_mtu,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-/* Initialize network device */
-static struct net_device *c2_devinit(struct c2_dev *c2dev,
-                                    void __iomem * mmio_addr)
-{
-       struct c2_port *c2_port = NULL;
-       struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
-
-       if (!netdev) {
-               pr_debug("c2_port etherdev alloc failed");
-               return NULL;
-       }
-
-       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-       netdev->netdev_ops = &c2_netdev;
-       netdev->watchdog_timeo = C2_TX_TIMEOUT;
-       netdev->irq = c2dev->pcidev->irq;
-
-       c2_port = netdev_priv(netdev);
-       c2_port->netdev = netdev;
-       c2_port->c2dev = c2dev;
-       c2_port->msg_enable = netif_msg_init(debug, default_msg);
-       c2_port->tx_ring.count = C2_NUM_TX_DESC;
-       c2_port->rx_ring.count = C2_NUM_RX_DESC;
-
-       spin_lock_init(&c2_port->tx_lock);
-
-       /* Copy our 48-bit ethernet hardware address */
-       memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
-
-       /* Validate the MAC address */
-       if (!is_valid_ether_addr(netdev->dev_addr)) {
-               pr_debug("Invalid MAC Address\n");
-               pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name,
-                        netdev->dev_addr, netdev->irq);
-               free_netdev(netdev);
-               return NULL;
-       }
-
-       c2dev->netdev = netdev;
-
-       return netdev;
-}
-
-static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
-{
-       int ret = 0, i;
-       unsigned long reg0_start, reg0_flags, reg0_len;
-       unsigned long reg2_start, reg2_flags, reg2_len;
-       unsigned long reg4_start, reg4_flags, reg4_len;
-       unsigned kva_map_size;
-       struct net_device *netdev = NULL;
-       struct c2_dev *c2dev = NULL;
-       void __iomem *mmio_regs = NULL;
-
-       printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
-               DRV_VERSION);
-
-       /* Enable PCI device */
-       ret = pci_enable_device(pcidev);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
-                       pci_name(pcidev));
-               goto bail0;
-       }
-
-       reg0_start = pci_resource_start(pcidev, BAR_0);
-       reg0_len = pci_resource_len(pcidev, BAR_0);
-       reg0_flags = pci_resource_flags(pcidev, BAR_0);
-
-       reg2_start = pci_resource_start(pcidev, BAR_2);
-       reg2_len = pci_resource_len(pcidev, BAR_2);
-       reg2_flags = pci_resource_flags(pcidev, BAR_2);
-
-       reg4_start = pci_resource_start(pcidev, BAR_4);
-       reg4_len = pci_resource_len(pcidev, BAR_4);
-       reg4_flags = pci_resource_flags(pcidev, BAR_4);
-
-       pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
-       pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
-       pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
-
-       /* Make sure PCI base addr are MMIO */
-       if (!(reg0_flags & IORESOURCE_MEM) ||
-           !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
-               printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
-               ret = -ENODEV;
-               goto bail1;
-       }
-
-       /* Check for weird/broken PCI region reporting */
-       if ((reg0_len < C2_REG0_SIZE) ||
-           (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
-               printk(KERN_ERR PFX "Invalid PCI region sizes\n");
-               ret = -ENODEV;
-               goto bail1;
-       }
-
-       /* Reserve PCI I/O and memory resources */
-       ret = pci_request_regions(pcidev, DRV_NAME);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: Unable to request regions\n",
-                       pci_name(pcidev));
-               goto bail1;
-       }
-
-       if ((sizeof(dma_addr_t) > 4)) {
-               ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
-               if (ret < 0) {
-                       printk(KERN_ERR PFX "64b DMA configuration failed\n");
-                       goto bail2;
-               }
-       } else {
-               ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
-               if (ret < 0) {
-                       printk(KERN_ERR PFX "32b DMA configuration failed\n");
-                       goto bail2;
-               }
-       }
-
-       /* Enables bus-mastering on the device */
-       pci_set_master(pcidev);
-
-       /* Remap the adapter PCI registers in BAR4 */
-       mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-                                   sizeof(struct c2_adapter_pci_regs));
-       if (!mmio_regs) {
-               printk(KERN_ERR PFX
-                       "Unable to remap adapter PCI registers in BAR4\n");
-               ret = -EIO;
-               goto bail2;
-       }
-
-       /* Validate PCI regs magic */
-       for (i = 0; i < sizeof(c2_magic); i++) {
-               if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
-                       printk(KERN_ERR PFX "Downlevel Firmware boot loader "
-                               "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
-                              "utility to update your boot loader\n",
-                               i + 1, sizeof(c2_magic),
-                               readb(mmio_regs + C2_REGS_MAGIC + i),
-                               c2_magic[i]);
-                       printk(KERN_ERR PFX "Adapter not claimed\n");
-                       iounmap(mmio_regs);
-                       ret = -EIO;
-                       goto bail2;
-               }
-       }
-
-       /* Validate the adapter version */
-       if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
-               printk(KERN_ERR PFX "Version mismatch "
-                       "[fw=%u, c2=%u], Adapter not claimed\n",
-                       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)),
-                       C2_VERSION);
-               ret = -EINVAL;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       /* Validate the adapter IVN */
-       if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
-               printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
-                      "the OpenIB device support kit. "
-                      "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)),
-                      C2_IVN);
-               ret = -EINVAL;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       /* Allocate hardware structure */
-       c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
-       if (!c2dev) {
-               printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
-                       pci_name(pcidev));
-               ret = -ENOMEM;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       memset(c2dev, 0, sizeof(*c2dev));
-       spin_lock_init(&c2dev->lock);
-       c2dev->pcidev = pcidev;
-       c2dev->cur_tx = 0;
-
-       /* Get the last RX index */
-       c2dev->cur_rx =
-           (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) -
-            0xffffc000) / sizeof(struct c2_rxp_desc);
-
-       /* Request an interrupt line for the driver */
-       ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
-                       pci_name(pcidev), pcidev->irq);
-               iounmap(mmio_regs);
-               goto bail3;
-       }
-
-       /* Set driver specific data */
-       pci_set_drvdata(pcidev, c2dev);
-
-       /* Initialize network device */
-       if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
-               ret = -ENOMEM;
-               iounmap(mmio_regs);
-               goto bail4;
-       }
-
-       /* Save off the actual size prior to unmapping mmio_regs */
-       kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE));
-
-       /* Unmap the adapter PCI registers in BAR4 */
-       iounmap(mmio_regs);
-
-       /* Register network device */
-       ret = register_netdev(netdev);
-       if (ret) {
-               printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
-                       ret);
-               goto bail5;
-       }
-
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-
-       /* Remap the adapter HRXDQ PA space to kernel VA space */
-       c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
-                                              C2_RXP_HRXDQ_SIZE);
-       if (!c2dev->mmio_rxp_ring) {
-               printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
-               ret = -EIO;
-               goto bail6;
-       }
-
-       /* Remap the adapter HTXDQ PA space to kernel VA space */
-       c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
-                                              C2_TXP_HTXDQ_SIZE);
-       if (!c2dev->mmio_txp_ring) {
-               printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
-               ret = -EIO;
-               goto bail7;
-       }
-
-       /* Save off the current RX index in the last 4 bytes of the TXP Ring */
-       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-       /* Remap the PCI registers in adapter BAR0 to kernel VA space */
-       c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
-       if (!c2dev->regs) {
-               printk(KERN_ERR PFX "Unable to remap BAR0\n");
-               ret = -EIO;
-               goto bail8;
-       }
-
-       /* Remap the PCI registers in adapter BAR4 to kernel VA space */
-       c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
-       c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-                                    kva_map_size);
-       if (!c2dev->kva) {
-               printk(KERN_ERR PFX "Unable to remap BAR4\n");
-               ret = -EIO;
-               goto bail9;
-       }
-
-       /* Print out the MAC address */
-       pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr,
-                netdev->irq);
-
-       ret = c2_rnic_init(c2dev);
-       if (ret) {
-               printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
-               goto bail10;
-       }
-
-       ret = c2_register_device(c2dev);
-       if (ret)
-               goto bail10;
-
-       return 0;
-
- bail10:
-       iounmap(c2dev->kva);
-
- bail9:
-       iounmap(c2dev->regs);
-
- bail8:
-       iounmap(c2dev->mmio_txp_ring);
-
- bail7:
-       iounmap(c2dev->mmio_rxp_ring);
-
- bail6:
-       unregister_netdev(netdev);
-
- bail5:
-       free_netdev(netdev);
-
- bail4:
-       free_irq(pcidev->irq, c2dev);
-
- bail3:
-       ib_dealloc_device(&c2dev->ibdev);
-
- bail2:
-       pci_release_regions(pcidev);
-
- bail1:
-       pci_disable_device(pcidev);
-
- bail0:
-       return ret;
-}
-
-static void c2_remove(struct pci_dev *pcidev)
-{
-       struct c2_dev *c2dev = pci_get_drvdata(pcidev);
-       struct net_device *netdev = c2dev->netdev;
-
-       /* Unregister with OpenIB */
-       c2_unregister_device(c2dev);
-
-       /* Clean up the RNIC resources */
-       c2_rnic_term(c2dev);
-
-       /* Remove network device from the kernel */
-       unregister_netdev(netdev);
-
-       /* Free network device */
-       free_netdev(netdev);
-
-       /* Free the interrupt line */
-       free_irq(pcidev->irq, c2dev);
-
-       /* missing: Turn LEDs off here */
-
-       /* Unmap adapter PA space */
-       iounmap(c2dev->kva);
-       iounmap(c2dev->regs);
-       iounmap(c2dev->mmio_txp_ring);
-       iounmap(c2dev->mmio_rxp_ring);
-
-       /* Free the hardware structure */
-       ib_dealloc_device(&c2dev->ibdev);
-
-       /* Release reserved PCI I/O and memory resources */
-       pci_release_regions(pcidev);
-
-       /* Disable PCI device */
-       pci_disable_device(pcidev);
-
-       /* Clear driver specific data */
-       pci_set_drvdata(pcidev, NULL);
-}
-
-static struct pci_driver c2_pci_driver = {
-       .name = DRV_NAME,
-       .id_table = c2_pci_table,
-       .probe = c2_probe,
-       .remove = c2_remove,
-};
-
-module_pci_driver(c2_pci_driver);
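The probe and remove paths above rely on the kernel's staged-unwind idiom: every acquisition gets a numbered bail label, and a failure at step N jumps to the label that releases steps N-1 down to 0 in reverse order. A minimal, self-contained sketch of the idiom, using plain malloc/free as hypothetical stand-ins for the driver's resources:

#include <stdio.h>
#include <stdlib.h>

static int setup(void)
{
	void *a, *b, *c;
	int ret;

	a = malloc(16);			/* step 0: first resource */
	if (!a) {
		ret = -1;
		goto bail0;
	}

	b = malloc(16);			/* step 1: second resource */
	if (!b) {
		ret = -1;
		goto bail1;
	}

	c = malloc(16);			/* step 2: third resource */
	if (!c) {
		ret = -1;
		goto bail2;
	}

	printf("all resources acquired\n");
	free(c);
	free(b);
	free(a);
	return 0;

bail2:					/* step 2 failed: undo step 1 */
	free(b);
bail1:					/* step 1 failed: undo step 0 */
	free(a);
bail0:					/* step 0 failed: nothing to undo */
	return ret;
}

int main(void)
{
	return setup() ? EXIT_FAILURE : EXIT_SUCCESS;
}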
diff --git a/drivers/staging/rdma/amso1100/c2.h b/drivers/staging/rdma/amso1100/c2.h
deleted file mode 100644 (file)
index 21b565a..0000000
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __C2_H
-#define __C2_H
-
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/idr.h>
-
-#include "c2_provider.h"
-#include "c2_mq.h"
-#include "c2_status.h"
-
-#define DRV_NAME     "c2"
-#define DRV_VERSION  "1.1"
-#define PFX          DRV_NAME ": "
-
-#define BAR_0                0
-#define BAR_2                2
-#define BAR_4                4
-
-#define RX_BUF_SIZE         (1536 + 8)
-#define ETH_JUMBO_MTU        9000
-#define C2_MAGIC            "CEPHEUS"
-#define C2_VERSION           4
-#define C2_IVN              (18 & 0x7fffffff)
-
-#define C2_REG0_SIZE        (16 * 1024)
-#define C2_REG2_SIZE        (2 * 1024 * 1024)
-#define C2_REG4_SIZE        (256 * 1024 * 1024)
-#define C2_NUM_TX_DESC       341
-#define C2_NUM_RX_DESC       256
-#define C2_PCI_REGS_OFFSET  (0x10000)
-#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
-#define C2_RXP_HRXDQ_SIZE   (4096)
-#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
-#define C2_TXP_HTXDQ_SIZE   (4096)
-#define C2_TX_TIMEOUT      (6*HZ)
-
-/* CEPHEUS */
-static const u8 c2_magic[] = {
-       0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
-};
-
-enum adapter_pci_regs {
-       C2_REGS_MAGIC = 0x0000,
-       C2_REGS_VERS = 0x0008,
-       C2_REGS_IVN = 0x000C,
-       C2_REGS_PCI_WINSIZE = 0x0010,
-       C2_REGS_Q0_QSIZE = 0x0014,
-       C2_REGS_Q0_MSGSIZE = 0x0018,
-       C2_REGS_Q0_POOLSTART = 0x001C,
-       C2_REGS_Q0_SHARED = 0x0020,
-       C2_REGS_Q1_QSIZE = 0x0024,
-       C2_REGS_Q1_MSGSIZE = 0x0028,
-       C2_REGS_Q1_SHARED = 0x0030,
-       C2_REGS_Q2_QSIZE = 0x0034,
-       C2_REGS_Q2_MSGSIZE = 0x0038,
-       C2_REGS_Q2_SHARED = 0x0040,
-       C2_REGS_ENADDR = 0x004C,
-       C2_REGS_RDMA_ENADDR = 0x0054,
-       C2_REGS_HRX_CUR = 0x006C,
-};
-
-struct c2_adapter_pci_regs {
-       char reg_magic[8];
-       u32 version;
-       u32 ivn;
-       u32 pci_window_size;
-       u32 q0_q_size;
-       u32 q0_msg_size;
-       u32 q0_pool_start;
-       u32 q0_shared;
-       u32 q1_q_size;
-       u32 q1_msg_size;
-       u32 q1_pool_start;
-       u32 q1_shared;
-       u32 q2_q_size;
-       u32 q2_msg_size;
-       u32 q2_pool_start;
-       u32 q2_shared;
-       u32 log_start;
-       u32 log_size;
-       u8 host_enaddr[8];
-       u8 rdma_enaddr[8];
-       u32 crash_entry;
-       u32 crash_ready[2];
-       u32 fw_txd_cur;
-       u32 fw_hrxd_cur;
-       u32 fw_rxd_cur;
-};
-
-enum pci_regs {
-       C2_HISR = 0x0000,
-       C2_DISR = 0x0004,
-       C2_HIMR = 0x0008,
-       C2_DIMR = 0x000C,
-       C2_NISR0 = 0x0010,
-       C2_NISR1 = 0x0014,
-       C2_NIMR0 = 0x0018,
-       C2_NIMR1 = 0x001C,
-       C2_IDIS = 0x0020,
-};
-
-enum {
-       C2_PCI_HRX_INT = 1 << 8,
-       C2_PCI_HTX_INT = 1 << 17,
-       C2_PCI_HRX_QUI = 1 << 31,
-};
-
-/*
- * Cepheus registers in BAR0.
- */
-struct c2_pci_regs {
-       u32 hostisr;
-       u32 dmaisr;
-       u32 hostimr;
-       u32 dmaimr;
-       u32 netisr0;
-       u32 netisr1;
-       u32 netimr0;
-       u32 netimr1;
-       u32 int_disable;
-};
-
-/* TXP flags */
-enum c2_txp_flags {
-       TXP_HTXD_DONE = 0,
-       TXP_HTXD_READY = 1 << 0,
-       TXP_HTXD_UNINIT = 1 << 1,
-};
-
-/* RXP flags */
-enum c2_rxp_flags {
-       RXP_HRXD_UNINIT = 0,
-       RXP_HRXD_READY = 1 << 0,
-       RXP_HRXD_DONE = 1 << 1,
-};
-
-/* RXP status */
-enum c2_rxp_status {
-       RXP_HRXD_ZERO = 0,
-       RXP_HRXD_OK = 1 << 0,
-       RXP_HRXD_BUF_OV = 1 << 1,
-};
-
-/* TXP descriptor fields */
-enum txp_desc {
-       C2_TXP_FLAGS = 0x0000,
-       C2_TXP_LEN = 0x0002,
-       C2_TXP_ADDR = 0x0004,
-};
-
-/* RXP descriptor fields */
-enum rxp_desc {
-       C2_RXP_FLAGS = 0x0000,
-       C2_RXP_STATUS = 0x0002,
-       C2_RXP_COUNT = 0x0004,
-       C2_RXP_LEN = 0x0006,
-       C2_RXP_ADDR = 0x0008,
-};
-
-struct c2_txp_desc {
-       u16 flags;
-       u16 len;
-       u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_desc {
-       u16 flags;
-       u16 status;
-       u16 count;
-       u16 len;
-       u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_hdr {
-       u16 flags;
-       u16 status;
-       u16 len;
-       u16 rsvd;
-} __attribute__ ((packed));
-
-struct c2_tx_desc {
-       u32 len;
-       u32 status;
-       dma_addr_t next_offset;
-};
-
-struct c2_rx_desc {
-       u32 len;
-       u32 status;
-       dma_addr_t next_offset;
-};
-
-struct c2_alloc {
-       u32 last;
-       u32 max;
-       spinlock_t lock;
-       unsigned long *table;
-};
-
-struct c2_array {
-       struct {
-               void **page;
-               int used;
-       } *page_list;
-};
-
-/*
- * The MQ shared pointer pool is organized as a linked list of
- * chunks. Each chunk contains a linked list of free shared pointers
- * that can be allocated to a given user mode client.
- *
- */
-struct sp_chunk {
-       struct sp_chunk *next;
-       dma_addr_t dma_addr;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-       u16 head;
-       u16 shared_ptr[0];
-};
-
-struct c2_pd_table {
-       u32 last;
-       u32 max;
-       spinlock_t lock;
-       unsigned long *table;
-};
-
-struct c2_qp_table {
-       struct idr idr;
-       spinlock_t lock;
-};
-
-struct c2_element {
-       struct c2_element *next;
-       void *ht_desc;          /* host     descriptor */
-       void __iomem *hw_desc;  /* hardware descriptor */
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen;
-};
-
-struct c2_ring {
-       struct c2_element *to_clean;
-       struct c2_element *to_use;
-       struct c2_element *start;
-       unsigned long count;
-};
-
-struct c2_dev {
-       struct ib_device ibdev;
-       void __iomem *regs;
-       void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
-       void __iomem *mmio_rxp_ring;
-       spinlock_t lock;
-       struct pci_dev *pcidev;
-       struct net_device *netdev;
-       struct net_device *pseudo_netdev;
-       unsigned int cur_tx;
-       unsigned int cur_rx;
-       u32 adapter_handle;
-       int device_cap_flags;
-       void __iomem *kva;      /* KVA device memory */
-       unsigned long pa;       /* PA device memory */
-       void **qptr_array;
-
-       struct kmem_cache *host_msg_cache;
-
-       struct list_head cca_link;              /* adapter list */
-       struct list_head eh_wakeup_list;        /* event wakeup list */
-       wait_queue_head_t req_vq_wo;
-
-       /* Cached RNIC properties */
-       struct ib_device_attr props;
-
-       struct c2_pd_table pd_table;
-       struct c2_qp_table qp_table;
-       int ports;              /* num of GigE ports */
-       int devnum;
-       spinlock_t vqlock;      /* sync vbs req MQ */
-
-       /* Verbs Queues */
-       struct c2_mq req_vq;    /* Verbs Request MQ */
-       struct c2_mq rep_vq;    /* Verbs Reply MQ */
-       struct c2_mq aeq;       /* Async Events MQ */
-
-       /* Kernel client MQs */
-       struct sp_chunk *kern_mqsp_pool;
-
-       /* Device updates these values when posting messages to a host
-        * target queue */
-       u16 req_vq_shared;
-       u16 rep_vq_shared;
-       u16 aeq_shared;
-       u16 irq_claimed;
-
-       /*
-        * Shared host target pages for user-accessible MQs.
-        */
-       int hthead;             /* index of first free entry */
-       void *htpages;          /* kernel vaddr */
-       int htlen;              /* length of htpages memory */
-       void *htuva;            /* user mapped vaddr */
-       spinlock_t htlock;      /* serialize allocation */
-
-       u64 adapter_hint_uva;   /* access to the activity FIFO */
-
-       //      spinlock_t aeq_lock;
-       //      spinlock_t rnic_lock;
-
-       __be16 *hint_count;
-       dma_addr_t hint_count_dma;
-       u16 hints_read;
-
-       int init;               /* TRUE if it's ready */
-       char ae_cache_name[16];
-       char vq_cache_name[16];
-};
-
-struct c2_port {
-       u32 msg_enable;
-       struct c2_dev *c2dev;
-       struct net_device *netdev;
-
-       spinlock_t tx_lock;
-       u32 tx_avail;
-       struct c2_ring tx_ring;
-       struct c2_ring rx_ring;
-
-       void *mem;              /* PCI memory for host rings */
-       dma_addr_t dma;
-       unsigned long mem_size;
-
-       u32 rx_buf_size;
-};
-
-/*
- * Activity FIFO registers in BAR0.
- */
-#define PCI_BAR0_HOST_HINT     0x100
-#define PCI_BAR0_ADAPTER_HINT  0x2000
-
-/*
- * CQ arming flag bits.
- */
-#define CQ_ARMED       0x01
-#define CQ_WAIT_FOR_DMA        0x80
-
-/*
- * The format of a hint is as follows:
- * Lower 16 bits are the count of hints for the queue.
- * Next 15 bits are the qp_index.
- * Uppermost bit depends on who reads it:
- *    If read by producer, then it means Full (1) or Not-Full (0)
- *    If read by consumer, then it means Empty (1) or Not-Empty (0)
- */
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-
-/*
- * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
- * struct.
- */
-#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
-
-#ifndef readq
-static inline u64 readq(const void __iomem * addr)
-{
-       u64 ret = readl(addr + 4);
-       ret <<= 32;
-       ret |= readl(addr);
-
-       return ret;
-}
-#endif
-
-#ifndef writeq
-static inline void __raw_writeq(u64 val, void __iomem * addr)
-{
-       __raw_writel((u32) (val), addr);
-       __raw_writel((u32) (val >> 32), (addr + 4));
-}
-#endif
-
-#define C2_SET_CUR_RX(c2dev, cur_rx) \
-       __raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
-
-#define C2_GET_CUR_RX(c2dev) \
-       be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092))
-
-static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
-{
-       return container_of(ibdev, struct c2_dev, ibdev);
-}
-
-static inline int c2_errno(void *reply)
-{
-       switch (c2_wr_get_result(reply)) {
-       case C2_OK:
-               return 0;
-       case CCERR_NO_BUFS:
-       case CCERR_INSUFFICIENT_RESOURCES:
-       case CCERR_ZERO_RDMA_READ_RESOURCES:
-               return -ENOMEM;
-       case CCERR_MR_IN_USE:
-       case CCERR_QP_IN_USE:
-               return -EBUSY;
-       case CCERR_ADDR_IN_USE:
-               return -EADDRINUSE;
-       case CCERR_ADDR_NOT_AVAIL:
-               return -EADDRNOTAVAIL;
-       case CCERR_CONN_RESET:
-               return -ECONNRESET;
-       case CCERR_NOT_IMPLEMENTED:
-       case CCERR_INVALID_WQE:
-               return -ENOSYS;
-       case CCERR_QP_NOT_PRIVILEGED:
-               return -EPERM;
-       case CCERR_STACK_ERROR:
-               return -EPROTO;
-       case CCERR_ACCESS_VIOLATION:
-       case CCERR_BASE_AND_BOUNDS_VIOLATION:
-               return -EFAULT;
-       case CCERR_STAG_STATE_NOT_INVALID:
-       case CCERR_INVALID_ADDRESS:
-       case CCERR_INVALID_CQ:
-       case CCERR_INVALID_EP:
-       case CCERR_INVALID_MODIFIER:
-       case CCERR_INVALID_MTU:
-       case CCERR_INVALID_PD_ID:
-       case CCERR_INVALID_QP:
-       case CCERR_INVALID_RNIC:
-       case CCERR_INVALID_STAG:
-               return -EINVAL;
-       default:
-               return -EAGAIN;
-       }
-}
-
-/* Device */
-int c2_register_device(struct c2_dev *c2dev);
-void c2_unregister_device(struct c2_dev *c2dev);
-int c2_rnic_init(struct c2_dev *c2dev);
-void c2_rnic_term(struct c2_dev *c2dev);
-void c2_rnic_interrupt(struct c2_dev *c2dev);
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-
-/* QPs */
-int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
-                      struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-                       struct ib_qp_attr *attr, int attr_mask);
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-                                int ord, int ird);
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-                       struct ib_send_wr **bad_wr);
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-                          struct ib_recv_wr **bad_wr);
-void c2_init_qp_table(struct c2_dev *c2dev);
-void c2_cleanup_qp_table(struct c2_dev *c2dev);
-void c2_set_qp_state(struct c2_qp *, int);
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
-
-/* PDs */
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
-int c2_init_pd_table(struct c2_dev *c2dev);
-void c2_cleanup_pd_table(struct c2_dev *c2dev);
-
-/* CQs */
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-                     struct c2_ucontext *ctx, struct c2_cq *cq);
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-
-/* CM */
-int c2_llp_connect(struct iw_cm_id *cm_id,
-                         struct iw_cm_conn_param *iw_param);
-int c2_llp_accept(struct iw_cm_id *cm_id,
-                        struct iw_cm_conn_param *iw_param);
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
-                        u8 pdata_len);
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
-int c2_llp_service_destroy(struct iw_cm_id *cm_id);
-
-/* MM */
-int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
-                                     int page_size, int pbl_depth, u32 length,
-                                     u32 off, u64 *va, enum c2_acf acf,
-                                     struct c2_mr *mr);
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
-
-/* AE */
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
-
-/* MQSP Allocator */
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-                            struct sp_chunk **root);
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-                            dma_addr_t *dma_addr, gfp_t gfp_mask);
-void c2_free_mqsp(__be16* mqsp);
-#endif
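The hint layout described above packs the count into the low 16 bits and the queue index into the next 15. A standalone sanity check of that packing, reusing the header's three macros verbatim on sample values:

#include <stdio.h>
#include <stdint.h>

#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)

int main(void)
{
	uint32_t hint = C2_HINT_MAKE(42u, 7u);

	/* expect index=42 count=7 back out of the packed word */
	printf("hint=0x%08x index=%u count=%u\n",
	       (unsigned)hint,
	       (unsigned)C2_HINT_GET_INDEX(hint),
	       (unsigned)C2_HINT_GET_COUNT(hint));
	return 0;
}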
diff --git a/drivers/staging/rdma/amso1100/c2_ae.c b/drivers/staging/rdma/amso1100/c2_ae.c
deleted file mode 100644 (file)
index eb7a92b..0000000
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_status.h"
-#include "c2_ae.h"
-
-static int c2_convert_cm_status(u32 c2_status)
-{
-       switch (c2_status) {
-       case C2_CONN_STATUS_SUCCESS:
-               return 0;
-       case C2_CONN_STATUS_REJECTED:
-               return -ENETRESET;
-       case C2_CONN_STATUS_REFUSED:
-               return -ECONNREFUSED;
-       case C2_CONN_STATUS_TIMEDOUT:
-               return -ETIMEDOUT;
-       case C2_CONN_STATUS_NETUNREACH:
-               return -ENETUNREACH;
-       case C2_CONN_STATUS_HOSTUNREACH:
-               return -EHOSTUNREACH;
-       case C2_CONN_STATUS_INVALID_RNIC:
-               return -EINVAL;
-       case C2_CONN_STATUS_INVALID_QP:
-               return -EINVAL;
-       case C2_CONN_STATUS_INVALID_QP_STATE:
-               return -EINVAL;
-       case C2_CONN_STATUS_ADDR_NOT_AVAIL:
-               return -EADDRNOTAVAIL;
-       default:
-               printk(KERN_ERR PFX
-                      "%s - Unable to convert CM status: %d\n",
-                      __func__, c2_status);
-               return -EIO;
-       }
-}
-
-static const char* to_event_str(int event)
-{
-       static const char* event_str[] = {
-               "CCAE_REMOTE_SHUTDOWN",
-               "CCAE_ACTIVE_CONNECT_RESULTS",
-               "CCAE_CONNECTION_REQUEST",
-               "CCAE_LLP_CLOSE_COMPLETE",
-               "CCAE_TERMINATE_MESSAGE_RECEIVED",
-               "CCAE_LLP_CONNECTION_RESET",
-               "CCAE_LLP_CONNECTION_LOST",
-               "CCAE_LLP_SEGMENT_SIZE_INVALID",
-               "CCAE_LLP_INVALID_CRC",
-               "CCAE_LLP_BAD_FPDU",
-               "CCAE_INVALID_DDP_VERSION",
-               "CCAE_INVALID_RDMA_VERSION",
-               "CCAE_UNEXPECTED_OPCODE",
-               "CCAE_INVALID_DDP_QUEUE_NUMBER",
-               "CCAE_RDMA_READ_NOT_ENABLED",
-               "CCAE_RDMA_WRITE_NOT_ENABLED",
-               "CCAE_RDMA_READ_TOO_SMALL",
-               "CCAE_NO_L_BIT",
-               "CCAE_TAGGED_INVALID_STAG",
-               "CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
-               "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
-               "CCAE_TAGGED_INVALID_PD",
-               "CCAE_WRAP_ERROR",
-               "CCAE_BAD_CLOSE",
-               "CCAE_BAD_LLP_CLOSE",
-               "CCAE_INVALID_MSN_RANGE",
-               "CCAE_INVALID_MSN_GAP",
-               "CCAE_IRRQ_OVERFLOW",
-               "CCAE_IRRQ_MSN_GAP",
-               "CCAE_IRRQ_MSN_RANGE",
-               "CCAE_IRRQ_INVALID_STAG",
-               "CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
-               "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
-               "CCAE_IRRQ_INVALID_PD",
-               "CCAE_IRRQ_WRAP_ERROR",
-               "CCAE_CQ_SQ_COMPLETION_OVERFLOW",
-               "CCAE_CQ_RQ_COMPLETION_ERROR",
-               "CCAE_QP_SRQ_WQE_ERROR",
-               "CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
-               "CCAE_CQ_OVERFLOW",
-               "CCAE_CQ_OPERATION_ERROR",
-               "CCAE_SRQ_LIMIT_REACHED",
-               "CCAE_QP_RQ_LIMIT_REACHED",
-               "CCAE_SRQ_CATASTROPHIC_ERROR",
-               "CCAE_RNIC_CATASTROPHIC_ERROR"
-       };
-
-       if (event < CCAE_REMOTE_SHUTDOWN ||
-           event > CCAE_RNIC_CATASTROPHIC_ERROR)
-               return "<invalid event>";
-
-       event -= CCAE_REMOTE_SHUTDOWN;
-       return event_str[event];
-}
-
-static const char *to_qp_state_str(int state)
-{
-       switch (state) {
-       case C2_QP_STATE_IDLE:
-               return "C2_QP_STATE_IDLE";
-       case C2_QP_STATE_CONNECTING:
-               return "C2_QP_STATE_CONNECTING";
-       case C2_QP_STATE_RTS:
-               return "C2_QP_STATE_RTS";
-       case C2_QP_STATE_CLOSING:
-               return "C2_QP_STATE_CLOSING";
-       case C2_QP_STATE_TERMINATE:
-               return "C2_QP_STATE_TERMINATE";
-       case C2_QP_STATE_ERROR:
-               return "C2_QP_STATE_ERROR";
-       default:
-               return "<invalid QP state>";
-       }
-}
-
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
-{
-       struct c2_mq *mq = c2dev->qptr_array[mq_index];
-       union c2wr *wr;
-       void *resource_user_context;
-       struct iw_cm_event cm_event;
-       struct ib_event ib_event;
-       enum c2_resource_indicator resource_indicator;
-       enum c2_event_id event_id;
-       unsigned long flags;
-       int status;
-       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
-       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
-
-       /*
-        * retrieve the message
-        */
-       wr = c2_mq_consume(mq);
-       if (!wr)
-               return;
-
-       memset(&ib_event, 0, sizeof(ib_event));
-       memset(&cm_event, 0, sizeof(cm_event));
-
-       event_id = c2_wr_get_id(wr);
-       resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
-       resource_user_context =
-           (void *) (unsigned long) wr->ae.ae_generic.user_context;
-
-       status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
-
-       pr_debug("event received c2_dev=%p, event_id=%d, "
-               "resource_indicator=%d, user_context=%p, status = %d\n",
-               c2dev, event_id, resource_indicator, resource_user_context,
-               status);
-
-       switch (resource_indicator) {
-       case C2_RES_IND_QP:{
-
-               struct c2_qp *qp = resource_user_context;
-               struct iw_cm_id *cm_id = qp->cm_id;
-               struct c2wr_ae_active_connect_results *res;
-
-               if (!cm_id) {
-                       pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
-                               qp);
-                       goto ignore_it;
-               }
-               pr_debug("%s: event = %s, user_context=%llx, "
-                       "resource_type=%x, "
-                       "resource=%x, qp_state=%s\n",
-                       __func__,
-                       to_event_str(event_id),
-                       (unsigned long long) wr->ae.ae_generic.user_context,
-                       be32_to_cpu(wr->ae.ae_generic.resource_type),
-                       be32_to_cpu(wr->ae.ae_generic.resource),
-                       to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
-
-               c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
-
-               switch (event_id) {
-               case CCAE_ACTIVE_CONNECT_RESULTS:
-                       res = &wr->ae.ae_active_connect_results;
-                       cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-                       laddr->sin_addr.s_addr = res->laddr;
-                       raddr->sin_addr.s_addr = res->raddr;
-                       laddr->sin_port = res->lport;
-                       raddr->sin_port = res->rport;
-                       if (status == 0) {
-                               cm_event.private_data_len =
-                                       be32_to_cpu(res->private_data_length);
-                               cm_event.private_data = res->private_data;
-                       } else {
-                               spin_lock_irqsave(&qp->lock, flags);
-                               if (qp->cm_id) {
-                                       qp->cm_id->rem_ref(qp->cm_id);
-                                       qp->cm_id = NULL;
-                               }
-                               spin_unlock_irqrestore(&qp->lock, flags);
-                               cm_event.private_data_len = 0;
-                               cm_event.private_data = NULL;
-                       }
-                       if (cm_id->event_handler)
-                               cm_id->event_handler(cm_id, &cm_event);
-                       break;
-               case CCAE_TERMINATE_MESSAGE_RECEIVED:
-               case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
-                       ib_event.device = &c2dev->ibdev;
-                       ib_event.element.qp = &qp->ibqp;
-                       ib_event.event = IB_EVENT_QP_REQ_ERR;
-
-                       if (qp->ibqp.event_handler)
-                               qp->ibqp.event_handler(&ib_event,
-                                                      qp->ibqp.qp_context);
-                       break;
-               case CCAE_BAD_CLOSE:
-               case CCAE_LLP_CLOSE_COMPLETE:
-               case CCAE_LLP_CONNECTION_RESET:
-               case CCAE_LLP_CONNECTION_LOST:
-                       BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
-
-                       spin_lock_irqsave(&qp->lock, flags);
-                       if (qp->cm_id) {
-                               qp->cm_id->rem_ref(qp->cm_id);
-                               qp->cm_id = NULL;
-                       }
-                       spin_unlock_irqrestore(&qp->lock, flags);
-                       cm_event.event = IW_CM_EVENT_CLOSE;
-                       cm_event.status = 0;
-                       if (cm_id->event_handler)
-                               cm_id->event_handler(cm_id, &cm_event);
-                       break;
-               default:
-                       BUG_ON(1);
-                       pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
-                               "CM_ID=%p\n",
-                               __func__, __LINE__,
-                               event_id, qp, cm_id);
-                       break;
-               }
-               break;
-       }
-
-       case C2_RES_IND_EP:{
-
-               struct c2wr_ae_connection_request *req =
-                       &wr->ae.ae_connection_request;
-               struct iw_cm_id *cm_id =
-                       resource_user_context;
-
-               pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
-               if (event_id != CCAE_CONNECTION_REQUEST) {
-                       pr_debug("%s: Invalid event_id: %d\n",
-                               __func__, event_id);
-                       break;
-               }
-               cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
-               cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
-               laddr->sin_addr.s_addr = req->laddr;
-               raddr->sin_addr.s_addr = req->raddr;
-               laddr->sin_port = req->lport;
-               raddr->sin_port = req->rport;
-               cm_event.private_data_len =
-                       be32_to_cpu(req->private_data_length);
-               cm_event.private_data = req->private_data;
-               /*
-                * Until ird/ord negotiation via MPAv2 support is added, send
-                * max supported values
-                */
-               cm_event.ird = cm_event.ord = 128;
-
-               if (cm_id->event_handler)
-                       cm_id->event_handler(cm_id, &cm_event);
-               break;
-       }
-
-       case C2_RES_IND_CQ:{
-               struct c2_cq *cq =
-                   resource_user_context;
-
-               pr_debug("IB_EVENT_CQ_ERR\n");
-               ib_event.device = &c2dev->ibdev;
-               ib_event.element.cq = &cq->ibcq;
-               ib_event.event = IB_EVENT_CQ_ERR;
-
-               if (cq->ibcq.event_handler)
-                       cq->ibcq.event_handler(&ib_event,
-                                              cq->ibcq.cq_context);
-               break;
-       }
-
-       default:
-               printk(KERN_ERR PFX "Bad resource indicator = %d\n",
-                      resource_indicator);
-               break;
-       }
-
- ignore_it:
-       c2_mq_free(mq);
-}
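to_event_str() above maps an event id to a name by range-checking it and then indexing a string table offset by the first enum value. A minimal standalone version of the same pattern, with hypothetical event names rather than the CCAE_* set:

#include <stdio.h>

enum demo_event { EV_FIRST = 0x80, EV_SECOND, EV_THIRD, EV_LAST = EV_THIRD };

static const char *to_str(int event)
{
	static const char *tbl[] = { "EV_FIRST", "EV_SECOND", "EV_THIRD" };

	/* reject anything outside the table before indexing */
	if (event < EV_FIRST || event > EV_LAST)
		return "<invalid event>";
	return tbl[event - EV_FIRST];
}

int main(void)
{
	printf("%s %s\n", to_str(EV_SECOND), to_str(0x10));
	return 0;
}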
diff --git a/drivers/staging/rdma/amso1100/c2_ae.h b/drivers/staging/rdma/amso1100/c2_ae.h
deleted file mode 100644 (file)
index 3a065c3..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_AE_H_
-#define _C2_AE_H_
-
-/*
- * WARNING: If you change this file, also bump C2_IVN_BASE
- * in common/include/clustercore/c2_ivn.h.
- */
-
-/*
- * Asynchronous Event Identifiers
- *
- * These start at 0x80 only so it's obvious from inspection that
- * they are not work-request statuses.  This isn't critical.
- *
- * NOTE: these event ids must fit in eight bits.
- */
-enum c2_event_id {
-       CCAE_REMOTE_SHUTDOWN = 0x80,
-       CCAE_ACTIVE_CONNECT_RESULTS,
-       CCAE_CONNECTION_REQUEST,
-       CCAE_LLP_CLOSE_COMPLETE,
-       CCAE_TERMINATE_MESSAGE_RECEIVED,
-       CCAE_LLP_CONNECTION_RESET,
-       CCAE_LLP_CONNECTION_LOST,
-       CCAE_LLP_SEGMENT_SIZE_INVALID,
-       CCAE_LLP_INVALID_CRC,
-       CCAE_LLP_BAD_FPDU,
-       CCAE_INVALID_DDP_VERSION,
-       CCAE_INVALID_RDMA_VERSION,
-       CCAE_UNEXPECTED_OPCODE,
-       CCAE_INVALID_DDP_QUEUE_NUMBER,
-       CCAE_RDMA_READ_NOT_ENABLED,
-       CCAE_RDMA_WRITE_NOT_ENABLED,
-       CCAE_RDMA_READ_TOO_SMALL,
-       CCAE_NO_L_BIT,
-       CCAE_TAGGED_INVALID_STAG,
-       CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
-       CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
-       CCAE_TAGGED_INVALID_PD,
-       CCAE_WRAP_ERROR,
-       CCAE_BAD_CLOSE,
-       CCAE_BAD_LLP_CLOSE,
-       CCAE_INVALID_MSN_RANGE,
-       CCAE_INVALID_MSN_GAP,
-       CCAE_IRRQ_OVERFLOW,
-       CCAE_IRRQ_MSN_GAP,
-       CCAE_IRRQ_MSN_RANGE,
-       CCAE_IRRQ_INVALID_STAG,
-       CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
-       CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
-       CCAE_IRRQ_INVALID_PD,
-       CCAE_IRRQ_WRAP_ERROR,
-       CCAE_CQ_SQ_COMPLETION_OVERFLOW,
-       CCAE_CQ_RQ_COMPLETION_ERROR,
-       CCAE_QP_SRQ_WQE_ERROR,
-       CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
-       CCAE_CQ_OVERFLOW,
-       CCAE_CQ_OPERATION_ERROR,
-       CCAE_SRQ_LIMIT_REACHED,
-       CCAE_QP_RQ_LIMIT_REACHED,
-       CCAE_SRQ_CATASTROPHIC_ERROR,
-       CCAE_RNIC_CATASTROPHIC_ERROR
-/* WARNING: If you add more ids, make sure their values fit in eight bits. */
-};
-
-/*
- * Resource Indicators and Identifiers
- */
-enum c2_resource_indicator {
-       C2_RES_IND_QP = 1,
-       C2_RES_IND_EP,
-       C2_RES_IND_CQ,
-       C2_RES_IND_SRQ,
-};
-
-#endif /* _C2_AE_H_ */
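The header above insists that every event id fit in eight bits, but leaves that to comments. One way to enforce such an invariant at build time is a C11 static assertion; a sketch with hypothetical demo values, not part of the original header:

#include <assert.h>

/* hypothetical stand-ins for the first and last ids in the enum */
enum demo_event_id {
	DEMO_FIRST = 0x80,
	DEMO_LAST  = 0xAD,
};

/* fails the build, rather than a code review, if the range grows */
static_assert(DEMO_LAST <= 0xFF, "event ids must fit in eight bits");

int main(void)
{
	return 0;
}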
diff --git a/drivers/staging/rdma/amso1100/c2_alloc.c b/drivers/staging/rdma/amso1100/c2_alloc.c
deleted file mode 100644 (file)
index 039872d..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/bitmap.h>
-
-#include "c2.h"
-
-static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
-                              struct sp_chunk **head)
-{
-       int i;
-       struct sp_chunk *new_head;
-       dma_addr_t dma_addr;
-
-       new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE,
-                                     &dma_addr, gfp_mask);
-       if (new_head == NULL)
-               return -ENOMEM;
-
-       new_head->dma_addr = dma_addr;
-       dma_unmap_addr_set(new_head, mapping, new_head->dma_addr);
-
-       new_head->next = NULL;
-       new_head->head = 0;
-
-       /* build list where each index is the next free slot */
-       for (i = 0;
-            i < (PAGE_SIZE - sizeof(struct sp_chunk) -
-                 sizeof(u16)) / sizeof(u16) - 1;
-            i++) {
-               new_head->shared_ptr[i] = i + 1;
-       }
-       /* terminate list */
-       new_head->shared_ptr[i] = 0xFFFF;
-
-       *head = new_head;
-       return 0;
-}
-
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-                     struct sp_chunk **root)
-{
-       return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
-}
-
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
-{
-       struct sp_chunk *next;
-
-       while (root) {
-               next = root->next;
-               dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
-                                 dma_unmap_addr(root, mapping));
-               root = next;
-       }
-}
-
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-                     dma_addr_t *dma_addr, gfp_t gfp_mask)
-{
-       u16 mqsp;
-
-       while (head) {
-               mqsp = head->head;
-               if (mqsp != 0xFFFF) {
-                       head->head = head->shared_ptr[mqsp];
-                       break;
-               } else if (head->next == NULL) {
-                       if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
-                           0) {
-                               head = head->next;
-                               mqsp = head->head;
-                               head->head = head->shared_ptr[mqsp];
-                               break;
-                       } else
-                               return NULL;
-               } else
-                       head = head->next;
-       }
-       if (head) {
-               *dma_addr = head->dma_addr +
-                           ((unsigned long) &(head->shared_ptr[mqsp]) -
-                            (unsigned long) head);
-               pr_debug("%s addr %p dma_addr %llx\n", __func__,
-                        &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
-               return (__force __be16 *) &(head->shared_ptr[mqsp]);
-       }
-       return NULL;
-}
-
-void c2_free_mqsp(__be16 *mqsp)
-{
-       struct sp_chunk *head;
-       u16 idx;
-
-       /* The chunk containing this ptr begins at the page boundary */
-       head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
-
-       /* Link head to new mqsp */
-       *mqsp = (__force __be16) head->head;
-
-       /* Compute the shared_ptr index */
-       idx = (offset_in_page(mqsp)) >> 1;
-       idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
-
-       /* Point this index at the head */
-       head->shared_ptr[idx] = head->head;
-
-       /* Point head at this index */
-       head->head = idx;
-}
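The allocator above threads an intra-page free list through the slots themselves: each free slot stores the index of the next free slot, and 0xFFFF terminates the list. A self-contained model of that bookkeeping in plain memory (no DMA, hypothetical sizes):

#include <stdio.h>
#include <stdint.h>

#define NSLOTS 8
#define END    0xFFFF

static uint16_t slot[NSLOTS];
static uint16_t head;

static void pool_init(void)
{
	int i;

	for (i = 0; i < NSLOTS - 1; i++)
		slot[i] = i + 1;	/* each slot points at the next free one */
	slot[NSLOTS - 1] = END;		/* terminate the list */
	head = 0;
}

static int pool_alloc(void)
{
	uint16_t idx = head;

	if (idx == END)
		return -1;		/* pool exhausted */
	head = slot[idx];
	return idx;
}

static void pool_free(uint16_t idx)
{
	slot[idx] = head;		/* link freed slot back to the old head */
	head = idx;
}

int main(void)
{
	int a, b;

	pool_init();
	a = pool_alloc();
	b = pool_alloc();
	printf("allocated %d and %d\n", a, b);
	pool_free(a);
	printf("reallocated %d\n", pool_alloc());	/* reuses slot a */
	return 0;
}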
diff --git a/drivers/staging/rdma/amso1100/c2_cm.c b/drivers/staging/rdma/amso1100/c2_cm.c
deleted file mode 100644 (file)
index f8dbdb9..0000000
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_vq.h"
-#include <rdma/iw_cm.h>
-
-int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       struct c2_dev *c2dev = to_c2dev(cm_id->device);
-       struct ib_qp *ibqp;
-       struct c2_qp *qp;
-       struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
-       struct c2_vq_req *vq_req;
-       int err;
-       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-
-       if (cm_id->remote_addr.ss_family != AF_INET)
-               return -ENOSYS;
-
-       ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-       if (!ibqp)
-               return -EINVAL;
-       qp = to_c2qp(ibqp);
-
-       /* Associate QP <--> CM_ID */
-       cm_id->provider_data = qp;
-       cm_id->add_ref(cm_id);
-       qp->cm_id = cm_id;
-
-       /*
-        * Reject private data longer than the supported maximum.
-        */
-       if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-               err = -EINVAL;
-               goto bail0;
-       }
-       /*
-        * Set the rdma read limits
-        */
-       err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-       if (err)
-               goto bail0;
-
-       /*
-        * Create and send a WR_QP_CONNECT...
-        */
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       c2_wr_set_id(wr, CCWR_QP_CONNECT);
-       wr->hdr.context = 0;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->qp_handle = qp->adapter_handle;
-
-       wr->remote_addr = raddr->sin_addr.s_addr;
-       wr->remote_port = raddr->sin_port;
-
-       /*
-        * Move any private data from the caller's buf into
-        * the WR.
-        */
-       if (iw_param->private_data) {
-               wr->private_data_length =
-                       cpu_to_be32(iw_param->private_data_len);
-               memcpy(&wr->private_data[0], iw_param->private_data,
-                      iw_param->private_data_len);
-       } else
-               wr->private_data_length = 0;
-
-       /*
-        * Send WR to adapter.  NOTE: There is no synch reply from
-        * the adapter.
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       vq_req_free(c2dev, vq_req);
-
- bail1:
-       kfree(wr);
- bail0:
-       if (err) {
-               /*
-                * If we fail, release reference on QP and
-                * disassociate QP from CM_ID
-                */
-               cm_id->provider_data = NULL;
-               qp->cm_id = NULL;
-               cm_id->rem_ref(cm_id);
-       }
-       return err;
-}
-
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-       struct c2_dev *c2dev;
-       struct c2wr_ep_listen_create_req wr;
-       struct c2wr_ep_listen_create_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-
-       if (cm_id->local_addr.ss_family != AF_INET)
-               return -ENOSYS;
-
-       c2dev = to_c2dev(cm_id->device);
-       if (c2dev == NULL)
-               return -EINVAL;
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
-       wr.hdr.context = (u64) (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.local_addr = laddr->sin_addr.s_addr;
-       wr.local_port = laddr->sin_port;
-       wr.backlog = cpu_to_be32(backlog);
-       wr.user_context = (u64) (unsigned long) cm_id;
-
-       /*
-        * Reference the request struct.  Dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *)&wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply =
-           (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       if ((err = c2_errno(reply)) != 0)
-               goto bail1;
-
-       /*
-        * Keep the adapter handle. Used in subsequent destroy
-        */
-       cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
-
-       /*
-        * free vq stuff
-        */
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       return 0;
-
- bail1:
-       vq_repbuf_free(c2dev, reply);
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-
-int c2_llp_service_destroy(struct iw_cm_id *cm_id)
-{
-
-       struct c2_dev *c2dev;
-       struct c2wr_ep_listen_destroy_req wr;
-       struct c2wr_ep_listen_destroy_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       c2dev = to_c2dev(cm_id->device);
-       if (c2dev == NULL)
-               return -EINVAL;
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *)&wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       vq_repbuf_free(c2dev, reply);
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       struct c2_dev *c2dev = to_c2dev(cm_id->device);
-       struct c2_qp *qp;
-       struct ib_qp *ibqp;
-       struct c2wr_cr_accept_req *wr;  /* variable length WR */
-       struct c2_vq_req *vq_req;
-       struct c2wr_cr_accept_rep *reply;       /* VQ Reply msg ptr. */
-       int err;
-
-       ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-       if (!ibqp)
-               return -EINVAL;
-       qp = to_c2qp(ibqp);
-
-       /* Set the RDMA read limits */
-       err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-       if (err)
-               goto bail0;
-
-       /* Allocate verbs request. */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-       vq_req->qp = qp;
-       vq_req->cm_id = cm_id;
-       vq_req->event = IW_CM_EVENT_ESTABLISHED;
-
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       /* Build the WR */
-       c2_wr_set_id(wr, CCWR_CR_ACCEPT);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
-       wr->qp_handle = qp->adapter_handle;
-
-       /* Replace the cr_handle with the QP after accept */
-       cm_id->provider_data = qp;
-       cm_id->add_ref(cm_id);
-       qp->cm_id = cm_id;
-
-       /* Validate private_data length */
-       if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-               err = -EINVAL;
-               goto bail1;
-       }
-
-       if (iw_param->private_data) {
-               wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
-               memcpy(&wr->private_data[0],
-                      iw_param->private_data, iw_param->private_data_len);
-       } else
-               wr->private_data_length = 0;
-
-       /* Reference the request struct.  Dereferenced in the int handler. */
-       vq_req_get(c2dev, vq_req);
-
-       /* Send WR to adapter */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       /* Wait for reply from adapter */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       /* Check that reply is present */
-       reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-       if (!err)
-               c2_set_qp_state(qp, C2_QP_STATE_RTS);
- bail1:
-       kfree(wr);
-       vq_req_free(c2dev, vq_req);
- bail0:
-       if (err) {
-               /*
-                * If we fail, release reference on QP and
-                * disassociate QP from CM_ID
-                */
-               cm_id->provider_data = NULL;
-               qp->cm_id = NULL;
-               cm_id->rem_ref(cm_id);
-       }
-       return err;
-}
-
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-       struct c2_dev *c2dev;
-       struct c2wr_cr_reject_req wr;
-       struct c2_vq_req *vq_req;
-       struct c2wr_cr_reject_rep *reply;
-       int err;
-
-       c2dev = to_c2dev(cm_id->device);
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_CR_REJECT);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *)&wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_cr_reject_rep *) (unsigned long)
-               vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-       err = c2_errno(reply);
-       /*
-        * free vq stuff
-        */
-       vq_repbuf_free(c2dev, reply);
-
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
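Every operation in the file above follows one verbs-queue shape: allocate a request, build the work request, take a reference that the interrupt handler later drops, post it, sleep for the reply, then decode and free. A compilable skeleton of that flow with stubbed transport calls; all names here are illustrative stand-ins, not the driver's API:

#include <stdio.h>
#include <stdlib.h>

struct vq_req {
	int refs;	/* dropped by the (stubbed) reply handler */
	int reply;	/* adapter status decoded after the wait */
};

static struct vq_req *vq_req_alloc_stub(void)
{
	return calloc(1, sizeof(struct vq_req));
}

static void vq_req_get_stub(struct vq_req *r) { r->refs++; }
static void vq_req_put_stub(struct vq_req *r) { r->refs--; }

static int vq_send_wr_stub(struct vq_req *r)
{
	(void)r;	/* a real driver posts the WR to the adapter here */
	return 0;
}

static int vq_wait_for_reply_stub(struct vq_req *r)
{
	r->reply = 0;	/* a real driver sleeps until the reply arrives */
	return 0;
}

static int do_op(void)
{
	struct vq_req *req;
	int err;

	req = vq_req_alloc_stub();
	if (!req)
		return -1;

	vq_req_get_stub(req);		/* reply handler drops this ref */
	err = vq_send_wr_stub(req);
	if (err) {
		vq_req_put_stub(req);	/* never posted: drop it ourselves */
		goto out;
	}

	err = vq_wait_for_reply_stub(req);
	if (!err)
		err = req->reply;	/* decode the adapter's status */
out:
	free(req);
	return err;
}

int main(void)
{
	printf("do_op() -> %d\n", do_op());
	return 0;
}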
diff --git a/drivers/staging/rdma/amso1100/c2_cq.c b/drivers/staging/rdma/amso1100/c2_cq.c
deleted file mode 100644 (file)
index 3ef881f..0000000
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
-
-static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
-{
-       struct c2_cq *cq;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2dev->lock, flags);
-       cq = c2dev->qptr_array[cqn];
-       if (!cq) {
-               spin_unlock_irqrestore(&c2dev->lock, flags);
-               return NULL;
-       }
-       atomic_inc(&cq->refcount);
-       spin_unlock_irqrestore(&c2dev->lock, flags);
-       return cq;
-}
-
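-/* Drop a CQ reference; the final put wakes the waiter in c2_free_cq(). */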
-static void c2_cq_put(struct c2_cq *cq)
-{
-       if (atomic_dec_and_test(&cq->refcount))
-               wake_up(&cq->wait);
-}
-
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
-{
-       struct c2_cq *cq;
-
-       cq = c2_cq_get(c2dev, mq_index);
-       if (!cq) {
-               printk("discarding events on destroyed CQN=%d\n", mq_index);
-               return;
-       }
-
-       (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
-       c2_cq_put(cq);
-}
-
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
-{
-       struct c2_cq *cq;
-       struct c2_mq *q;
-
-       cq = c2_cq_get(c2dev, mq_index);
-       if (!cq)
-               return;
-
-       spin_lock_irq(&cq->lock);
-       q = &cq->mq;
-       if (q && !c2_mq_empty(q)) {
-               u16 priv = q->priv;
-               struct c2wr_ce *msg;
-
-               while (priv != be16_to_cpu(*q->shared)) {
-                       msg = (struct c2wr_ce *)
-                               (q->msg_pool.host + priv * q->msg_size);
-                       if (msg->qp_user_context == (u64) (unsigned long) qp) {
-                               msg->qp_user_context = (u64) 0;
-                       }
-                       priv = (priv + 1) % q->q_size;
-               }
-       }
-       spin_unlock_irq(&cq->lock);
-       c2_cq_put(cq);
-}
-
-static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
-{
-       switch (status) {
-       case C2_OK:
-               return IB_WC_SUCCESS;
-       case CCERR_FLUSHED:
-               return IB_WC_WR_FLUSH_ERR;
-       case CCERR_BASE_AND_BOUNDS_VIOLATION:
-               return IB_WC_LOC_PROT_ERR;
-       case CCERR_ACCESS_VIOLATION:
-               return IB_WC_LOC_ACCESS_ERR;
-       case CCERR_TOTAL_LENGTH_TOO_BIG:
-               return IB_WC_LOC_LEN_ERR;
-       case CCERR_INVALID_WINDOW:
-               return IB_WC_MW_BIND_ERR;
-       default:
-               return IB_WC_GENERAL_ERR;
-       }
-}
-
-
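-/*
- * Pull one completion message off the CQ's MQ and translate it
- * into an ib_wc entry on behalf of c2_poll_cq().
- */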
-static inline int c2_poll_one(struct c2_dev *c2dev,
-                             struct c2_cq *cq, struct ib_wc *entry)
-{
-       struct c2wr_ce *ce;
-       struct c2_qp *qp;
-       int is_recv = 0;
-
-       ce = c2_mq_consume(&cq->mq);
-       if (!ce) {
-               return -EAGAIN;
-       }
-
-       /*
-        * if the qp returned is null then this qp has already
-        * been freed and we are unable to process the completion.
-        * try pulling the next message
-        */
-       while ((qp =
-               (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
-               c2_mq_free(&cq->mq);
-               ce = c2_mq_consume(&cq->mq);
-               if (!ce)
-                       return -EAGAIN;
-       }
-
-       entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
-       entry->wr_id = ce->hdr.context;
-       entry->qp = &qp->ibqp;
-       entry->wc_flags = 0;
-       entry->slid = 0;
-       entry->sl = 0;
-       entry->src_qp = 0;
-       entry->dlid_path_bits = 0;
-       entry->pkey_index = 0;
-
-       switch (c2_wr_get_id(ce)) {
-       case C2_WR_TYPE_SEND:
-               entry->opcode = IB_WC_SEND;
-               break;
-       case C2_WR_TYPE_RDMA_WRITE:
-               entry->opcode = IB_WC_RDMA_WRITE;
-               break;
-       case C2_WR_TYPE_RDMA_READ:
-               entry->opcode = IB_WC_RDMA_READ;
-               break;
-       case C2_WR_TYPE_BIND_MW:
-               entry->opcode = IB_WC_BIND_MW;
-               break;
-       case C2_WR_TYPE_RECV:
-               entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
-               entry->opcode = IB_WC_RECV;
-               is_recv = 1;
-               break;
-       default:
-               break;
-       }
-
-       /* consume the WQEs */
-       if (is_recv)
-               c2_mq_lconsume(&qp->rq_mq, 1);
-       else
-               c2_mq_lconsume(&qp->sq_mq,
-                              be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
-
-       /* free the message */
-       c2_mq_free(&cq->mq);
-
-       return 0;
-}
-
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-       struct c2_dev *c2dev = to_c2dev(ibcq->device);
-       struct c2_cq *cq = to_c2cq(ibcq);
-       unsigned long flags;
-       int npolled, err;
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       for (npolled = 0; npolled < num_entries; ++npolled) {
-
-               err = c2_poll_one(c2dev, cq, entry + npolled);
-               if (err)
-                       break;
-       }
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       return npolled;
-}
-
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-       struct c2_mq_shared __iomem *shared;
-       struct c2_cq *cq;
-       unsigned long flags;
-       int ret = 0;
-
-       cq = to_c2cq(ibcq);
-       shared = cq->mq.peer;
-
-       if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
-               writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
-       else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-               writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
-       else
-               return -EINVAL;
-
-       writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
-
-       /*
-        * Now read back shared->armed to make the PCI
-        * write synchronous.  This is necessary for
-        * correct cq notification semantics.
-        */
-       readb(&shared->armed);
-
-       if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-               spin_lock_irqsave(&cq->lock, flags);
-               ret = !c2_mq_empty(&cq->mq);
-               spin_unlock_irqrestore(&cq->lock, flags);
-       }
-
-       return ret;
-}
-
-static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
-{
-       dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
-                         mq->msg_pool.host, dma_unmap_addr(mq, mapping));
-}
-
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
-                          size_t q_size, size_t msg_size)
-{
-       u8 *pool_start;
-
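-       /* Guard the q_size * msg_size multiplication against overflow. */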
-       if (q_size > SIZE_MAX / msg_size)
-               return -EINVAL;
-
-       pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
-                                       &mq->host_dma, GFP_KERNEL);
-       if (!pool_start)
-               return -ENOMEM;
-
-       c2_mq_rep_init(mq,
-                      0,               /* index (currently unknown) */
-                      q_size,
-                      msg_size,
-                      pool_start,
-                      NULL,    /* peer (currently unknown) */
-                      C2_MQ_HOST_TARGET);
-
-       dma_unmap_addr_set(mq, mapping, mq->host_dma);
-
-       return 0;
-}
-
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-              struct c2_ucontext *ctx, struct c2_cq *cq)
-{
-       struct c2wr_cq_create_req wr;
-       struct c2wr_cq_create_rep *reply;
-       unsigned long peer_pa;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       might_sleep();
-
-       cq->ibcq.cqe = entries - 1;
-       cq->is_kernel = !ctx;
-
-       /* Allocate a shared pointer */
-       cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                     &cq->mq.shared_dma, GFP_KERNEL);
-       if (!cq->mq.shared)
-               return -ENOMEM;
-
-       /* Allocate pages for the message pool */
-       err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
-       if (err)
-               goto bail0;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_CQ_CREATE);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.msg_size = cpu_to_be32(cq->mq.msg_size);
-       wr.depth = cpu_to_be32(cq->mq.q_size);
-       wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
-       wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
-       wr.user_context = (u64) (unsigned long) (cq);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail2;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail2;
-
-       reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-
-       if ((err = c2_errno(reply)) != 0)
-               goto bail3;
-
-       cq->adapter_handle = reply->cq_handle;
-       cq->mq.index = be32_to_cpu(reply->mq_index);
-
-       peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
-       cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
-       if (!cq->mq.peer) {
-               err = -ENOMEM;
-               goto bail3;
-       }
-
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       spin_lock_init(&cq->lock);
-       atomic_set(&cq->refcount, 1);
-       init_waitqueue_head(&cq->wait);
-
-       /*
-        * Use the MQ index allocated by the adapter to
-        * store the CQ in the qptr_array
-        */
-       cq->cqn = cq->mq.index;
-       c2dev->qptr_array[cq->cqn] = cq;
-
-       return 0;
-
-bail3:
-       vq_repbuf_free(c2dev, reply);
-bail2:
-       vq_req_free(c2dev, vq_req);
-bail1:
-       c2_free_cq_buf(c2dev, &cq->mq);
-bail0:
-       c2_free_mqsp(cq->mq.shared);
-
-       return err;
-}
-
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
-{
-       int err;
-       struct c2_vq_req *vq_req;
-       struct c2wr_cq_destroy_req wr;
-       struct c2wr_cq_destroy_rep *reply;
-
-       might_sleep();
-
-       /* Clear CQ from the qptr array */
-       spin_lock_irq(&c2dev->lock);
-       c2dev->qptr_array[cq->mq.index] = NULL;
-       atomic_dec(&cq->refcount);
-       spin_unlock_irq(&c2dev->lock);
-
-       wait_event(cq->wait, !atomic_read(&cq->refcount));
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               goto bail0;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.cq_handle = cq->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-       if (reply)
-               vq_repbuf_free(c2dev, reply);
-bail1:
-       vq_req_free(c2dev, vq_req);
-bail0:
-       if (cq->is_kernel) {
-               c2_free_cq_buf(c2dev, &cq->mq);
-       }
-
-       return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_intr.c b/drivers/staging/rdma/amso1100/c2_intr.c
deleted file mode 100644 (file)
index 74b32a9..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_vq.h"
-
-static void handle_mq(struct c2_dev *c2dev, u32 index);
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
-
-/*
- * Handle RNIC interrupts
- */
-void c2_rnic_interrupt(struct c2_dev *c2dev)
-{
-       unsigned int mq_index;
-
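-       /*
-        * Drain pending hints from the adapter.  A hint with the top
-        * bit set appears to mean no valid MQ index is available yet,
-        * so stop and wait for the next interrupt.
-        */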
-       while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
-               mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
-               if (mq_index & 0x80000000) {
-                       break;
-               }
-
-               c2dev->hints_read++;
-               handle_mq(c2dev, mq_index);
-       }
-
-}
-
-/*
- * Top level MQ handler
- */
-static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
-{
-       if (c2dev->qptr_array[mq_index] == NULL) {
-               pr_debug("handle_mq: stray activity for mq_index=%d\n",
-                        mq_index);
-               return;
-       }
-
-       switch (mq_index) {
-       case (0):
-               /*
-                * An index of 0 in the activity queue
-                * indicates the req vq now has messages
-                * available...
-                *
-                * Wake up any waiters waiting on req VQ
-                * message availability.
-                */
-               wake_up(&c2dev->req_vq_wo);
-               break;
-       case (1):
-               handle_vq(c2dev, mq_index);
-               break;
-       case (2):
-               /* We have to purge the VQ in case there are pending
-                * accept reply requests that would result in the
-                * generation of an ESTABLISHED event. If we don't
-                * generate these first, a CLOSE event could end up
-                * being delivered before the ESTABLISHED event.
-                */
-               handle_vq(c2dev, 1);
-
-               c2_ae_event(c2dev, mq_index);
-               break;
-       default:
-               /* There is no event synchronization between CQ events
-                * and AE or CM events. In fact, CQE could be
-                * delivered for all of the I/O up to and including the
-                * FLUSH for a peer disconnect prior to the ESTABLISHED
-                * event being delivered to the app. The reason for this
-                * is that CM events are delivered on a thread, while AE
-                * and CQ events are delivered in interrupt context.
-                */
-               c2_cq_event(c2dev, mq_index);
-               break;
-       }
-
-       return;
-}
-
-/*
- * Handles verbs WR replies.
- */
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
-{
-       void *adapter_msg, *reply_msg;
-       struct c2wr_hdr *host_msg;
-       struct c2wr_hdr tmp;
-       struct c2_mq *reply_vq;
-       struct c2_vq_req *req;
-       struct iw_cm_event cm_event;
-       int err;
-
-       reply_vq = c2dev->qptr_array[mq_index];
-
-       /*
-        * get next msg from mq_index into adapter_msg.
-        * don't free it yet.
-        */
-       adapter_msg = c2_mq_consume(reply_vq);
-       if (adapter_msg == NULL) {
-               return;
-       }
-
-       host_msg = vq_repbuf_alloc(c2dev);
-
-       /*
-        * If we can't get a host buffer, then we'll still
-        * wake up the waiter; we just won't give it the msg.
-        * It is assumed the waiter will deal with this...
-        */
-       if (!host_msg) {
-               pr_debug("handle_vq: no repbufs!\n");
-
-               /*
-                * just copy the WR header into a local variable.
-                * this allows us to still demux on the context
-                */
-               host_msg = &tmp;
-               memcpy(host_msg, adapter_msg, sizeof(tmp));
-               reply_msg = NULL;
-       } else {
-               memcpy(host_msg, adapter_msg, reply_vq->msg_size);
-               reply_msg = host_msg;
-       }
-
-       /*
-        * consume the msg from the MQ
-        */
-       c2_mq_free(reply_vq);
-
-       /*
-        * wakeup the waiter.
-        */
-       req = (struct c2_vq_req *) (unsigned long) host_msg->context;
-       if (req == NULL) {
-               /*
-                * We should never get here, as the adapter should
-                * never send us a reply that we're not expecting.
-                */
-               if (reply_msg != NULL)
-                       vq_repbuf_free(c2dev, host_msg);
-               pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
-               return;
-       }
-
-       if (reply_msg)
-               err = c2_errno(reply_msg);
-       else
-               err = -ENOMEM;
-
-       if (!err) switch (req->event) {
-       case IW_CM_EVENT_ESTABLISHED:
-               c2_set_qp_state(req->qp,
-                               C2_QP_STATE_RTS);
-               /*
-                * Until ird/ord negotiation via MPAv2 support is added, send
-                * max supported values
-                */
-               cm_event.ird = cm_event.ord = 128;
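-               /* fall through: the event is delivered by the code below */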
-       case IW_CM_EVENT_CLOSE:
-
-               /*
-                * Deliver the event to the app.  (The QP was already
-                * moved to RTS above if this was the established event.)
-                */
-               cm_event.event = req->event;
-               cm_event.status = 0;
-               cm_event.local_addr = req->cm_id->local_addr;
-               cm_event.remote_addr = req->cm_id->remote_addr;
-               cm_event.private_data = NULL;
-               cm_event.private_data_len = 0;
-               req->cm_id->event_handler(req->cm_id, &cm_event);
-               break;
-       default:
-               break;
-       }
-
-       req->reply_msg = (u64) (unsigned long) (reply_msg);
-       atomic_set(&req->reply_ready, 1);
-       wake_up(&req->wait_object);
-
-       /*
-        * If the request was cancelled, then this put will
-        * free the vq_req memory...and reply_msg!!!
-        */
-       vq_req_put(c2dev, req);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mm.c b/drivers/staging/rdma/amso1100/c2_mm.c
deleted file mode 100644 (file)
index 25081e2..0000000
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-
-#define PBL_VIRT 1
-#define PBL_PHYS 2
-
-/*
- * Send all the PBL messages to convey the remainder of the PBL.
- * Wait for the adapter's reply on the last one, which is marked
- * by setting MEM_PBL_COMPLETE in the flags.
- *
- * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
- *       Reply buffer _is_ freed by this function.
- */
-static int
-send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index,
-                 unsigned long va, u32 pbl_depth,
-                 struct c2_vq_req *vq_req, int pbl_type)
-{
-       u32 pbe_count;          /* amt that fits in a PBL msg */
-       u32 count;              /* amt in this PBL MSG. */
-       struct c2wr_nsmr_pbl_req *wr;   /* PBL WR ptr */
-       struct c2wr_nsmr_pbl_rep *reply;        /* reply ptr */
-       int err, pbl_virt, pbl_index, i;
-
-       switch (pbl_type) {
-       case PBL_VIRT:
-               pbl_virt = 1;
-               break;
-       case PBL_PHYS:
-               pbl_virt = 0;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       pbe_count = (c2dev->req_vq.msg_size -
-                    sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               return -ENOMEM;
-       }
-       c2_wr_set_id(wr, CCWR_NSMR_PBL);
-
-       /*
-        * Only the last PBL message will generate a reply from the verbs,
-        * so we set the context to 0 indicating there is no kernel verbs
-        * handler blocked awaiting this reply.
-        */
-       wr->hdr.context = 0;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->stag_index = stag_index;    /* already swapped */
-       wr->flags = 0;
-       pbl_index = 0;
-       while (pbl_depth) {
-               count = min(pbe_count, pbl_depth);
-               wr->addrs_length = cpu_to_be32(count);
-
-               /*
-                *  If this is the last message, then reference the
-                *  vq request struct because we're going to wait for a
-                *  reply.  Also mark this PBL msg as the last one.
-                */
-               if (count == pbl_depth) {
-                       /*
-                        * reference the request struct.  dereferenced in the
-                        * int handler.
-                        */
-                       vq_req_get(c2dev, vq_req);
-                       wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
-
-                       /*
-                        * This is the last PBL message.
-                        * Set the context to our VQ Request Object so we can
-                        * wait for the reply.
-                        */
-                       wr->hdr.context = (unsigned long) vq_req;
-               }
-
-               /*
-                * If pbl_virt is set then va is a virtual address
-                * that describes a virtually contiguous memory
-                * allocation. The wr needs the start of each virtual page
-                * to be converted to the corresponding physical address
-                * of the page. If pbl_virt is not set then va is an array
-                * of physical addresses and there is no conversion to do.
-                * Just fill in the wr with what is in the array.
-                */
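-               /*
-                * NOTE: as written, the pbl_virt branch below only
-                * advances va and never fills in wr->paddrs[]; the only
-                * caller in this file uses PBL_PHYS, so the virtual
-                * path is effectively unused.
-                */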
-               for (i = 0; i < count; i++) {
-                       if (pbl_virt) {
-                               va += PAGE_SIZE;
-                       } else {
-                               wr->paddrs[i] =
-                                   cpu_to_be64(((u64 *)va)[pbl_index + i]);
-                       }
-               }
-
-               /*
-                * Send WR to adapter
-                */
-               err = vq_send_wr(c2dev, (union c2wr *) wr);
-               if (err) {
-                       /* Drop the reference only if we took one above,
-                        * i.e. this was the last PBL message. */
-                       if (count == pbl_depth) {
-                               vq_req_put(c2dev, vq_req);
-                       }
-                       goto bail0;
-               }
-               pbl_depth -= count;
-               pbl_index += count;
-       }
-
-       /*
-        *  Now wait for the reply...
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       kfree(wr);
-       return err;
-}
-
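-/* Upper bound on the PBL depth accepted from callers (128K entries). */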
-#define C2_PBL_MAX_DEPTH 131072
-int
-c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
-                          int page_size, int pbl_depth, u32 length,
-                          u32 offset, u64 *va, enum c2_acf acf,
-                          struct c2_mr *mr)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_nsmr_register_req *wr;
-       struct c2wr_nsmr_register_rep *reply;
-       u16 flags;
-       int i, pbe_count, count;
-       int err;
-
-       if (!va || !length || !addr_list || !pbl_depth)
-               return -EINTR;
-
-       /*
-        * Verify PBL depth is within rnic max
-        */
-       if (pbl_depth > C2_PBL_MAX_DEPTH) {
-               return -EINTR;
-       }
-
-       /*
-        * allocate verbs request object
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       /*
-        * build the WR
-        */
-       c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-
-       flags = (acf | MEM_VA_BASED | MEM_REMOTE);
-
-       /*
-        * compute how many pbes can fit in the message
-        */
-       pbe_count = (c2dev->req_vq.msg_size -
-                    sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
-
-       if (pbl_depth <= pbe_count) {
-               flags |= MEM_PBL_COMPLETE;
-       }
-       wr->flags = cpu_to_be16(flags);
-       wr->stag_key = 0;       /* stag_key */
-       wr->va = cpu_to_be64(*va);
-       wr->pd_id = mr->pd->pd_id;
-       wr->pbe_size = cpu_to_be32(page_size);
-       wr->length = cpu_to_be32(length);
-       wr->pbl_depth = cpu_to_be32(pbl_depth);
-       wr->fbo = cpu_to_be32(offset);
-       count = min(pbl_depth, pbe_count);
-       wr->addrs_length = cpu_to_be32(count);
-
-       /*
-        * fill out the PBL for this message
-        */
-       for (i = 0; i < count; i++) {
-               wr->paddrs[i] = cpu_to_be64(addr_list[i]);
-       }
-
-       /*
-        * reference the request struct
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * send the WR to the adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       /*
-        * wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail1;
-       }
-
-       /*
-        * process reply
-        */
-       reply =
-           (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-       if ((err = c2_errno(reply))) {
-               goto bail2;
-       }
-       //*p_pb_entries = be32_to_cpu(reply->pbl_depth);
-       mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
-       vq_repbuf_free(c2dev, reply);
-
-       /*
-        * if there are still more PBEs we need to send them to
-        * the adapter and wait for a reply on the final one.
-        * reuse vq_req for this purpose.
-        */
-       pbl_depth -= count;
-       if (pbl_depth) {
-
-               vq_req->reply_msg = (unsigned long) NULL;
-               atomic_set(&vq_req->reply_ready, 0);
-               err = send_pbl_messages(c2dev,
-                                       cpu_to_be32(mr->ibmr.lkey),
-                                       (unsigned long) &addr_list[i],
-                                       pbl_depth, vq_req, PBL_PHYS);
-               if (err) {
-                       goto bail1;
-               }
-       }
-
-       vq_req_free(c2dev, vq_req);
-       kfree(wr);
-
-       return err;
-
-bail2:
-       vq_repbuf_free(c2dev, reply);
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
-{
-       struct c2_vq_req *vq_req;       /* verbs request object */
-       struct c2wr_stag_dealloc_req wr;        /* work request */
-       struct c2wr_stag_dealloc_rep *reply;    /* WR reply  */
-       int err;
-
-
-       /*
-        * allocate verbs request object
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               return -ENOMEM;
-       }
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
-       wr.hdr.context = (u64) (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.stag_index = cpu_to_be32(stag_index);
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.c b/drivers/staging/rdma/amso1100/c2_mq.c
deleted file mode 100644 (file)
index 7827fb8..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include "c2_mq.h"
-
-void *c2_mq_alloc(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       if (c2_mq_full(q)) {
-               return NULL;
-       } else {
-#ifdef DEBUG
-               struct c2wr_hdr *m =
-                   (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-               BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
-               m->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-               return m;
-#else
-               return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-       }
-}
-
-void c2_mq_produce(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       if (!c2_mq_full(q)) {
-               q->priv = (q->priv + 1) % q->q_size;
-               q->hint_count++;
-               /* Update peer's offset. */
-               __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-       }
-}
-
-void *c2_mq_consume(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-       if (c2_mq_empty(q)) {
-               return NULL;
-       } else {
-#ifdef DEBUG
-               struct c2wr_hdr *m = (struct c2wr_hdr *)
-                   (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-               BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
-#endif
-               return m;
-#else
-               return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-       }
-}
-
-void c2_mq_free(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-       if (!c2_mq_empty(q)) {
-
-#ifdef CCMSGMAGIC
-               {
-                       struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
-                           (q->msg_pool.adapter + q->priv * q->msg_size);
-                       __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
-               }
-#endif
-               q->priv = (q->priv + 1) % q->q_size;
-               /* Update peer's offset. */
-               __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-       }
-}
-
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       while (wqe_count--) {
-               BUG_ON(c2_mq_empty(q));
-               *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
-       }
-}
-
-#if 0
-u32 c2_mq_count(struct c2_mq *q)
-{
-       s32 count;
-
-       if (q->type == C2_MQ_HOST_TARGET)
-               count = be16_to_cpu(*q->shared) - q->priv;
-       else
-               count = q->priv - be16_to_cpu(*q->shared);
-
-       if (count < 0)
-               count += q->q_size;
-
-       return (u32) count;
-}
-#endif  /*  0  */
-
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                   u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
-{
-       BUG_ON(!q->shared);
-
-       /* This code assumes the byte swapping has already been done! */
-       q->index = index;
-       q->q_size = q_size;
-       q->msg_size = msg_size;
-       q->msg_pool.adapter = pool_start;
-       q->peer = (struct c2_mq_shared __iomem *) peer;
-       q->magic = C2_MQ_MAGIC;
-       q->type = type;
-       q->priv = 0;
-       q->hint_count = 0;
-       return;
-}
-
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                   u8 *pool_start, u16 __iomem *peer, u32 type)
-{
-       BUG_ON(!q->shared);
-
-       /* This code assumes the byte swapping has already been done! */
-       q->index = index;
-       q->q_size = q_size;
-       q->msg_size = msg_size;
-       q->msg_pool.host = pool_start;
-       q->peer = (struct c2_mq_shared __iomem *) peer;
-       q->magic = C2_MQ_MAGIC;
-       q->type = type;
-       q->priv = 0;
-       q->hint_count = 0;
-       return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.h b/drivers/staging/rdma/amso1100/c2_mq.h
deleted file mode 100644 (file)
index 8e1b4d1..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _C2_MQ_H_
-#define _C2_MQ_H_
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include "c2_wr.h"
-
-enum c2_shared_regs {
-
-       C2_SHARED_ARMED = 0x10,
-       C2_SHARED_NOTIFY = 0x18,
-       C2_SHARED_SHARED = 0x40,
-};
-
-struct c2_mq_shared {
-       u16 unused1;
-       u8 armed;
-       u8 notification_type;
-       u32 unused2;
-       u16 shared;
-       /* Pad to 64 bytes. */
-       u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
-};
-
-enum c2_mq_type {
-       C2_MQ_HOST_TARGET = 1,
-       C2_MQ_ADAPTER_TARGET = 2,
-};
-
-/*
- * c2_mq_t is for kernel-mode MQs like the VQs and the AEQ.
- * c2_user_mq_t (which is the same format) is for user-mode MQs...
- */
-#define C2_MQ_MAGIC 0x4d512020 /* 'MQ  ' */
-struct c2_mq {
-       u32 magic;
-       union {
-               u8 *host;
-               u8 __iomem *adapter;
-       } msg_pool;
-       dma_addr_t host_dma;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-       u16 hint_count;
-       u16 priv;
-       struct c2_mq_shared __iomem *peer;
-       __be16 *shared;
-       dma_addr_t shared_dma;
-       u32 q_size;
-       u32 msg_size;
-       u32 index;
-       enum c2_mq_type type;
-};
-
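-/*
- * MQ ring arithmetic: 'priv' is this side's private index and
- * '*shared' is the peer's index.  One slot is always left unused
- * so a full ring can be distinguished from an empty one.
- */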
-static __inline__ int c2_mq_empty(struct c2_mq *q)
-{
-       return q->priv == be16_to_cpu(*q->shared);
-}
-
-static __inline__ int c2_mq_full(struct c2_mq *q)
-{
-       return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
-}
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
-void *c2_mq_alloc(struct c2_mq *q);
-void c2_mq_produce(struct c2_mq *q);
-void *c2_mq_consume(struct c2_mq *q);
-void c2_mq_free(struct c2_mq *q);
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                      u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                          u8 *pool_start, u16 __iomem *peer, u32 type);
-
-#endif                         /* _C2_MQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_pd.c b/drivers/staging/rdma/amso1100/c2_pd.c
deleted file mode 100644 (file)
index f3e81dc..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-
-#include "c2.h"
-#include "c2_provider.h"
-
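-/*
- * PD ids come from a round-robin bitmap: search upward from the
- * last id handed out, wrapping back to the start when that fails.
- */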
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
-{
-       u32 obj;
-       int ret = 0;
-
-       spin_lock(&c2dev->pd_table.lock);
-       obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
-                                c2dev->pd_table.last);
-       if (obj >= c2dev->pd_table.max)
-               obj = find_first_zero_bit(c2dev->pd_table.table,
-                                         c2dev->pd_table.max);
-       if (obj < c2dev->pd_table.max) {
-               pd->pd_id = obj;
-               __set_bit(obj, c2dev->pd_table.table);
-               c2dev->pd_table.last = obj+1;
-               if (c2dev->pd_table.last >= c2dev->pd_table.max)
-                       c2dev->pd_table.last = 0;
-       } else
-               ret = -ENOMEM;
-       spin_unlock(&c2dev->pd_table.lock);
-       return ret;
-}
-
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
-{
-       spin_lock(&c2dev->pd_table.lock);
-       __clear_bit(pd->pd_id, c2dev->pd_table.table);
-       spin_unlock(&c2dev->pd_table.lock);
-}
-
-int c2_init_pd_table(struct c2_dev *c2dev)
-{
-
-       c2dev->pd_table.last = 0;
-       c2dev->pd_table.max = c2dev->props.max_pd;
-       spin_lock_init(&c2dev->pd_table.lock);
-       c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
-                                       sizeof(long), GFP_KERNEL);
-       if (!c2dev->pd_table.table)
-               return -ENOMEM;
-       bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
-       return 0;
-}
-
-void c2_cleanup_pd_table(struct c2_dev *c2dev)
-{
-       kfree(c2dev->pd_table.table);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.c b/drivers/staging/rdma/amso1100/c2_provider.c
deleted file mode 100644 (file)
index a092ac7..0000000
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/if_arp.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-#include "c2.h"
-#include "c2_provider.h"
-#include "c2_user.h"
-
-static int c2_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                          struct ib_udata *uhw)
-{
-       struct c2_dev *c2dev = to_c2dev(ibdev);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       *props = c2dev->props;
-       return 0;
-}
-
-static int c2_query_port(struct ib_device *ibdev,
-                        u8 port, struct ib_port_attr *props)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       props->max_mtu = IB_MTU_4096;
-       props->lid = 0;
-       props->lmc = 0;
-       props->sm_lid = 0;
-       props->sm_sl = 0;
-       props->state = IB_PORT_ACTIVE;
-       props->phys_state = 0;
-       props->port_cap_flags =
-           IB_PORT_CM_SUP |
-           IB_PORT_REINIT_SUP |
-           IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
-       props->gid_tbl_len = 1;
-       props->pkey_tbl_len = 1;
-       props->qkey_viol_cntr = 0;
-       props->active_width = 1;
-       props->active_speed = IB_SPEED_SDR;
-
-       return 0;
-}
-
-static int c2_query_pkey(struct ib_device *ibdev,
-                        u8 port, u16 index, u16 *pkey)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       *pkey = 0;
-       return 0;
-}
-
-static int c2_query_gid(struct ib_device *ibdev, u8 port,
-                       int index, union ib_gid *gid)
-{
-       struct c2_dev *c2dev = to_c2dev(ibdev);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       memset(&(gid->raw[0]), 0, sizeof(gid->raw));
-       memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
-
-       return 0;
-}
-
-/* Allocate the user context data structure. This keeps track
- * of all objects associated with a particular user-mode client.
- */
-static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
-                                            struct ib_udata *udata)
-{
-       struct c2_ucontext *context;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       context = kmalloc(sizeof(*context), GFP_KERNEL);
-       if (!context)
-               return ERR_PTR(-ENOMEM);
-
-       return &context->ibucontext;
-}
-
-static int c2_dealloc_ucontext(struct ib_ucontext *context)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       kfree(context);
-       return 0;
-}
-
-static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
-                                struct ib_ucontext *context,
-                                struct ib_udata *udata)
-{
-       struct c2_pd *pd;
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-       if (!pd)
-               return ERR_PTR(-ENOMEM);
-
-       err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
-       if (err) {
-               kfree(pd);
-               return ERR_PTR(err);
-       }
-
-       if (context) {
-               if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
-                       c2_pd_free(to_c2dev(ibdev), pd);
-                       kfree(pd);
-                       return ERR_PTR(-EFAULT);
-               }
-       }
-
-       return &pd->ibpd;
-}
-
-static int c2_dealloc_pd(struct ib_pd *pd)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
-       kfree(pd);
-
-       return 0;
-}
-
-static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return ERR_PTR(-ENOSYS);
-}
-
-static int c2_ah_destroy(struct ib_ah *ah)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static void c2_add_ref(struct ib_qp *ibqp)
-{
-       struct c2_qp *qp;
-       BUG_ON(!ibqp);
-       qp = to_c2qp(ibqp);
-       atomic_inc(&qp->refcount);
-}
-
-static void c2_rem_ref(struct ib_qp *ibqp)
-{
-       struct c2_qp *qp;
-       BUG_ON(!ibqp);
-       qp = to_c2qp(ibqp);
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
-{
-       struct c2_dev* c2dev = to_c2dev(device);
-       struct c2_qp *qp;
-
-       qp = c2_find_qpn(c2dev, qpn);
-       pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
-               __func__, qp, qpn, device,
-               (qp ? atomic_read(&qp->refcount) : 0));
-
-       return (qp ? &qp->ibqp : NULL);
-}
-
-static struct ib_qp *c2_create_qp(struct ib_pd *pd,
-                                 struct ib_qp_init_attr *init_attr,
-                                 struct ib_udata *udata)
-{
-       struct c2_qp *qp;
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       if (init_attr->create_flags)
-               return ERR_PTR(-EINVAL);
-
-       switch (init_attr->qp_type) {
-       case IB_QPT_RC:
-               qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-               if (!qp) {
-                       pr_debug("%s: Unable to allocate QP\n", __func__);
-                       return ERR_PTR(-ENOMEM);
-               }
-               spin_lock_init(&qp->lock);
-               if (pd->uobject) {
-                       /* userspace specific */
-               }
-
-               err = c2_alloc_qp(to_c2dev(pd->device),
-                                 to_c2pd(pd), init_attr, qp);
-
-               if (err && pd->uobject) {
-                       /* userspace specific */
-               }
-
-               break;
-       default:
-               pr_debug("%s: Invalid QP type: %d\n", __func__,
-                       init_attr->qp_type);
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (err) {
-               kfree(qp);
-               return ERR_PTR(err);
-       }
-
-       return &qp->ibqp;
-}
-
-static int c2_destroy_qp(struct ib_qp *ib_qp)
-{
-       struct c2_qp *qp = to_c2qp(ib_qp);
-
-       pr_debug("%s:%u qp=%p,qp->state=%d\n",
-               __func__, __LINE__, ib_qp, qp->state);
-       c2_free_qp(to_c2dev(ib_qp->device), qp);
-       kfree(qp);
-       return 0;
-}
-
-static struct ib_cq *c2_create_cq(struct ib_device *ibdev,
-                                 const struct ib_cq_init_attr *attr,
-                                 struct ib_ucontext *context,
-                                 struct ib_udata *udata)
-{
-       int entries = attr->cqe;
-       struct c2_cq *cq;
-       int err;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-       if (!cq) {
-               pr_debug("%s: Unable to allocate CQ\n", __func__);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
-       if (err) {
-               pr_debug("%s: error initializing CQ\n", __func__);
-               kfree(cq);
-               return ERR_PTR(err);
-       }
-
-       return &cq->ibcq;
-}
-
-static int c2_destroy_cq(struct ib_cq *ib_cq)
-{
-       struct c2_cq *cq = to_c2cq(ib_cq);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       c2_free_cq(to_c2dev(ib_cq->device), cq);
-       kfree(cq);
-
-       return 0;
-}
-
-static inline u32 c2_convert_access(int acc)
-{
-       return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
-           (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
-           (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
-           C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
-}
-
-static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
-                                   struct ib_phys_buf *buffer_list,
-                                   int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct c2_mr *mr;
-       u64 *page_list;
-       u32 total_len;
-       int err, i, j, k, page_shift, pbl_depth;
-
-       pbl_depth = 0;
-       total_len = 0;
-
-       page_shift = PAGE_SHIFT;
-       /*
-        * If there is only 1 buffer we assume this could
-        * be a map of all physical memory, so use a 32k page_shift.
-        */
-       if (num_phys_buf == 1)
-               page_shift += 3;
-
-       for (i = 0; i < num_phys_buf; i++) {
-
-               if (offset_in_page(buffer_list[i].addr)) {
-                       pr_debug("Unaligned Memory Buffer: 0x%x\n",
-                               (unsigned int) buffer_list[i].addr);
-                       return ERR_PTR(-EINVAL);
-               }
-
-               if (!buffer_list[i].size) {
-                       pr_debug("Invalid Buffer Size\n");
-                       return ERR_PTR(-EINVAL);
-               }
-
-               total_len += buffer_list[i].size;
-               pbl_depth += ALIGN(buffer_list[i].size,
-                                  BIT(page_shift)) >> page_shift;
-       }
-
-       page_list = vmalloc(sizeof(u64) * pbl_depth);
-       if (!page_list) {
-               pr_debug("couldn't vmalloc page_list of size %zd\n",
-                       (sizeof(u64) * pbl_depth));
-               return ERR_PTR(-ENOMEM);
-       }
-
-       for (i = 0, j = 0; i < num_phys_buf; i++) {
-
-               int naddrs;
-
-               naddrs = ALIGN(buffer_list[i].size,
-                              BIT(page_shift)) >> page_shift;
-               for (k = 0; k < naddrs; k++)
-                       page_list[j++] = (buffer_list[i].addr +
-                                                    (k << page_shift));
-       }
-
-       mr = kmalloc(sizeof(*mr), GFP_KERNEL);
-       if (!mr) {
-               vfree(page_list);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       mr->pd = to_c2pd(ib_pd);
-       mr->umem = NULL;
-       pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
-               "*iova_start %llx, first pa %llx, last pa %llx\n",
-               __func__, page_shift, pbl_depth, total_len,
-               (unsigned long long) *iova_start,
-               (unsigned long long) page_list[0],
-               (unsigned long long) page_list[pbl_depth-1]);
-       err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
-                                        BIT(page_shift), pbl_depth,
-                                        total_len, 0, iova_start,
-                                        c2_convert_access(acc), mr);
-       vfree(page_list);
-       if (err) {
-               kfree(mr);
-               return ERR_PTR(err);
-       }
-
-       return &mr->ibmr;
-}
-
-static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ib_phys_buf bl;
-       u64 kva = 0;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* AMSO1100 limit */
-       bl.size = 0xffffffff;
-       bl.addr = 0;
-       return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
-}
-
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                                   u64 virt, int acc, struct ib_udata *udata)
-{
-       u64 *pages;
-       u64 kva = 0;
-       int shift, n, len;
-       int i, k, entry;
-       int err = 0;
-       struct scatterlist *sg;
-       struct c2_pd *c2pd = to_c2pd(pd);
-       struct c2_mr *c2mr;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
-       if (!c2mr)
-               return ERR_PTR(-ENOMEM);
-       c2mr->pd = c2pd;
-
-       c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
-       if (IS_ERR(c2mr->umem)) {
-               err = PTR_ERR(c2mr->umem);
-               kfree(c2mr);
-               return ERR_PTR(err);
-       }
-
-       shift = ffs(c2mr->umem->page_size) - 1;
-       n = c2mr->umem->nmap;
-
-       pages = kmalloc_array(n, sizeof(u64), GFP_KERNEL);
-       if (!pages) {
-               err = -ENOMEM;
-               goto err;
-       }
-
-       i = 0;
-       for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
-               len = sg_dma_len(sg) >> shift;
-               for (k = 0; k < len; ++k) {
-                       pages[i++] =
-                               sg_dma_address(sg) +
-                               (c2mr->umem->page_size * k);
-               }
-       }
-
-       kva = virt;
-       err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
-                                        pages,
-                                        c2mr->umem->page_size,
-                                        i,
-                                        length,
-                                        ib_umem_offset(c2mr->umem),
-                                        &kva,
-                                        c2_convert_access(acc),
-                                        c2mr);
-       kfree(pages);
-       if (err)
-               goto err;
-       return &c2mr->ibmr;
-
-err:
-       ib_umem_release(c2mr->umem);
-       kfree(c2mr);
-       return ERR_PTR(err);
-}
-
-static int c2_dereg_mr(struct ib_mr *ib_mr)
-{
-       struct c2_mr *mr = to_c2mr(ib_mr);
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
-       if (err)
-               pr_debug("c2_stag_dealloc failed: %d\n", err);
-       else {
-               if (mr->umem)
-                       ib_umem_release(mr->umem);
-               kfree(mr);
-       }
-
-       return err;
-}
-
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-                       char *buf)
-{
-       struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%x\n", c2dev->props.hw_ver);
-}
-
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
-                          char *buf)
-{
-       struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%x.%x.%x\n",
-                      (int) (c2dev->props.fw_ver >> 32),
-                      (int) (c2dev->props.fw_ver >> 16) & 0xffff,
-                      (int) (c2dev->props.fw_ver & 0xffff));
-}
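This attribute unpacks the fw_ver layout that c2_rnic_query() builds
further down in this diff: major in bits 63:32, minor in 31:16, patch in
15:0. A self-contained round-trip check of that packing:

    #include <stdint.h>
    #include <stdio.h>

    /* Pack major.minor.patch the way the shifts above expect. */
    static uint64_t pack_fw_ver(uint32_t major, uint16_t minor, uint16_t patch)
    {
            return ((uint64_t)major << 32) | ((uint32_t)minor << 16) | patch;
    }

    int main(void)
    {
            uint64_t v = pack_fw_ver(1, 2, 3);

            printf("%x.%x.%x\n", (int)(v >> 32),
                   (int)(v >> 16) & 0xffff, (int)(v & 0xffff)); /* 1.2.3 */
            return 0;
    }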
-
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-                       char *buf)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "AMSO1100\n");
-}
-
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *c2_dev_attributes[] = {
-       &dev_attr_hw_rev,
-       &dev_attr_fw_ver,
-       &dev_attr_hca_type,
-       &dev_attr_board_id
-};
-
-static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                       int attr_mask, struct ib_udata *udata)
-{
-       int err;
-
-       err =
-           c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
-                        attr_mask);
-
-       return err;
-}
-
-static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_process_mad(struct ib_device *ibdev,
-                         int mad_flags,
-                         u8 port_num,
-                         const struct ib_wc *in_wc,
-                         const struct ib_grh *in_grh,
-                         const struct ib_mad_hdr *in_mad,
-                         size_t in_mad_size,
-                         struct ib_mad_hdr *out_mad,
-                         size_t *out_mad_size,
-                         u16 *out_mad_pkey_index)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* Request a connection */
-       return c2_llp_connect(cm_id, iw_param);
-}
-
-static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* Accept the new connection */
-       return c2_llp_accept(cm_id, iw_param);
-}
-
-static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       return c2_llp_reject(cm_id, pdata, pdata_len);
-}
-
-static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       err = c2_llp_service_create(cm_id, backlog);
-       pr_debug("%s:%u err=%d\n",
-               __func__, __LINE__,
-               err);
-       return err;
-}
-
-static int c2_service_destroy(struct iw_cm_id *cm_id)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       return c2_llp_service_destroy(cm_id);
-}
-
-static int c2_pseudo_up(struct net_device *netdev)
-{
-       struct in_device *ind;
-       struct c2_dev *c2dev = netdev->ml_priv;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       pr_debug("adding...\n");
-       for_ifa(ind) {
-#ifdef DEBUG
-               u8 *ip = (u8 *) & ifa->ifa_address;
-
-               pr_debug("%s: %d.%d.%d.%d\n",
-                      ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-               c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-
-       return 0;
-}
-
-static int c2_pseudo_down(struct net_device *netdev)
-{
-       struct in_device *ind;
-       struct c2_dev *c2dev = netdev->ml_priv;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       pr_debug("deleting...\n");
-       for_ifa(ind) {
-#ifdef DEBUG
-               u8 *ip = (u8 *) & ifa->ifa_address;
-
-               pr_debug("%s: %d.%d.%d.%d\n",
-                      ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-               c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-
-       return 0;
-}
-
-static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-       kfree_skb(skb);
-       return NETDEV_TX_OK;
-}
-
-static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-               return -EINVAL;
-
-       netdev->mtu = new_mtu;
-
-       /* TODO: Tell the RNIC about the new RDMA interface MTU */
-       return 0;
-}
-
-static const struct net_device_ops c2_pseudo_netdev_ops = {
-       .ndo_open               = c2_pseudo_up,
-       .ndo_stop               = c2_pseudo_down,
-       .ndo_start_xmit         = c2_pseudo_xmit_frame,
-       .ndo_change_mtu         = c2_pseudo_change_mtu,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static void setup(struct net_device *netdev)
-{
-       netdev->netdev_ops = &c2_pseudo_netdev_ops;
-
-       netdev->watchdog_timeo = 0;
-       netdev->type = ARPHRD_ETHER;
-       netdev->mtu = 1500;
-       netdev->hard_header_len = ETH_HLEN;
-       netdev->addr_len = ETH_ALEN;
-       netdev->tx_queue_len = 0;
-       netdev->flags |= IFF_NOARP;
-}
-
-static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
-{
-       char name[IFNAMSIZ];
-       struct net_device *netdev;
-
-       /* change ethxxx to iwxxx */
-       strcpy(name, "iw");
-       strcat(name, &c2dev->netdev->name[3]);
-       netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, setup);
-       if (!netdev) {
-               printk(KERN_ERR PFX "%s - etherdev alloc failed\n",
-                       __func__);
-               return NULL;
-       }
-
-       netdev->ml_priv = c2dev;
-
-       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-       memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
-
-       /* Print out the MAC address */
-       pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
-
-#if 0
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-#endif
-       return netdev;
-}
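The strcpy/strcat pair above derives "iwX" from "ethX" by skipping the
first three characters of the source name. A bounds-checked sketch of the
same rename, assuming the name really carries a three-character prefix
(snprintf cannot overrun the destination, unlike strcpy+strcat):

    #include <stdio.h>

    #define IFNAMSIZ 16

    int main(void)
    {
            const char ethname[IFNAMSIZ] = "eth0";
            char name[IFNAMSIZ];

            snprintf(name, sizeof(name), "iw%s", &ethname[3]);
            printf("%s\n", name);           /* prints "iw0" */
            return 0;
    }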
-
-static int c2_port_immutable(struct ib_device *ibdev, u8 port_num,
-                            struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = c2_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
-       return 0;
-}
-
-int c2_register_device(struct c2_dev *dev)
-{
-       int ret = -ENOMEM;
-       int i;
-
-       /* Register pseudo network device */
-       dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
-       if (!dev->pseudo_netdev)
-               goto out;
-
-       ret = register_netdev(dev->pseudo_netdev);
-       if (ret)
-               goto out_free_netdev;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
-       dev->ibdev.owner = THIS_MODULE;
-       dev->ibdev.uverbs_cmd_mask =
-           (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
-           (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
-           (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
-           (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
-           (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
-           (1ull << IB_USER_VERBS_CMD_REG_MR) |
-           (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
-           (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
-           (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
-           (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
-           (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
-           (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
-           (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-           (1ull << IB_USER_VERBS_CMD_POST_RECV);
-
-       dev->ibdev.node_type = RDMA_NODE_RNIC;
-       memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
-       memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
-       dev->ibdev.phys_port_cnt = 1;
-       dev->ibdev.num_comp_vectors = 1;
-       dev->ibdev.dma_device = &dev->pcidev->dev;
-       dev->ibdev.query_device = c2_query_device;
-       dev->ibdev.query_port = c2_query_port;
-       dev->ibdev.query_pkey = c2_query_pkey;
-       dev->ibdev.query_gid = c2_query_gid;
-       dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
-       dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
-       dev->ibdev.mmap = c2_mmap_uar;
-       dev->ibdev.alloc_pd = c2_alloc_pd;
-       dev->ibdev.dealloc_pd = c2_dealloc_pd;
-       dev->ibdev.create_ah = c2_ah_create;
-       dev->ibdev.destroy_ah = c2_ah_destroy;
-       dev->ibdev.create_qp = c2_create_qp;
-       dev->ibdev.modify_qp = c2_modify_qp;
-       dev->ibdev.destroy_qp = c2_destroy_qp;
-       dev->ibdev.create_cq = c2_create_cq;
-       dev->ibdev.destroy_cq = c2_destroy_cq;
-       dev->ibdev.poll_cq = c2_poll_cq;
-       dev->ibdev.get_dma_mr = c2_get_dma_mr;
-       dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
-       dev->ibdev.reg_user_mr = c2_reg_user_mr;
-       dev->ibdev.dereg_mr = c2_dereg_mr;
-       dev->ibdev.get_port_immutable = c2_port_immutable;
-
-       dev->ibdev.alloc_fmr = NULL;
-       dev->ibdev.unmap_fmr = NULL;
-       dev->ibdev.dealloc_fmr = NULL;
-       dev->ibdev.map_phys_fmr = NULL;
-
-       dev->ibdev.attach_mcast = c2_multicast_attach;
-       dev->ibdev.detach_mcast = c2_multicast_detach;
-       dev->ibdev.process_mad = c2_process_mad;
-
-       dev->ibdev.req_notify_cq = c2_arm_cq;
-       dev->ibdev.post_send = c2_post_send;
-       dev->ibdev.post_recv = c2_post_receive;
-
-       dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
-       if (dev->ibdev.iwcm == NULL) {
-               ret = -ENOMEM;
-               goto out_unregister_netdev;
-       }
-       dev->ibdev.iwcm->add_ref = c2_add_ref;
-       dev->ibdev.iwcm->rem_ref = c2_rem_ref;
-       dev->ibdev.iwcm->get_qp = c2_get_qp;
-       dev->ibdev.iwcm->connect = c2_connect;
-       dev->ibdev.iwcm->accept = c2_accept;
-       dev->ibdev.iwcm->reject = c2_reject;
-       dev->ibdev.iwcm->create_listen = c2_service_create;
-       dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
-
-       ret = ib_register_device(&dev->ibdev, NULL);
-       if (ret)
-               goto out_free_iwcm;
-
-       for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
-               ret = device_create_file(&dev->ibdev.dev,
-                                              c2_dev_attributes[i]);
-               if (ret)
-                       goto out_unregister_ibdev;
-       }
-       goto out;
-
-out_unregister_ibdev:
-       ib_unregister_device(&dev->ibdev);
-out_free_iwcm:
-       kfree(dev->ibdev.iwcm);
-out_unregister_netdev:
-       unregister_netdev(dev->pseudo_netdev);
-out_free_netdev:
-       free_netdev(dev->pseudo_netdev);
-out:
-       pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
-       return ret;
-}
-
-void c2_unregister_device(struct c2_dev *dev)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       unregister_netdev(dev->pseudo_netdev);
-       free_netdev(dev->pseudo_netdev);
-       ib_unregister_device(&dev->ibdev);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.h b/drivers/staging/rdma/amso1100/c2_provider.h
deleted file mode 100644 (file)
index bf18998..0000000
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_PROVIDER_H
-#define C2_PROVIDER_H
-#include <linux/inetdevice.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-
-#include "c2_mq.h"
-#include <rdma/iw_cm.h>
-
-#define C2_MPT_FLAG_ATOMIC        (1 << 14)
-#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
-#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
-#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
-#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
-
-struct c2_buf_list {
-       void *buf;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-};
-
-
-/* The user context keeps track of objects allocated for a
- * particular user-mode client. */
-struct c2_ucontext {
-       struct ib_ucontext ibucontext;
-};
-
-struct c2_mtt;
-
-/* All objects associated with a PD are kept in the
- * associated user context if present.
- */
-struct c2_pd {
-       struct ib_pd ibpd;
-       u32 pd_id;
-};
-
-struct c2_mr {
-       struct ib_mr ibmr;
-       struct c2_pd *pd;
-       struct ib_umem *umem;
-};
-
-struct c2_av;
-
-enum c2_ah_type {
-       C2_AH_ON_HCA,
-       C2_AH_PCI_POOL,
-       C2_AH_KMALLOC
-};
-
-struct c2_ah {
-       struct ib_ah ibah;
-};
-
-struct c2_cq {
-       struct ib_cq ibcq;
-       spinlock_t lock;
-       atomic_t refcount;
-       int cqn;
-       int is_kernel;
-       wait_queue_head_t wait;
-
-       u32 adapter_handle;
-       struct c2_mq mq;
-};
-
-struct c2_wq {
-       spinlock_t lock;
-};
-struct iw_cm_id;
-struct c2_qp {
-       struct ib_qp ibqp;
-       struct iw_cm_id *cm_id;
-       spinlock_t lock;
-       atomic_t refcount;
-       wait_queue_head_t wait;
-       int qpn;
-
-       u32 adapter_handle;
-       u32 send_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u8 state;
-
-       struct c2_mq sq_mq;
-       struct c2_mq rq_mq;
-};
-
-struct c2_cr_query_attrs {
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-};
-
-static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
-{
-       return container_of(ibpd, struct c2_pd, ibpd);
-}
-
-static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
-{
-       return container_of(ibucontext, struct c2_ucontext, ibucontext);
-}
-
-static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
-{
-       return container_of(ibmr, struct c2_mr, ibmr);
-}
-
-
-static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
-{
-       return container_of(ibah, struct c2_ah, ibah);
-}
-
-static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
-{
-       return container_of(ibcq, struct c2_cq, ibcq);
-}
-
-static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
-{
-       return container_of(ibqp, struct c2_qp, ibqp);
-}
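All of the to_c2*() helpers above are the same container_of() idiom: step
back from a pointer to an embedded member to the enclosing wrapper struct.
A userspace reduction of the pattern, with placeholder struct contents:

    #include <stddef.h>
    #include <stdio.h>

    /* Same pointer arithmetic the kernel macro performs. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct ib_pd { int placeholder; };
    struct c2_pd { struct ib_pd ibpd; unsigned int pd_id; };

    int main(void)
    {
            struct c2_pd pd = { .pd_id = 42 };
            struct ib_pd *ibpd = &pd.ibpd;   /* what the core hands back */

            printf("pd_id=%u\n",
                   container_of(ibpd, struct c2_pd, ibpd)->pd_id);
            return 0;
    }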
-
-static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
-{
-       struct in_device *ind;
-       int ret = 0;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       for_ifa(ind) {
-               if (ifa->ifa_address == addr) {
-                       ret = 1;
-                       break;
-               }
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-       return ret;
-}
-#endif                         /* C2_PROVIDER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_qp.c b/drivers/staging/rdma/amso1100/c2_qp.c
deleted file mode 100644 (file)
index ca364db..0000000
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_MAX_ORD_PER_QP 128
-#define C2_MAX_IRD_PER_QP 128
-
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
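The hint word packs a message-queue index into the upper half and a count
into the lower 16 bits; note that extraction masks the index to 15 bits.
A self-contained round trip of the same encoding:

    #include <stdint.h>
    #include <stdio.h>

    #define HINT_MAKE(q, n)   ((uint32_t)(q) << 16 | (n))
    #define HINT_INDEX(h)     (((h) & 0x7FFF0000) >> 16)
    #define HINT_COUNT(h)     ((h) & 0x0000FFFF)

    int main(void)
    {
            uint32_t h = HINT_MAKE(5, 1234);

            printf("index=%u count=%u\n", HINT_INDEX(h), HINT_COUNT(h));
            return 0;
    }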
-
-#define NO_SUPPORT -1
-static const u8 c2_opcode[] = {
-       [IB_WR_SEND] = C2_WR_TYPE_SEND,
-       [IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
-       [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
-       [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
-       [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
-       [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
-       [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
-};
-
-static int to_c2_state(enum ib_qp_state ib_state)
-{
-       switch (ib_state) {
-       case IB_QPS_RESET:
-               return C2_QP_STATE_IDLE;
-       case IB_QPS_RTS:
-               return C2_QP_STATE_RTS;
-       case IB_QPS_SQD:
-               return C2_QP_STATE_CLOSING;
-       case IB_QPS_SQE:
-               return C2_QP_STATE_CLOSING;
-       case IB_QPS_ERR:
-               return C2_QP_STATE_ERROR;
-       default:
-               return -1;
-       }
-}
-
-static int to_ib_state(enum c2_qp_state c2_state)
-{
-       switch (c2_state) {
-       case C2_QP_STATE_IDLE:
-               return IB_QPS_RESET;
-       case C2_QP_STATE_CONNECTING:
-               return IB_QPS_RTR;
-       case C2_QP_STATE_RTS:
-               return IB_QPS_RTS;
-       case C2_QP_STATE_CLOSING:
-               return IB_QPS_SQD;
-       case C2_QP_STATE_ERROR:
-               return IB_QPS_ERR;
-       case C2_QP_STATE_TERMINATE:
-               return IB_QPS_SQE;
-       default:
-               return -1;
-       }
-}
-
-static const char *to_ib_state_str(int ib_state)
-{
-       static const char *state_str[] = {
-               "IB_QPS_RESET",
-               "IB_QPS_INIT",
-               "IB_QPS_RTR",
-               "IB_QPS_RTS",
-               "IB_QPS_SQD",
-               "IB_QPS_SQE",
-               "IB_QPS_ERR"
-       };
-       if (ib_state < IB_QPS_RESET ||
-           ib_state > IB_QPS_ERR)
-               return "<invalid IB QP state>";
-
-       ib_state -= IB_QPS_RESET;
-       return state_str[ib_state];
-}
-
-void c2_set_qp_state(struct c2_qp *qp, int c2_state)
-{
-       int new_state = to_ib_state(c2_state);
-
-       pr_debug("%s: qp[%p] state modify %s --> %s\n",
-              __func__,
-               qp,
-               to_ib_state_str(qp->state),
-               to_ib_state_str(new_state));
-       qp->state = new_state;
-}
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-                struct ib_qp_attr *attr, int attr_mask)
-{
-       struct c2wr_qp_modify_req wr;
-       struct c2wr_qp_modify_rep *reply;
-       struct c2_vq_req *vq_req;
-       unsigned long flags;
-       u8 next_state;
-       int err;
-
-       pr_debug("%s:%d qp=%p, %s --> %s\n",
-               __func__, __LINE__,
-               qp,
-               to_ib_state_str(qp->state),
-               to_ib_state_str(attr->qp_state));
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-       wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-       if (attr_mask & IB_QP_STATE) {
-               /* Ensure the state is valid */
-               if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
-                       err = -EINVAL;
-                       goto bail0;
-               }
-
-               wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
-
-               if (attr->qp_state == IB_QPS_ERR) {
-                       spin_lock_irqsave(&qp->lock, flags);
-                       if (qp->cm_id && qp->state == IB_QPS_RTS) {
-                               pr_debug("Generating CLOSE event for QP-->ERR, "
-                                       "qp=%p, cm_id=%p\n", qp, qp->cm_id);
-                               /* Generate a CLOSE event */
-                               vq_req->cm_id = qp->cm_id;
-                               vq_req->event = IW_CM_EVENT_CLOSE;
-                       }
-                       spin_unlock_irqrestore(&qp->lock, flags);
-               }
-               next_state =  attr->qp_state;
-
-       } else if (attr_mask & IB_QP_CUR_STATE) {
-
-               if (attr->cur_qp_state != IB_QPS_RTR &&
-                   attr->cur_qp_state != IB_QPS_RTS &&
-                   attr->cur_qp_state != IB_QPS_SQD &&
-                   attr->cur_qp_state != IB_QPS_SQE) {
-                       err = -EINVAL;
-                       goto bail0;
-               } else
-                       wr.next_qp_state =
-                           cpu_to_be32(to_c2_state(attr->cur_qp_state));
-
-               next_state = attr->cur_qp_state;
-
-       } else {
-               err = 0;
-               goto bail0;
-       }
-
-       /* reference the request struct */
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-       if (!err)
-               qp->state = next_state;
-#ifdef DEBUG
-       else
-               pr_debug("%s: c2_errno=%d\n", __func__, err);
-#endif
-       /*
-        * If we're going to error out and generate the event here, we
-        * need to drop the reference because the adapter will not
-        * generate a close event.
-        */
-       spin_lock_irqsave(&qp->lock, flags);
-       if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
-               qp->cm_id->rem_ref(qp->cm_id);
-               qp->cm_id = NULL;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-
-       pr_debug("%s:%d qp=%p, cur_state=%s\n",
-               __func__, __LINE__,
-               qp,
-               to_ib_state_str(qp->state));
-       return err;
-}
-
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-                         int ord, int ird)
-{
-       struct c2wr_qp_modify_req wr;
-       struct c2wr_qp_modify_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-       wr.ord = cpu_to_be32(ord);
-       wr.ird = cpu_to_be32(ird);
-       wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-       /* reference the request struct */
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       reply = (struct c2wr_qp_modify_rep *) (unsigned long)
-               vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_qp_destroy_req wr;
-       struct c2wr_qp_destroy_rep *reply;
-       unsigned long flags;
-       int err;
-
-       /*
-        * Allocate a verb request message
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               return -ENOMEM;
-       }
-
-       /*
-        * Initialize the WR
-        */
-       c2_wr_set_id(&wr, CCWR_QP_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-
-       /*
-        * Reference the request struct; it is dereferenced in the
-        * interrupt handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       spin_lock_irqsave(&qp->lock, flags);
-       if (qp->cm_id && qp->state == IB_QPS_RTS) {
-               pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
-                       "qp=%p, cm_id=%p\n", qp, qp->cm_id);
-               /* Generate a CLOSE event */
-               vq_req->qp = qp;
-               vq_req->cm_id = qp->cm_id;
-               vq_req->event = IW_CM_EVENT_CLOSE;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       spin_lock_irqsave(&qp->lock, flags);
-       if (qp->cm_id) {
-               qp->cm_id->rem_ref(qp->cm_id);
-               qp->cm_id = NULL;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       int ret;
-
-       idr_preload(GFP_KERNEL);
-       spin_lock_irq(&c2dev->qp_table.lock);
-
-       ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT);
-       if (ret >= 0)
-               qp->qpn = ret;
-
-       spin_unlock_irq(&c2dev->qp_table.lock);
-       idr_preload_end();
-       return ret < 0 ? ret : 0;
-}
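idr_alloc_cyclic() hands out QPNs that keep increasing before wrapping, so
a just-freed number is not immediately reissued. A toy allocator showing
only that allocation policy (the real idr also maps each ID back to its qp
pointer, which this sketch omits):

    #include <stdio.h>

    #define MAX_IDS 8

    static void *slots[MAX_IDS];
    static int next_id;

    static int alloc_cyclic(void *ptr)
    {
            for (int i = 0; i < MAX_IDS; i++) {
                    int id = (next_id + i) % MAX_IDS;

                    if (!slots[id]) {
                            slots[id] = ptr;
                            next_id = id + 1;  /* resume after last grant */
                            return id;
                    }
            }
            return -1;                         /* table exhausted */
    }

    int main(void)
    {
            int qp;

            printf("qpn=%d qpn=%d\n", alloc_cyclic(&qp), alloc_cyclic(&qp));
            return 0;
    }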
-
-static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
-{
-       spin_lock_irq(&c2dev->qp_table.lock);
-       idr_remove(&c2dev->qp_table.idr, qpn);
-       spin_unlock_irq(&c2dev->qp_table.lock);
-}
-
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
-{
-       unsigned long flags;
-       struct c2_qp *qp;
-
-       spin_lock_irqsave(&c2dev->qp_table.lock, flags);
-       qp = idr_find(&c2dev->qp_table.idr, qpn);
-       spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
-       return qp;
-}
-
-int c2_alloc_qp(struct c2_dev *c2dev,
-               struct c2_pd *pd,
-               struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
-{
-       struct c2wr_qp_create_req wr;
-       struct c2wr_qp_create_rep *reply;
-       struct c2_vq_req *vq_req;
-       struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
-       struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
-       unsigned long peer_pa;
-       u32 q_size, msg_size, mmap_size;
-       void __iomem *mmap;
-       int err;
-
-       err = c2_alloc_qpn(c2dev, qp);
-       if (err)
-               return err;
-       qp->ibqp.qp_num = qp->qpn;
-       qp->ibqp.qp_type = IB_QPT_RC;
-
-       /* Allocate the SQ and RQ shared pointers */
-       qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                        &qp->sq_mq.shared_dma, GFP_KERNEL);
-       if (!qp->sq_mq.shared) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                        &qp->rq_mq.shared_dma, GFP_KERNEL);
-       if (!qp->rq_mq.shared) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       /* Allocate the verbs request */
-       vq_req = vq_req_alloc(c2dev);
-       if (vq_req == NULL) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-
-       /* Initialize the work request */
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_QP_CREATE);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.sq_cq_handle = send_cq->adapter_handle;
-       wr.rq_cq_handle = recv_cq->adapter_handle;
-       wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
-       wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
-       wr.srq_handle = 0;
-       wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
-                              QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
-       wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-       wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
-       wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-       wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
-       wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
-       wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
-       wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
-       wr.pd_id = pd->pd_id;
-       wr.user_context = (unsigned long) qp;
-
-       vq_req_get(c2dev, vq_req);
-
-       /* Send the WR to the adapter */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail3;
-       }
-
-       /* Wait for the verb reply  */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail3;
-       }
-
-       /* Process the reply */
-       reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail3;
-       }
-
-       if ((err = c2_wr_get_result(reply)) != 0) {
-               goto bail4;
-       }
-
-       /* Fill in the kernel QP struct */
-       atomic_set(&qp->refcount, 1);
-       qp->adapter_handle = reply->qp_handle;
-       qp->state = IB_QPS_RESET;
-       qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
-       qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
-       qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
-       init_waitqueue_head(&qp->wait);
-
-       /* Initialize the SQ MQ */
-       q_size = be32_to_cpu(reply->sq_depth);
-       msg_size = be32_to_cpu(reply->sq_msg_size);
-       peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
-       mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-       mmap = ioremap_nocache(peer_pa, mmap_size);
-       if (!mmap) {
-               err = -ENOMEM;
-               goto bail5;
-       }
-
-       c2_mq_req_init(&qp->sq_mq,
-                      be32_to_cpu(reply->sq_mq_index),
-                      q_size,
-                      msg_size,
-                      mmap + sizeof(struct c2_mq_shared),      /* pool start */
-                      mmap,                            /* peer */
-                      C2_MQ_ADAPTER_TARGET);
-
-       /* Initialize the RQ mq */
-       q_size = be32_to_cpu(reply->rq_depth);
-       msg_size = be32_to_cpu(reply->rq_msg_size);
-       peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
-       mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-       mmap = ioremap_nocache(peer_pa, mmap_size);
-       if (!mmap) {
-               err = -ENOMEM;
-               goto bail6;
-       }
-
-       c2_mq_req_init(&qp->rq_mq,
-                      be32_to_cpu(reply->rq_mq_index),
-                      q_size,
-                      msg_size,
-                      mmap + sizeof(struct c2_mq_shared),      /* pool start */
-                      mmap,                            /* peer */
-                      C2_MQ_ADAPTER_TARGET);
-
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       return 0;
-
-bail6:
-       iounmap(qp->sq_mq.peer);
-bail5:
-       destroy_qp(c2dev, qp);
-bail4:
-       vq_repbuf_free(c2dev, reply);
-bail3:
-       vq_req_free(c2dev, vq_req);
-bail2:
-       c2_free_mqsp(qp->rq_mq.shared);
-bail1:
-       c2_free_mqsp(qp->sq_mq.shared);
-bail0:
-       c2_free_qpn(c2dev, qp->qpn);
-       return err;
-}
-
-static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-       if (send_cq == recv_cq)
-               spin_lock_irq(&send_cq->lock);
-       else if (send_cq > recv_cq) {
-               spin_lock_irq(&send_cq->lock);
-               spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
-       } else {
-               spin_lock_irq(&recv_cq->lock);
-               spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
-       }
-}
-
-static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-       if (send_cq == recv_cq)
-               spin_unlock_irq(&send_cq->lock);
-       else if (send_cq > recv_cq) {
-               spin_unlock(&recv_cq->lock);
-               spin_unlock_irq(&send_cq->lock);
-       } else {
-               spin_unlock(&send_cq->lock);
-               spin_unlock_irq(&recv_cq->lock);
-       }
-}
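Taking the pair of CQ locks in a fixed (address) order is what prevents an
AB/BA deadlock when two paths lock the same two CQs in opposite roles. The
same rule expressed with pthread mutexes; comparing addresses mirrors the
kernel code above:

    #include <pthread.h>
    #include <stdio.h>

    static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            if (a == b) {
                    pthread_mutex_lock(a);
            } else if (a > b) {
                    pthread_mutex_lock(a);
                    pthread_mutex_lock(b);
            } else {
                    pthread_mutex_lock(b);
                    pthread_mutex_lock(a);
            }
    }

    static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            pthread_mutex_unlock(a);
            if (a != b)
                    pthread_mutex_unlock(b);
    }

    int main(void)
    {
            pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
            pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

            lock_pair(&m1, &m2);
            unlock_pair(&m1, &m2);
            puts("ok");
            return 0;
    }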
-
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       struct c2_cq *send_cq;
-       struct c2_cq *recv_cq;
-
-       send_cq = to_c2cq(qp->ibqp.send_cq);
-       recv_cq = to_c2cq(qp->ibqp.recv_cq);
-
-       /*
-        * Lock CQs here, so that CQ polling code can do QP lookup
-        * without taking a lock.
-        */
-       c2_lock_cqs(send_cq, recv_cq);
-       c2_free_qpn(c2dev, qp->qpn);
-       c2_unlock_cqs(send_cq, recv_cq);
-
-       /*
-        * Destroy qp in the rnic...
-        */
-       destroy_qp(c2dev, qp);
-
-       /*
-        * Mark any unreaped CQEs as null and void.
-        */
-       c2_cq_clean(c2dev, qp, send_cq->cqn);
-       if (send_cq != recv_cq)
-               c2_cq_clean(c2dev, qp, recv_cq->cqn);
-       /*
-        * Unmap the MQs and return the shared pointers
-        * to the message pool.
-        */
-       iounmap(qp->sq_mq.peer);
-       iounmap(qp->rq_mq.peer);
-       c2_free_mqsp(qp->sq_mq.shared);
-       c2_free_mqsp(qp->rq_mq.shared);
-
-       atomic_dec(&qp->refcount);
-       wait_event(qp->wait, !atomic_read(&qp->refcount));
-}
-
-/*
- * Function: move_sgl
- *
- * Description:
- * Move an SGL from the user's work request struct into a CCIL Work Request
- * message, swapping to WR byte order and ensuring the total length doesn't
- * overflow.
- *
- * IN:
- * dst         - ptr to CCIL Work Request message SGL memory.
- * src         - ptr to the consumer's SGL memory.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int
-move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
-        u8 * actual_count)
-{
-       u32 tot = 0;            /* running total */
-       u8 acount = 0;          /* running total non-0 len sge's */
-
-       while (count > 0) {
-               /*
-                * If the addition of this SGE causes the
-                * total SGL length to exceed 2^32-1, then
-                * fail-n-bail.
-                *
-                * If the current total plus the next element length
-                * wraps, then it will go negative and be less than the
-                * current total...
-                */
-               if ((tot + src->length) < tot) {
-                       return -EINVAL;
-               }
-               /*
-                * Bug: 1456 (as well as 1498 & 1643)
-                * Skip over any sge's supplied with len=0
-                */
-               if (src->length) {
-                       tot += src->length;
-                       dst->stag = cpu_to_be32(src->lkey);
-                       dst->to = cpu_to_be64(src->addr);
-                       dst->length = cpu_to_be32(src->length);
-                       dst++;
-                       acount++;
-               }
-               src++;
-               count--;
-       }
-
-       if (acount == 0) {
-               /*
-                * Bug: 1476 (as well as 1498, 1456 and 1643)
-                * Setup the SGL in the WR to make it easier for the RNIC.
-                * This way, the FW doesn't have to deal with special cases.
-                * Setting length=0 should be sufficient.
-                */
-               dst->stag = 0;
-               dst->to = 0;
-               dst->length = 0;
-       }
-
-       *p_len = tot;
-       *actual_count = acount;
-       return 0;
-}
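The length check in move_sgl() relies on unsigned wraparound: for u32
values, tot + len < tot holds exactly when the sum exceeded 2^32 - 1. The
test in isolation:

    #include <stdint.h>
    #include <stdio.h>

    static int add_would_wrap(uint32_t tot, uint32_t len)
    {
            return (uint32_t)(tot + len) < tot;  /* true only on overflow */
    }

    int main(void)
    {
            printf("%d %d\n",
                   add_would_wrap(0xFFFFFFF0u, 0x20u),  /* 1: wraps */
                   add_would_wrap(100u, 200u));         /* 0: fits  */
            return 0;
    }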
-
-/*
- * Function: c2_activity (private function)
- *
- * Description:
- * Post an mq index to the host->adapter activity fifo.
- *
- * IN:
- * c2dev       - ptr to c2dev structure
- * mq_index    - mq index to post
- * shared      - value most recently written to shared
- *
- * OUT:
- *
- * Return:
- * none
- */
-static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
-{
-       /*
-        * First read the register to see if the FIFO is full, and if so,
-        * spin until it's not.  This isn't perfect -- there is no
-        * synchronization among the clients of the register, but in
-        * practice it prevents multiple CPUs from hammering the bus
-        * with PCI RETRY. Note that when this does happen, the card
-        * cannot get on the bus and the card and system hang in a
-        * deadlock -- thus the need for this code. [TOT]
-        */
-       while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000)
-               udelay(10);
-
-       __raw_writel(C2_HINT_MAKE(mq_index, shared),
-                    c2dev->regs + PCI_BAR0_ADAPTER_HINT);
-}
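The shape of the post above: spin while the adapter reports the hint FIFO
full, then write the packed hint word. A sketch with a plain volatile
variable standing in for the BAR0 register (real MMIO needs readl and
__raw_writel, as the code above uses):

    #include <stdint.h>

    static volatile uint32_t hint_reg;   /* stand-in for the MMIO register */

    static void post_hint(uint32_t mq_index, uint16_t shared)
    {
            while (hint_reg & 0x80000000u)
                    ;                    /* busy-wait: FIFO reported full */

            hint_reg = mq_index << 16 | shared;
    }

    int main(void)
    {
            post_hint(5, 7);
            return 0;
    }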
-
-/*
- * Function: qp_wr_post
- *
- * Description:
- * This inline function allocates an MQ msg, copies the host copy of
- * the built WR into the msg, and then posts the message.
- *
- * IN:
- * q           - ptr to user MQ.
- * wr          - ptr to host-copy of the WR.
- * qp          - ptr to user qp
- * size                - Number of bytes to post.  Assumed to be divisible by 4.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
-{
-       union c2wr *msg;
-
-       msg = c2_mq_alloc(q);
-       if (msg == NULL) {
-               return -EINVAL;
-       }
-#ifdef CCMSGMAGIC
-       ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-
-       /*
-        * Since all header fields in the WR are the same as the
-        * CQE, set the following so the adapter need not.
-        */
-       c2_wr_set_result(wr, CCERR_PENDING);
-
-       /*
-        * Copy the wr down to the adapter
-        */
-       memcpy((void *) msg, (void *) wr, size);
-
-       c2_mq_produce(q);
-       return 0;
-}
-
-
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-                struct ib_send_wr **bad_wr)
-{
-       struct c2_dev *c2dev = to_c2dev(ibqp->device);
-       struct c2_qp *qp = to_c2qp(ibqp);
-       union c2wr wr;
-       unsigned long lock_flags;
-       int err = 0;
-
-       u32 flags;
-       u32 tot_len;
-       u8 actual_sge_count;
-       u32 msg_size;
-
-       if (qp->state > IB_QPS_RTS) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       while (ib_wr) {
-
-               flags = 0;
-               wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-               if (ib_wr->send_flags & IB_SEND_SIGNALED) {
-                       flags |= SQ_SIGNALED;
-               }
-
-               switch (ib_wr->opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_INV:
-                       if (ib_wr->opcode == IB_WR_SEND) {
-                               if (ib_wr->send_flags & IB_SEND_SOLICITED)
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
-                               else
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
-                               wr.sqwr.send.remote_stag = 0;
-                       } else {
-                               if (ib_wr->send_flags & IB_SEND_SOLICITED)
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV);
-                               else
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV);
-                               wr.sqwr.send.remote_stag =
-                                       cpu_to_be32(ib_wr->ex.invalidate_rkey);
-                       }
-
-                       msg_size = sizeof(struct c2wr_send_req) +
-                               sizeof(struct c2_data_addr) * ib_wr->num_sge;
-                       if (ib_wr->num_sge > qp->send_sgl_depth) {
-                               err = -EINVAL;
-                               break;
-                       }
-                       if (ib_wr->send_flags & IB_SEND_FENCE) {
-                               flags |= SQ_READ_FENCE;
-                       }
-                       err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
-                                      ib_wr->sg_list,
-                                      ib_wr->num_sge,
-                                      &tot_len, &actual_sge_count);
-                       wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
-                       c2_wr_set_sge_count(&wr, actual_sge_count);
-                       break;
-               case IB_WR_RDMA_WRITE:
-                       c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
-                       msg_size = sizeof(struct c2wr_rdma_write_req) +
-                           (sizeof(struct c2_data_addr) * ib_wr->num_sge);
-                       if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
-                               err = -EINVAL;
-                               break;
-                       }
-                       if (ib_wr->send_flags & IB_SEND_FENCE) {
-                               flags |= SQ_READ_FENCE;
-                       }
-                       wr.sqwr.rdma_write.remote_stag =
-                           cpu_to_be32(rdma_wr(ib_wr)->rkey);
-                       wr.sqwr.rdma_write.remote_to =
-                           cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-                       err = move_sgl((struct c2_data_addr *)
-                                      & (wr.sqwr.rdma_write.data),
-                                      ib_wr->sg_list,
-                                      ib_wr->num_sge,
-                                      &tot_len, &actual_sge_count);
-                       wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
-                       c2_wr_set_sge_count(&wr, actual_sge_count);
-                       break;
-               case IB_WR_RDMA_READ:
-                       c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
-                       msg_size = sizeof(struct c2wr_rdma_read_req);
-
-                       /* iWARP only supports 1 SGE for RDMA reads */
-                       if (ib_wr->num_sge > 1) {
-                               err = -EINVAL;
-                               break;
-                       }
-
-                       /*
-                        * Move the local and remote stag/to/len into the WR.
-                        */
-                       wr.sqwr.rdma_read.local_stag =
-                           cpu_to_be32(ib_wr->sg_list->lkey);
-                       wr.sqwr.rdma_read.local_to =
-                           cpu_to_be64(ib_wr->sg_list->addr);
-                       wr.sqwr.rdma_read.remote_stag =
-                           cpu_to_be32(rdma_wr(ib_wr)->rkey);
-                       wr.sqwr.rdma_read.remote_to =
-                           cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-                       wr.sqwr.rdma_read.length =
-                           cpu_to_be32(ib_wr->sg_list->length);
-                       break;
-               default:
-                       /* error */
-                       msg_size = 0;
-                       err = -EINVAL;
-                       break;
-               }
-
-               /*
-                * If we had an error on the last WR build, break out.
-                * Possible errors include a bogus WR type and a bogus
-                * SGL length.
-                */
-               if (err) {
-                       break;
-               }
-
-               /*
-                * Store flags
-                */
-               c2_wr_set_flags(&wr, flags);
-
-               /*
-                * Post the puppy!
-                */
-               spin_lock_irqsave(&qp->lock, lock_flags);
-               err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
-               if (err) {
-                       spin_unlock_irqrestore(&qp->lock, lock_flags);
-                       break;
-               }
-
-               /*
-                * Enqueue mq index to activity FIFO.
-                */
-               c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
-               spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-               ib_wr = ib_wr->next;
-       }
-
-out:
-       if (err)
-               *bad_wr = ib_wr;
-       return err;
-}
-
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-                   struct ib_recv_wr **bad_wr)
-{
-       struct c2_dev *c2dev = to_c2dev(ibqp->device);
-       struct c2_qp *qp = to_c2qp(ibqp);
-       union c2wr wr;
-       unsigned long lock_flags;
-       int err = 0;
-
-       if (qp->state > IB_QPS_RTS) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       /*
-        * Try to post each work request
-        */
-       while (ib_wr) {
-               u32 tot_len;
-               u8 actual_sge_count;
-
-               if (ib_wr->num_sge > qp->recv_sgl_depth) {
-                       err = -EINVAL;
-                       break;
-               }
-
-               /*
-                * Create local host-copy of the WR
-                */
-               wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-               c2_wr_set_id(&wr, CCWR_RECV);
-               c2_wr_set_flags(&wr, 0);
-
-               /* sge_count is limited to eight bits. */
-               BUG_ON(ib_wr->num_sge >= 256);
-               err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
-                              ib_wr->sg_list,
-                              ib_wr->num_sge, &tot_len, &actual_sge_count);
-               c2_wr_set_sge_count(&wr, actual_sge_count);
-
-               /*
-                * If we had an error on the last WR build, break out.
-                * Possible errors include a bogus WR type and a bogus
-                * SGL length.
-                */
-               if (err) {
-                       break;
-               }
-
-               spin_lock_irqsave(&qp->lock, lock_flags);
-               err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
-               if (err) {
-                       spin_unlock_irqrestore(&qp->lock, lock_flags);
-                       break;
-               }
-
-               /*
-                * Enqueue mq index to activity FIFO
-                */
-               c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
-               spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-               ib_wr = ib_wr->next;
-       }
-
-out:
-       if (err)
-               *bad_wr = ib_wr;
-       return err;
-}
-
-void c2_init_qp_table(struct c2_dev *c2dev)
-{
-       spin_lock_init(&c2dev->qp_table.lock);
-       idr_init(&c2dev->qp_table.idr);
-}
-
-void c2_cleanup_qp_table(struct c2_dev *c2dev)
-{
-       idr_destroy(&c2dev->qp_table.idr);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_rnic.c b/drivers/staging/rdma/amso1100/c2_rnic.c
deleted file mode 100644 (file)
index 5e65c6d..0000000
+++ /dev/null
@@ -1,652 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/inet.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <linux/route.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_vq.h"
-
-/* Device capabilities */
-#define C2_MIN_PAGESIZE  1024
-
-#define C2_MAX_MRS       32768
-#define C2_MAX_QPS       16000
-#define C2_MAX_WQE_SZ    256
-#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
-#define C2_MAX_SGES      4
-#define C2_MAX_SGE_RD    1
-#define C2_MAX_CQS       32768
-#define C2_MAX_CQES      4096
-#define C2_MAX_PDS       16384
-
-/*
- * Send the adapter INIT message to the amso1100
- */
-static int c2_adapter_init(struct c2_dev *c2dev)
-{
-       struct c2wr_init_req wr;
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_INIT);
-       wr.hdr.context = 0;
-       wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
-       wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
-       wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
-       wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
-       wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
-       wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
-
-       /* Post the init message */
-       return vq_send_wr(c2dev, (union c2wr *) &wr);
-}
-
-/*
- * Send the adapter TERM message to the amso1100
- */
-static void c2_adapter_term(struct c2_dev *c2dev)
-{
-       struct c2wr_init_req wr;
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_TERM);
-       wr.hdr.context = 0;
-
-       /* Post the term message */
-       vq_send_wr(c2dev, (union c2wr *) &wr);
-       c2dev->init = 0;
-}
-
-/*
- * Query the adapter
- */
-static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_query_req wr;
-       struct c2wr_rnic_query_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply)
-               err = -ENOMEM;
-       else
-               err = c2_errno(reply);
-       if (err)
-               goto bail2;
-
-       props->fw_ver =
-               ((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
-               ((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
-               (be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
-       memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
-       props->max_mr_size         = 0xFFFFFFFF;
-       props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
-       props->vendor_id           = be32_to_cpu(reply->vendor_id);
-       props->vendor_part_id      = be32_to_cpu(reply->part_number);
-       props->hw_ver              = be32_to_cpu(reply->hw_version);
-       props->max_qp              = be32_to_cpu(reply->max_qps);
-       props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
-       props->device_cap_flags    = c2dev->device_cap_flags;
-       props->max_sge             = C2_MAX_SGES;
-       props->max_sge_rd          = C2_MAX_SGE_RD;
-       props->max_cq              = be32_to_cpu(reply->max_cqs);
-       props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
-       props->max_mr              = be32_to_cpu(reply->max_mrs);
-       props->max_pd              = be32_to_cpu(reply->max_pds);
-       props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
-       props->max_ee_rd_atom      = 0;
-       props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
-       props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
-       props->max_ee_init_rd_atom = 0;
-       props->atomic_cap          = IB_ATOMIC_NONE;
-       props->max_ee              = 0;
-       props->max_rdd             = 0;
-       props->max_mw              = be32_to_cpu(reply->max_mws);
-       props->max_raw_ipv6_qp     = 0;
-       props->max_raw_ethy_qp     = 0;
-       props->max_mcast_grp       = 0;
-       props->max_mcast_qp_attach = 0;
-       props->max_total_mcast_qp_attach = 0;
-       props->max_ah              = 0;
-       props->max_fmr             = 0;
-       props->max_map_per_fmr     = 0;
-       props->max_srq             = 0;
-       props->max_srq_wr          = 0;
-       props->max_srq_sge         = 0;
-       props->max_pkeys           = 0;
-       props->local_ca_ack_delay  = 0;
-
- bail2:
-       vq_repbuf_free(c2dev, reply);
-
- bail1:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Add an IP address to the RNIC interface
- */
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_setconfig_req *wr;
-       struct c2wr_rnic_setconfig_rep *reply;
-       struct c2_netaddr netaddr;
-       int err, len;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       len = sizeof(struct c2_netaddr);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
-
-       netaddr.ip_addr = inaddr;
-       netaddr.netmask = inmask;
-       netaddr.mtu = 0;
-
-       memcpy(wr->data, &netaddr, len);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Delete an IP address from the RNIC interface
- */
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_setconfig_req *wr;
-       struct c2wr_rnic_setconfig_rep *reply;
-       struct c2_netaddr netaddr;
-       int err, len;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       len = sizeof(struct c2_netaddr);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
-
-       netaddr.ip_addr = inaddr;
-       netaddr.netmask = inmask;
-       netaddr.mtu = 0;
-
-       memcpy(wr->data, &netaddr, len);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Open a single RNIC instance to use with all
- * low level openib calls
- */
-static int c2_rnic_open(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *vq_req;
-       union c2wr wr;
-       struct c2wr_rnic_open_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
-       wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
-       wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
-       wr.rnic_open.req.port_num = cpu_to_be16(0);
-       wr.rnic_open.req.user_context = (unsigned long) c2dev;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-       if (err)
-               goto bail1;
-
-       c2dev->adapter_handle = reply->rnic_handle;
-
-bail1:
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Close the RNIC instance
- */
-static int c2_rnic_close(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *vq_req;
-       union c2wr wr;
-       struct c2wr_rnic_close_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
-       wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
-       wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-       if (err)
-               goto bail1;
-
-       c2dev->adapter_handle = 0;
-
-bail1:
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Called by c2_probe to initialize the RNIC. This principally
- * involves initializing the various limits and resource pools that
- * comprise the RNIC instance.
- */
-int c2_rnic_init(struct c2_dev *c2dev)
-{
-       int err;
-       u32 qsize, msgsize;
-       void *q1_pages;
-       void *q2_pages;
-       void __iomem *mmio_regs;
-
-       /* Device capabilities */
-       c2dev->device_cap_flags =
-           (IB_DEVICE_RESIZE_MAX_WR |
-            IB_DEVICE_CURR_QP_STATE_MOD |
-            IB_DEVICE_SYS_IMAGE_GUID |
-            IB_DEVICE_LOCAL_DMA_LKEY |
-            IB_DEVICE_MEM_WINDOW);
-
-       /* Allocate the qptr_array */
-       c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
-       if (!c2dev->qptr_array)
-               return -ENOMEM;
-
-       /* Initialize the qptr_array */
-       c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
-       c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
-       c2dev->qptr_array[2] = (void *) &c2dev->aeq;
-
-       /* Initialize data structures */
-       init_waitqueue_head(&c2dev->req_vq_wo);
-       spin_lock_init(&c2dev->vqlock);
-       spin_lock_init(&c2dev->lock);
-
-       /* Allocate MQ shared pointer pool for kernel clients. User
-        * mode client pools are hung off the user context
-        */
-       err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
-       if (err)
-               goto bail0;
-
-       /* Allocate shared pointers for Q0, Q1, and Q2 from
-        * the shared pointer pool.
-        */
-
-       c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->hint_count_dma,
-                                            GFP_KERNEL);
-       c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->req_vq.shared_dma,
-                                            GFP_KERNEL);
-       c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->rep_vq.shared_dma,
-                                            GFP_KERNEL);
-       c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                         &c2dev->aeq.shared_dma, GFP_KERNEL);
-       if (!c2dev->hint_count || !c2dev->req_vq.shared ||
-           !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       mmio_regs = c2dev->kva;
-       /* Initialize the Verbs Request Queue */
-       c2_mq_req_init(&c2dev->req_vq, 0,
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)),
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)),
-                      C2_MQ_ADAPTER_TARGET);
-
-       /* Initialize the Verbs Reply Queue */
-       qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE));
-       msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
-       q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-                                     &c2dev->rep_vq.host_dma, GFP_KERNEL);
-       if (!q1_pages) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-       dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
-       pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
-                (unsigned long long) c2dev->rep_vq.host_dma);
-       c2_mq_rep_init(&c2dev->rep_vq,
-                  1,
-                  qsize,
-                  msgsize,
-                  q1_pages,
-                  mmio_regs +
-                  be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)),
-                  C2_MQ_HOST_TARGET);
-
-       /* Initialize the Asynchronous Event Queue */
-       qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE));
-       msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
-       q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-                                     &c2dev->aeq.host_dma, GFP_KERNEL);
-       if (!q2_pages) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-       dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
-       pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
-                (unsigned long long) c2dev->aeq.host_dma);
-       c2_mq_rep_init(&c2dev->aeq,
-                      2,
-                      qsize,
-                      msgsize,
-                      q2_pages,
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)),
-                      C2_MQ_HOST_TARGET);
-
-       /* Initialize the verbs request allocator */
-       err = vq_init(c2dev);
-       if (err)
-               goto bail3;
-
-       /* Enable interrupts on the adapter */
-       writel(0, c2dev->regs + C2_IDIS);
-
-       /* Send the WR init message to the adapter */
-       err = c2_adapter_init(c2dev);
-       if (err)
-               goto bail4;
-       c2dev->init++;
-
-       /* open an adapter instance */
-       err = c2_rnic_open(c2dev);
-       if (err)
-               goto bail4;
-
-       /* Initialize the cached adapter limits */
-       err = c2_rnic_query(c2dev, &c2dev->props);
-       if (err)
-               goto bail5;
-
-       /* Initialize the PD pool */
-       err = c2_init_pd_table(c2dev);
-       if (err)
-               goto bail5;
-
-       /* Initialize the QP pool */
-       c2_init_qp_table(c2dev);
-       return 0;
-
-bail5:
-       c2_rnic_close(c2dev);
-bail4:
-       vq_term(c2dev);
-bail3:
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->aeq.q_size * c2dev->aeq.msg_size,
-                         q2_pages, dma_unmap_addr(&c2dev->aeq, mapping));
-bail2:
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-                         q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping));
-bail1:
-       c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-bail0:
-       vfree(c2dev->qptr_array);
-
-       return err;
-}
-
-/*
- * Called by c2_remove to clean up the RNIC resources.
- */
-void c2_rnic_term(struct c2_dev *c2dev)
-{
-
-       /* Close the open adapter instance */
-       c2_rnic_close(c2dev);
-
-       /* Send the TERM message to the adapter */
-       c2_adapter_term(c2dev);
-
-       /* Disable interrupts on the adapter */
-       writel(1, c2dev->regs + C2_IDIS);
-
-       /* Free the QP pool */
-       c2_cleanup_qp_table(c2dev);
-
-       /* Free the PD pool */
-       c2_cleanup_pd_table(c2dev);
-
-       /* Free the verbs request allocator */
-       vq_term(c2dev);
-
-       /* Free the asynchronous event queue */
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->aeq.q_size * c2dev->aeq.msg_size,
-                         c2dev->aeq.msg_pool.host,
-                         dma_unmap_addr(&c2dev->aeq, mapping));
-
-       /* Free the verbs reply queue */
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-                         c2dev->rep_vq.msg_pool.host,
-                         dma_unmap_addr(&c2dev->rep_vq, mapping));
-
-       /* Free the MQ shared pointer pool */
-       c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-
-       /* Free the qptr_array */
-       vfree(c2dev->qptr_array);
-}
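Every synchronous verbs call in the file above (c2_rnic_query, c2_add_addr, c2_del_addr, c2_rnic_open, c2_rnic_close) repeats the same round trip over the c2_vq.h helpers. The condensed sketch below shows that pattern with the query request/reply structs as placeholders; it compiles only inside the driver, and the error paths are simplified relative to the originals.

static int c2_verbs_roundtrip(struct c2_dev *c2dev)
{
        struct c2_vq_req *vq_req;
        struct c2wr_rnic_query_req wr;
        struct c2wr_rnic_query_rep *reply;
        int err;

        vq_req = vq_req_alloc(c2dev);            /* refcnt = 1 */
        if (!vq_req)
                return -ENOMEM;

        c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
        wr.hdr.context = (unsigned long) vq_req; /* echoed back by adapter */
        wr.rnic_handle = c2dev->adapter_handle;

        vq_req_get(c2dev, vq_req);               /* 2nd ref for the irq path */

        err = vq_send_wr(c2dev, (union c2wr *) &wr);
        if (err) {
                vq_req_put(c2dev, vq_req);       /* send failed: drop 2nd ref */
                goto out;
        }

        err = vq_wait_for_reply(c2dev, vq_req);
        if (err)
                goto out;

        reply = (struct c2wr_rnic_query_rep *) (unsigned long) vq_req->reply_msg;
        err = reply ? c2_errno(reply) : -ENOMEM;
        if (reply)
                vq_repbuf_free(c2dev, reply);    /* free the repbuf first... */
out:
        vq_req_free(c2dev, vq_req);              /* ...then the request object */
        return err;
}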
diff --git a/drivers/staging/rdma/amso1100/c2_status.h b/drivers/staging/rdma/amso1100/c2_status.h
deleted file mode 100644 (file)
index 6ee4aa9..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef        _C2_STATUS_H_
-#define _C2_STATUS_H_
-
-/*
- * Verbs Status Codes
- */
-enum c2_status {
-       C2_OK = 0,              /* This must be zero */
-       CCERR_INSUFFICIENT_RESOURCES = 1,
-       CCERR_INVALID_MODIFIER = 2,
-       CCERR_INVALID_MODE = 3,
-       CCERR_IN_USE = 4,
-       CCERR_INVALID_RNIC = 5,
-       CCERR_INTERRUPTED_OPERATION = 6,
-       CCERR_INVALID_EH = 7,
-       CCERR_INVALID_CQ = 8,
-       CCERR_CQ_EMPTY = 9,
-       CCERR_NOT_IMPLEMENTED = 10,
-       CCERR_CQ_DEPTH_TOO_SMALL = 11,
-       CCERR_PD_IN_USE = 12,
-       CCERR_INVALID_PD = 13,
-       CCERR_INVALID_SRQ = 14,
-       CCERR_INVALID_ADDRESS = 15,
-       CCERR_INVALID_NETMASK = 16,
-       CCERR_INVALID_QP = 17,
-       CCERR_INVALID_QP_STATE = 18,
-       CCERR_TOO_MANY_WRS_POSTED = 19,
-       CCERR_INVALID_WR_TYPE = 20,
-       CCERR_INVALID_SGL_LENGTH = 21,
-       CCERR_INVALID_SQ_DEPTH = 22,
-       CCERR_INVALID_RQ_DEPTH = 23,
-       CCERR_INVALID_ORD = 24,
-       CCERR_INVALID_IRD = 25,
-       CCERR_QP_ATTR_CANNOT_CHANGE = 26,
-       CCERR_INVALID_STAG = 27,
-       CCERR_QP_IN_USE = 28,
-       CCERR_OUTSTANDING_WRS = 29,
-       CCERR_STAG_IN_USE = 30,
-       CCERR_INVALID_STAG_INDEX = 31,
-       CCERR_INVALID_SGL_FORMAT = 32,
-       CCERR_ADAPTER_TIMEOUT = 33,
-       CCERR_INVALID_CQ_DEPTH = 34,
-       CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
-       CCERR_INVALID_EP = 36,
-       CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
-       CCERR_FLUSHED = 38,
-       CCERR_INVALID_WQE = 39,
-       CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
-       CCERR_REMOTE_TERMINATION_ERROR = 41,
-       CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
-       CCERR_ACCESS_VIOLATION = 43,
-       CCERR_INVALID_PD_ID = 44,
-       CCERR_WRAP_ERROR = 45,
-       CCERR_INV_STAG_ACCESS_ERROR = 46,
-       CCERR_ZERO_RDMA_READ_RESOURCES = 47,
-       CCERR_QP_NOT_PRIVILEGED = 48,
-       CCERR_STAG_STATE_NOT_INVALID = 49,
-       CCERR_INVALID_PAGE_SIZE = 50,
-       CCERR_INVALID_BUFFER_SIZE = 51,
-       CCERR_INVALID_PBE = 52,
-       CCERR_INVALID_FBO = 53,
-       CCERR_INVALID_LENGTH = 54,
-       CCERR_INVALID_ACCESS_RIGHTS = 55,
-       CCERR_PBL_TOO_BIG = 56,
-       CCERR_INVALID_VA = 57,
-       CCERR_INVALID_REGION = 58,
-       CCERR_INVALID_WINDOW = 59,
-       CCERR_TOTAL_LENGTH_TOO_BIG = 60,
-       CCERR_INVALID_QP_ID = 61,
-       CCERR_ADDR_IN_USE = 62,
-       CCERR_ADDR_NOT_AVAIL = 63,
-       CCERR_NET_DOWN = 64,
-       CCERR_NET_UNREACHABLE = 65,
-       CCERR_CONN_ABORTED = 66,
-       CCERR_CONN_RESET = 67,
-       CCERR_NO_BUFS = 68,
-       CCERR_CONN_TIMEDOUT = 69,
-       CCERR_CONN_REFUSED = 70,
-       CCERR_HOST_UNREACHABLE = 71,
-       CCERR_INVALID_SEND_SGL_DEPTH = 72,
-       CCERR_INVALID_RECV_SGL_DEPTH = 73,
-       CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
-       CCERR_INSUFFICIENT_PRIVILEGES = 75,
-       CCERR_STACK_ERROR = 76,
-       CCERR_INVALID_VERSION = 77,
-       CCERR_INVALID_MTU = 78,
-       CCERR_INVALID_IMAGE = 79,
-       CCERR_PENDING = 98,     /* not an error; used internally by adapter */
-       CCERR_DEFER = 99,       /* not an error; used internally by adapter */
-       CCERR_FAILED_WRITE = 100,
-       CCERR_FAILED_ERASE = 101,
-       CCERR_FAILED_VERIFICATION = 102,
-       CCERR_NOT_FOUND = 103,
-
-};
-
-/*
- * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
- */
-enum c2_connect_status {
-       C2_CONN_STATUS_SUCCESS = C2_OK,
-       C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
-       C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
-       C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
-       C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
-       C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
-       C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
-       C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
-       C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
-       C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
-       C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
-};
-
-/*
- * Flash programming status codes.
- */
-enum c2_flash_status {
-       C2_FLASH_STATUS_SUCCESS = 0x0000,
-       C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
-       C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
-       C2_FLASH_STATUS_ECLBS = 0x0400,
-       C2_FLASH_STATUS_PSLBS = 0x0800,
-       C2_FLASH_STATUS_VPENS = 0x1000,
-};
-
-#endif                         /* _C2_STATUS_H_ */
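Callers such as c2_rnic.c fold these adapter codes into negative errno values via c2_errno() before returning them to the kernel. The real translation table is not part of this diff; the sketch below is a hypothetical illustration of the shape such a mapping takes for a few of the codes above.

#include <linux/errno.h>

static int c2_status_to_errno(enum c2_status status)
{
        switch (status) {
        case C2_OK:
                return 0;
        case CCERR_INSUFFICIENT_RESOURCES:
                return -ENOMEM;
        case CCERR_INTERRUPTED_OPERATION:
                return -EINTR;
        case CCERR_ADAPTER_TIMEOUT:
                return -ETIMEDOUT;
        case CCERR_NOT_IMPLEMENTED:
                return -ENOSYS;
        case CCERR_ADDR_IN_USE:
                return -EADDRINUSE;
        case CCERR_CONN_REFUSED:
                return -ECONNREFUSED;
        default:
                return -EINVAL;         /* conservative catch-all */
        }
}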
diff --git a/drivers/staging/rdma/amso1100/c2_user.h b/drivers/staging/rdma/amso1100/c2_user.h
deleted file mode 100644 (file)
index 7e9e7ad..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_USER_H
-#define C2_USER_H
-
-#include <linux/types.h>
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct c2_alloc_ucontext_resp {
-       __u32 qp_tab_size;
-       __u32 uarc_size;
-};
-
-struct c2_alloc_pd_resp {
-       __u32 pdn;
-       __u32 reserved;
-};
-
-struct c2_create_cq {
-       __u32 lkey;
-       __u32 pdn;
-       __u64 arm_db_page;
-       __u64 set_db_page;
-       __u32 arm_db_index;
-       __u32 set_db_index;
-};
-
-struct c2_create_cq_resp {
-       __u32 cqn;
-       __u32 reserved;
-};
-
-struct c2_create_qp {
-       __u32 lkey;
-       __u32 reserved;
-       __u64 sq_db_page;
-       __u64 rq_db_page;
-       __u32 sq_db_index;
-       __u32 rq_db_index;
-};
-
-#endif                         /* C2_USER_H */
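The layout rule spelled out in the header above (identical packing on 32-bit and 64-bit, pointers passed in __u64) is easy to get wrong from userspace. Below is a userspace-side sketch of filling such a struct under that convention; create_qp_cmd mirrors struct c2_create_qp with plain stdint types, and db_page and lkey are hypothetical inputs such as an mmap'ed doorbell page.

#include <stdint.h>
#include <string.h>

/* Userspace mirror of struct c2_create_qp, using stdint types. */
struct create_qp_cmd {
        uint32_t lkey;
        uint32_t reserved;
        uint64_t sq_db_page;
        uint64_t rq_db_page;
        uint32_t sq_db_index;
        uint32_t rq_db_index;
};

static void fill_create_qp(struct create_qp_cmd *cmd,
                           void *db_page, uint32_t lkey)
{
        memset(cmd, 0, sizeof(*cmd));
        cmd->lkey = lkey;
        /* Widen through uintptr_t: the pointer travels as a __u64, so
         * the struct packs identically on 32- and 64-bit ABIs. */
        cmd->sq_db_page = (uint64_t)(uintptr_t)db_page;
        cmd->rq_db_page = (uint64_t)(uintptr_t)db_page;
}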
diff --git a/drivers/staging/rdma/amso1100/c2_vq.c b/drivers/staging/rdma/amso1100/c2_vq.c
deleted file mode 100644 (file)
index 2ec716f..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include "c2_vq.h"
-#include "c2_provider.h"
-
-/*
- * Verbs Request Objects:
- *
- * VQ Request Objects are allocated by the kernel verbs handlers.
- * They contain a wait object, a refcnt, an atomic bool indicating that the
- * adapter has replied, and a copy of the verb reply work request.
- * A pointer to the VQ Request Object is passed down in the context
- * field of the work request message, and reflected back by the adapter
- * in the verbs reply message.  The function handle_vq() in the interrupt
- * path will use this pointer to:
- *     1) append a copy of the verbs reply message
- *     2) mark that the reply is ready
- *     3) wake up the kernel verbs handler blocked awaiting the reply.
- *
- *
- * The kernel verbs handlers do a "get" to put a 2nd reference on the
- * VQ Request object.  If the kernel verbs handler exits before the adapter
- * can respond, this extra reference will keep the VQ Request object around
- * until the adapter's reply can be processed.  The reason we need this is
- * because a pointer to this object is stuffed into the context field of
- * the verbs work request message, and reflected back in the reply message.
- * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
- * kernel verb handler that is blocked awaiting the verb reply.
- * So handle_vq() will do a "put" on the object when it's done accessing it.
- * NOTE:  If we guarantee that the kernel verb handler will never bail before
- *        getting the reply, then we don't need these refcnts.
- *
- *
- * VQ Request objects are freed by the kernel verbs handlers only
- * after the verb has been processed, or when the adapter fails and
- * does not reply.
- *
- *
- * Verbs Reply Buffers:
- *
- * VQ Reply bufs are local host memory copies of an
- * outstanding Verb Request reply message.  They are always
- * allocated by the kernel verbs handlers, and _may_ be freed by
- * either the kernel verbs handler -or- the interrupt handler.  The
- * kernel verbs handler _must_ free the repbuf, then free the vq
- * request object, in that order.
- */
-
-int vq_init(struct c2_dev *c2dev)
-{
-       sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
-               (char) ('0' + c2dev->devnum));
-       c2dev->host_msg_cache =
-           kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
-                             SLAB_HWCACHE_ALIGN, NULL);
-       if (!c2dev->host_msg_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void vq_term(struct c2_dev *c2dev)
-{
-       kmem_cache_destroy(c2dev->host_msg_cache);
-}
-
-/* vq_req_alloc - allocate a VQ Request Object and initialize it.
- * The refcnt is set to 1.
- */
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *r;
-
-       r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
-       if (r) {
-               init_waitqueue_head(&r->wait_object);
-               r->reply_msg = 0;
-               r->event = 0;
-               r->cm_id = NULL;
-               r->qp = NULL;
-               atomic_set(&r->refcnt, 1);
-               atomic_set(&r->reply_ready, 0);
-       }
-       return r;
-}
-
-
-/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
- * has already freed the VQ Reply Buffer if it existed.
- */
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       r->reply_msg = 0;
-       if (atomic_dec_and_test(&r->refcnt))
-               kfree(r);
-}
-
-/* vq_req_get - reference a VQ Request Object.  Done
- * only in the kernel verbs handlers.
- */
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       atomic_inc(&r->refcnt);
-}
-
-
-/* vq_req_put - dereference and potentially free a VQ Request Object.
- *
- * This is only called by handle_vq() in
- * interrupt context when it is done processing
- * a verb reply message.  If the associated
- * kernel verbs handler has already bailed,
- * then this put will actually free the VQ
- * Request object _and_ the VQ Reply Buffer
- * if it exists.
- */
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       if (atomic_dec_and_test(&r->refcnt)) {
-               if (r->reply_msg != 0)
-                       vq_repbuf_free(c2dev,
-                                      (void *) (unsigned long) r->reply_msg);
-               kfree(r);
-       }
-}
-
-
-/*
- * vq_repbuf_alloc - allocate a VQ Reply Buffer.
- */
-void *vq_repbuf_alloc(struct c2_dev *c2dev)
-{
-       return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC);
-}
-
-/*
- * vq_send_wr - post a verbs request message to the Verbs Request Queue.
- * If a message is not available in the MQ, then block until one is available.
- * NOTE: handle_mq() in interrupt context will wake up threads blocked here.
- * When the adapter drains the Verbs Request Queue, it inserts MQ
- * index 0 into the adapter->host activity fifo and interrupts the
- * host.
- */
-int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
-{
-       void *msg;
-       wait_queue_t __wait;
-
-       /*
-        * grab adapter vq lock
-        */
-       spin_lock(&c2dev->vqlock);
-
-       /*
-        * allocate msg
-        */
-       msg = c2_mq_alloc(&c2dev->req_vq);
-
-       /*
-        * If we cannot get a msg, then we'll wait.
-        * When messages become available, the interrupt handler will
-        * wake_up() any waiters.
-        */
-       while (msg == NULL) {
-               pr_debug("%s:%d no available msg in VQ, waiting...\n",
-                      __func__, __LINE__);
-               init_waitqueue_entry(&__wait, current);
-               add_wait_queue(&c2dev->req_vq_wo, &__wait);
-               spin_unlock(&c2dev->vqlock);
-               for (;;) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       if (!c2_mq_full(&c2dev->req_vq))
-                               break;
-                       if (!signal_pending(current)) {
-                               schedule_timeout(1 * HZ);       /* 1 second... */
-                               continue;
-                       }
-                       set_current_state(TASK_RUNNING);
-                       remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-                       return -EINTR;
-               }
-               set_current_state(TASK_RUNNING);
-               remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-               spin_lock(&c2dev->vqlock);
-               msg = c2_mq_alloc(&c2dev->req_vq);
-       }
-
-       /*
-        * copy wr into adapter msg
-        */
-       memcpy(msg, wr, c2dev->req_vq.msg_size);
-
-       /*
-        * post msg
-        */
-       c2_mq_produce(&c2dev->req_vq);
-
-       /*
-        * release adapter vq lock
-        */
-       spin_unlock(&c2dev->vqlock);
-       return 0;
-}
-
-
-/*
- * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
- */
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
-{
-       if (!wait_event_timeout(req->wait_object,
-                               atomic_read(&req->reply_ready),
-                               60*HZ))
-               return -ETIMEDOUT;
-
-       return 0;
-}
-
-/*
- * vq_repbuf_free - Free a Verbs Reply Buffer.
- */
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
-{
-       kmem_cache_free(c2dev->host_msg_cache, reply);
-}
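The refcount protocol described at the top of this file has exactly two owners: the kernel verbs handler, which holds the creator's reference from vq_req_alloc(), and the interrupt path, which holds the extra reference taken with vq_req_get(); whichever side drops last frees the object. The standalone model below reproduces that protocol, substituting C11 atomics for the kernel's atomic_t.

#include <stdatomic.h>
#include <stdlib.h>

struct req { atomic_int refcnt; };

static struct req *req_alloc(void)
{
        struct req *r = malloc(sizeof(*r));

        if (r)
                atomic_init(&r->refcnt, 1);     /* creator's reference */
        return r;
}

static void req_get(struct req *r)
{
        atomic_fetch_add(&r->refcnt, 1);
}

static void req_put(struct req *r)
{
        if (atomic_fetch_sub(&r->refcnt, 1) == 1)
                free(r);                        /* last reference gone */
}

int main(void)
{
        struct req *r = req_alloc();

        if (!r)
                return 1;
        req_get(r);     /* interrupt path takes its reference */
        req_put(r);     /* interrupt path finishes first... */
        req_put(r);     /* ...verbs handler's put frees the object */
        return 0;
}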
diff --git a/drivers/staging/rdma/amso1100/c2_vq.h b/drivers/staging/rdma/amso1100/c2_vq.h
deleted file mode 100644 (file)
index c1f6cef..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_VQ_H_
-#define _C2_VQ_H_
-#include <linux/sched.h>
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_provider.h"
-
-struct c2_vq_req {
-       u64 reply_msg;          /* ptr to reply msg */
-       wait_queue_head_t wait_object;  /* wait object for vq reqs */
-       atomic_t reply_ready;   /* set when reply is ready */
-       atomic_t refcnt;        /* used to cancel WRs... */
-       int event;
-       struct iw_cm_id *cm_id;
-       struct c2_qp *qp;
-};
-
-int vq_init(struct c2_dev *c2dev);
-void vq_term(struct c2_dev *c2dev);
-
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
-int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
-
-void *vq_repbuf_alloc(struct c2_dev *c2dev);
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
-
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
-#endif                         /* _C2_VQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_wr.h b/drivers/staging/rdma/amso1100/c2_wr.h
deleted file mode 100644 (file)
index 8d4b4ca..0000000
+++ /dev/null
@@ -1,1520 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_WR_H_
-#define _C2_WR_H_
-
-#ifdef CCDEBUG
-#define CCWR_MAGIC             0xb07700b0
-#endif
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-/* Maximum allowed size in bytes of private_data exchange
- * on connect.
- */
-#define C2_MAX_PRIVATE_DATA_SIZE 200
-
-/*
- * These types are shared among the adapter, host, and CCIL consumer.
- */
-enum c2_cq_notification_type {
-       C2_CQ_NOTIFICATION_TYPE_NONE = 1,
-       C2_CQ_NOTIFICATION_TYPE_NEXT,
-       C2_CQ_NOTIFICATION_TYPE_NEXT_SE
-};
-
-enum c2_setconfig_cmd {
-       C2_CFG_ADD_ADDR = 1,
-       C2_CFG_DEL_ADDR = 2,
-       C2_CFG_ADD_ROUTE = 3,
-       C2_CFG_DEL_ROUTE = 4
-};
-
-enum c2_getconfig_cmd {
-       C2_GETCONFIG_ROUTES = 1,
-       C2_GETCONFIG_ADDRS
-};
-
-/*
- *  CCIL Work Request Identifiers
- */
-enum c2wr_ids {
-       CCWR_RNIC_OPEN = 1,
-       CCWR_RNIC_QUERY,
-       CCWR_RNIC_SETCONFIG,
-       CCWR_RNIC_GETCONFIG,
-       CCWR_RNIC_CLOSE,
-       CCWR_CQ_CREATE,
-       CCWR_CQ_QUERY,
-       CCWR_CQ_MODIFY,
-       CCWR_CQ_DESTROY,
-       CCWR_QP_CONNECT,
-       CCWR_PD_ALLOC,
-       CCWR_PD_DEALLOC,
-       CCWR_SRQ_CREATE,
-       CCWR_SRQ_QUERY,
-       CCWR_SRQ_MODIFY,
-       CCWR_SRQ_DESTROY,
-       CCWR_QP_CREATE,
-       CCWR_QP_QUERY,
-       CCWR_QP_MODIFY,
-       CCWR_QP_DESTROY,
-       CCWR_NSMR_STAG_ALLOC,
-       CCWR_NSMR_REGISTER,
-       CCWR_NSMR_PBL,
-       CCWR_STAG_DEALLOC,
-       CCWR_NSMR_REREGISTER,
-       CCWR_SMR_REGISTER,
-       CCWR_MR_QUERY,
-       CCWR_MW_ALLOC,
-       CCWR_MW_QUERY,
-       CCWR_EP_CREATE,
-       CCWR_EP_GETOPT,
-       CCWR_EP_SETOPT,
-       CCWR_EP_DESTROY,
-       CCWR_EP_BIND,
-       CCWR_EP_CONNECT,
-       CCWR_EP_LISTEN,
-       CCWR_EP_SHUTDOWN,
-       CCWR_EP_LISTEN_CREATE,
-       CCWR_EP_LISTEN_DESTROY,
-       CCWR_EP_QUERY,
-       CCWR_CR_ACCEPT,
-       CCWR_CR_REJECT,
-       CCWR_CONSOLE,
-       CCWR_TERM,
-       CCWR_FLASH_INIT,
-       CCWR_FLASH,
-       CCWR_BUF_ALLOC,
-       CCWR_BUF_FREE,
-       CCWR_FLASH_WRITE,
-       CCWR_INIT,              /* WARNING: Don't move this ever again! */
-
-
-
-       /* Add new IDs here */
-
-
-
-       /*
-        * WARNING: CCWR_LAST must always be the last verbs id defined!
-        *          All the preceding IDs are fixed, and must not change.
-        *          You can add new IDs, but must not remove or reorder
-        *          any IDs. If you do, YOU will ruin any hope of
-        *          compatibility between versions.
-        */
-       CCWR_LAST,
-
-       /*
-        * Start over at 1 so that arrays indexed by user wr id's
-        * begin at 1.  This is OK since the verbs and user wr id's
-        * are always used on disjoint sets of queues.
-        */
-       /*
-        * The order of the CCWR_SEND_XX verbs must
-        * match the order of the RDMA_OPs
-        */
-       CCWR_SEND = 1,
-       CCWR_SEND_INV,
-       CCWR_SEND_SE,
-       CCWR_SEND_SE_INV,
-       CCWR_RDMA_WRITE,
-       CCWR_RDMA_READ,
-       CCWR_RDMA_READ_INV,
-       CCWR_MW_BIND,
-       CCWR_NSMR_FASTREG,
-       CCWR_STAG_INVALIDATE,
-       CCWR_RECV,
-       CCWR_NOP,
-       CCWR_UNIMPL,
-/* WARNING: This must always be the last user wr id defined! */
-};
-#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   ((x) + 2)
-
-/*
- * SQ/RQ Work Request Types
- */
-enum c2_wr_type {
-       C2_WR_TYPE_SEND = CCWR_SEND,
-       C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
-       C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
-       C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
-       C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
-       C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
-       C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
-       C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
-       C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
-       C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
-       C2_WR_TYPE_RECV = CCWR_RECV,
-       C2_WR_TYPE_NOP = CCWR_NOP,
-};
-
-struct c2_netaddr {
-       __be32 ip_addr;
-       __be32 netmask;
-       u32 mtu;
-};
-
-struct c2_route {
-       u32 ip_addr;            /* 0 indicates the default route */
-       u32 netmask;            /* netmask associated with dst */
-       u32 flags;
-       union {
-               u32 ipaddr;     /* address of the nexthop interface */
-               u8 enaddr[6];
-       } nexthop;
-};
-
-/*
- * A Scatter Gather Entry.
- */
-struct c2_data_addr {
-       __be32 stag;
-       __be32 length;
-       __be64 to;
-};
-
-/*
- * MR and MW flags used by the consumer, RI, and RNIC.
- */
-enum c2_mm_flags {
-       MEM_REMOTE = 0x0001,    /* allow mw binds with remote access. */
-       MEM_VA_BASED = 0x0002,  /* Not Zero-based */
-       MEM_PBL_COMPLETE = 0x0004,      /* PBL array is complete in this msg */
-       MEM_LOCAL_READ = 0x0008,        /* allow local reads */
-       MEM_LOCAL_WRITE = 0x0010,       /* allow local writes */
-       MEM_REMOTE_READ = 0x0020,       /* allow remote reads */
-       MEM_REMOTE_WRITE = 0x0040,      /* allow remote writes */
-       MEM_WINDOW_BIND = 0x0080,       /* binds allowed */
-       MEM_SHARED = 0x0100,    /* set if MR is shared */
-       MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */
-};
-
-/*
- * CCIL API ACF flags defined in terms of the low level mem flags.
- * This minimizes translation needed in the user API
- */
-enum c2_acf {
-       C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
-       C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
-       C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
-       C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
-       C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
-};
-
-/*
- * Image types of objects written to flash
- */
-#define C2_FLASH_IMG_BITFILE 1
-#define C2_FLASH_IMG_OPTION_ROM 2
-#define C2_FLASH_IMG_VPD 3
-
-/*
- *  To fix bug 1815 we define the max allowable size of the
- *  terminate message per the IETF spec (refer to the IETF
- *  protocol specification, section 12.1.6, page 64).
- *  The message is prefixed by 20 bytes of DDP info.
- *
- *  Then the message has 6 bytes for the terminate control
- *  and DDP segment length info plus a DDP header (either
- *  14 or 18 bytes) plus 28 bytes for the RDMA header.
- *  Thus the max size is:
- *  20 + (6 + 18 + 28) = 72
- */
-#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
-
-/*
- * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
- */
-#define WR_BUILD_STR_LEN 64
-
-/*
- * WARNING:  All of these structs need to align any 64-bit types on
- * 64-bit boundaries!  64-bit types include u64 and __be64.
- */
-
-/*
- * Clustercore Work Request Header.  Be sensitive to field layout
- * and alignment.
- */
-struct c2wr_hdr {
-       /* wqe_count is part of the cqe.  It is put here so the
-        * adapter can write to it while the wr is pending without
-        * clobbering part of the wr.  This word need not be dma'd
-        * from the host to adapter by libccil, but we copy it anyway
-        * to make the memcpy to the adapter better aligned.
-        */
-       __be32 wqe_count;
-
-       /* Put these fields next so that later 32- and 64-bit
-        * quantities are naturally aligned.
-        */
-       u8 id;
-       u8 result;              /* adapter -> host */
-       u8 sge_count;           /* host -> adapter */
-       u8 flags;               /* host -> adapter */
-
-       u64 context;
-#ifdef CCMSGMAGIC
-       u32 magic;
-       u32 pad;
-#endif
-} __attribute__((packed));
-
-/*
- *------------------------ RNIC ------------------------
- */
-
-/*
- * WR_RNIC_OPEN
- */
-
-/*
- * Flags for the RNIC WRs
- */
-enum c2_rnic_flags {
-       RNIC_IRD_STATIC = 0x0001,
-       RNIC_ORD_STATIC = 0x0002,
-       RNIC_QP_STATIC = 0x0004,
-       RNIC_SRQ_SUPPORTED = 0x0008,
-       RNIC_PBL_BLOCK_MODE = 0x0010,
-       RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
-       RNIC_CQ_OVF_DETECTED = 0x0040,
-       RNIC_PRIV_MODE = 0x0080
-};
-
-struct c2wr_rnic_open_req {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       __be16 flags;           /* See enum c2_rnic_flags */
-       __be16 port_num;
-} __attribute__((packed));
-
-struct c2wr_rnic_open_rep {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-union c2wr_rnic_open {
-       struct c2wr_rnic_open_req req;
-       struct c2wr_rnic_open_rep rep;
-} __attribute__((packed));
-
-struct c2wr_rnic_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_QUERY
- */
-struct c2wr_rnic_query_rep {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       __be32 vendor_id;
-       __be32 part_number;
-       __be32 hw_version;
-       __be32 fw_ver_major;
-       __be32 fw_ver_minor;
-       __be32 fw_ver_patch;
-       char fw_ver_build_str[WR_BUILD_STR_LEN];
-       __be32 max_qps;
-       __be32 max_qp_depth;
-       u32 max_srq_depth;
-       u32 max_send_sgl_depth;
-       u32 max_rdma_sgl_depth;
-       __be32 max_cqs;
-       __be32 max_cq_depth;
-       u32 max_cq_event_handlers;
-       __be32 max_mrs;
-       u32 max_pbl_depth;
-       __be32 max_pds;
-       __be32 max_global_ird;
-       u32 max_global_ord;
-       __be32 max_qp_ird;
-       __be32 max_qp_ord;
-       u32 flags;
-       __be32 max_mws;
-       u32 pbe_range_low;
-       u32 pbe_range_high;
-       u32 max_srqs;
-       u32 page_size;
-} __attribute__((packed));
-
-union c2wr_rnic_query {
-       struct c2wr_rnic_query_req req;
-       struct c2wr_rnic_query_rep rep;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_GETCONFIG
- */
-
-struct c2wr_rnic_getconfig_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 option;             /* see c2_getconfig_cmd_t */
-       u64 reply_buf;
-       u32 reply_buf_len;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_getconfig_rep {
-       struct c2wr_hdr hdr;
-       u32 option;             /* see c2_getconfig_cmd_t */
-       u32 count_len;          /* length of the number of addresses configured */
-} __attribute__((packed)) ;
-
-union c2wr_rnic_getconfig {
-       struct c2wr_rnic_getconfig_req req;
-       struct c2wr_rnic_getconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_SETCONFIG
- */
-struct c2wr_rnic_setconfig_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 option;          /* See c2_setconfig_cmd_t */
-       /* variable data and pad. See c2_netaddr and c2_route */
-       u8 data[0];
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_setconfig_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_setconfig {
-       struct c2wr_rnic_setconfig_req req;
-       struct c2wr_rnic_setconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_CLOSE
- */
-struct c2wr_rnic_close_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_close_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_close {
-       struct c2wr_rnic_close_req req;
-       struct c2wr_rnic_close_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ CQ ------------------------
- */
-struct c2wr_cq_create_req {
-       struct c2wr_hdr hdr;
-       __be64 shared_ht;
-       u64 user_context;
-       __be64 msg_pool;
-       u32 rnic_handle;
-       __be32 msg_size;
-       __be32 depth;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_create_rep {
-       struct c2wr_hdr hdr;
-       __be32 mq_index;
-       __be32 adapter_shared;
-       u32 cq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_cq_create {
-       struct c2wr_cq_create_req req;
-       struct c2wr_cq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 cq_handle;
-       u32 new_depth;
-       u64 new_msg_pool;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_modify {
-       struct c2wr_cq_modify_req req;
-       struct c2wr_cq_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 cq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_destroy {
-       struct c2wr_cq_destroy_req req;
-       struct c2wr_cq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ PD ------------------------
- */
-struct c2wr_pd_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_alloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_alloc {
-       struct c2wr_pd_alloc_req req;
-       struct c2wr_pd_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_dealloc {
-       struct c2wr_pd_dealloc_req req;
-       struct c2wr_pd_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ SRQ ------------------------
- */
-struct c2wr_srq_create_req {
-       struct c2wr_hdr hdr;
-       u64 shared_ht;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 srq_depth;
-       u32 srq_limit;
-       u32 sgl_depth;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_create_rep {
-       struct c2wr_hdr hdr;
-       u32 srq_depth;
-       u32 sgl_depth;
-       u32 msg_size;
-       u32 mq_index;
-       u32 mq_start;
-       u32 srq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_srq_create {
-       struct c2wr_srq_create_req req;
-       struct c2wr_srq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 srq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_srq_destroy {
-       struct c2wr_srq_destroy_req req;
-       struct c2wr_srq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ QP ------------------------
- */
-enum c2wr_qp_flags {
-       QP_RDMA_READ = 0x00000001,      /* RDMA read enabled? */
-       QP_RDMA_WRITE = 0x00000002,     /* RDMA write enabled? */
-       QP_MW_BIND = 0x00000004,        /* MWs enabled */
-       QP_ZERO_STAG = 0x00000008,      /* enabled? */
-       QP_REMOTE_TERMINATION = 0x00000010,     /* remote end terminated */
-       QP_RDMA_READ_RESPONSE = 0x00000020      /* Remote RDMA read  */
-           /* enabled? */
-};
-
-struct c2wr_qp_create_req {
-       struct c2wr_hdr hdr;
-       __be64 shared_sq_ht;
-       __be64 shared_rq_ht;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 sq_cq_handle;
-       u32 rq_cq_handle;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 srq_handle;
-       u32 srq_limit;
-       __be32 flags;           /* see enum c2wr_qp_flags */
-       __be32 send_sgl_depth;
-       __be32 recv_sgl_depth;
-       __be32 rdma_write_sgl_depth;
-       __be32 ord;
-       __be32 ird;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_create_rep {
-       struct c2wr_hdr hdr;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 send_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u32 ord;
-       u32 ird;
-       __be32 sq_msg_size;
-       __be32 sq_mq_index;
-       __be32 sq_mq_start;
-       __be32 rq_msg_size;
-       __be32 rq_mq_index;
-       __be32 rq_mq_start;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-union c2wr_qp_create {
-       struct c2wr_qp_create_req req;
-       struct c2wr_qp_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_rep {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 sq_depth;
-       u32 rq_depth;
-       u32 send_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 ord;
-       u32 ird;
-       u16 qp_state;
-       u16 flags;              /* see c2wr_qp_flags_t */
-       u32 qp_id;
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-       u32 terminate_msg_length;       /* 0 if not present */
-       u8 data[0];
-       /* Terminate Message in-line here. */
-} __attribute__((packed)) ;
-
-union c2wr_qp_query {
-       struct c2wr_qp_query_req req;
-       struct c2wr_qp_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_req {
-       struct c2wr_hdr hdr;
-       u64 stream_msg;
-       u32 stream_msg_length;
-       u32 rnic_handle;
-       u32 qp_handle;
-       __be32 next_qp_state;
-       __be32 ord;
-       __be32 ird;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 llp_ep_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_rep {
-       struct c2wr_hdr hdr;
-       u32 ord;
-       u32 ird;
-       u32 sq_depth;
-       u32 rq_depth;
-       u32 sq_msg_size;
-       u32 sq_mq_index;
-       u32 sq_mq_start;
-       u32 rq_msg_size;
-       u32 rq_mq_index;
-       u32 rq_mq_start;
-} __attribute__((packed)) ;
-
-union c2wr_qp_modify {
-       struct c2wr_qp_modify_req req;
-       struct c2wr_qp_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_qp_destroy {
-       struct c2wr_qp_destroy_req req;
-       struct c2wr_qp_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
- * only be posted when a QP is in IDLE state.  After the connect request is
- * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
- * No synchronous reply from the adapter to this WR.  The results of the
- * connection attempt are passed back in the async event
- * CCAE_ACTIVE_CONNECT_RESULTS; see struct c2wr_ae_active_connect_results.
- */
-struct c2wr_qp_connect_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-       __be32 remote_addr;
-       __be16 remote_port;
-       u16 pad;
-       __be32 private_data_length;
-       u8 private_data[0];     /* Private data in-line. */
-} __attribute__((packed)) ;
-
-struct c2wr_qp_connect {
-       struct c2wr_qp_connect_req req;
-       /* no synchronous reply.         */
-} __attribute__((packed)) ;
-
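[Editorial note] Since CCWR_QP_CONNECT has no synchronous reply, the host only has to fill the request correctly before posting it; the fields typed __be32/__be16 travel big-endian on the wire. A minimal, purely illustrative sketch of filling such a request (the helper name is hypothetical; c2_wr_set_id() is the accessor defined at the bottom of this header):

static void example_fill_qp_connect(struct c2wr_qp_connect_req *wr,
                                    u32 rnic, u32 qp, u32 daddr, u16 dport,
                                    const void *priv, u32 priv_len)
{
        c2_wr_set_id(wr, CCWR_QP_CONNECT);      /* WR type lives in the hdr */
        wr->rnic_handle = rnic;
        wr->qp_handle = qp;
        wr->remote_addr = cpu_to_be32(daddr);   /* wire format is big-endian */
        wr->remote_port = cpu_to_be16(dport);
        wr->pad = 0;
        wr->private_data_length = cpu_to_be32(priv_len);
        memcpy(wr->private_data, priv, priv_len);       /* data in-line */
}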
-
-/*
- *------------------------ MM ------------------------
- */
-
-struct c2wr_nsmr_stag_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pbl_depth;
-       u32 pd_id;
-       u32 flags;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_stag_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_stag_alloc {
-       struct c2wr_nsmr_stag_alloc_req req;
-       struct c2wr_nsmr_stag_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_req {
-       struct c2wr_hdr hdr;
-       __be64 va;
-       u32 rnic_handle;
-       __be16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 pd_id;
-       __be32 pbl_depth;
-       __be32 pbe_size;
-       __be32 fbo;
-       __be32 length;
-       __be32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       __be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       __be32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_register {
-       struct c2wr_nsmr_register_req req;
-       struct c2wr_nsmr_register_rep rep;
-} __attribute__((packed)) ;
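[Editorial note] The paddr array in c2wr_nsmr_register_req is carried in-line, so the number of page addresses one message can hold is bounded by the queue's message size; registrations larger than that are continued with the c2wr_nsmr_pbl messages just below. A rough, illustrative calculation (msg_size is a hypothetical per-queue limit, not a field from this header):

static u32 example_paddrs_per_register_wr(u32 msg_size)
{
        /* whatever is left after the fixed fields holds 64-bit paddrs */
        return (msg_size - sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
}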
-
-struct c2wr_nsmr_pbl_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 flags;
-       __be32 stag_index;
-       __be32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       __be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_pbl {
-       struct c2wr_nsmr_pbl_req req;
-       struct c2wr_nsmr_pbl_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_rep {
-       struct c2wr_hdr hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 pd_id;
-       u32 flags;
-       u32 pbl_depth;
-} __attribute__((packed)) ;
-
-union c2wr_mr_query {
-       struct c2wr_mr_query_req req;
-       struct c2wr_mr_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_rep {
-       struct c2wr_hdr hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 pd_id;
-       u32 flags;
-} __attribute__((packed)) ;
-
-union c2wr_mw_query {
-       struct c2wr_mw_query_req req;
-       struct c2wr_mw_query_rep rep;
-} __attribute__((packed)) ;
-
-
-struct c2wr_stag_dealloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_stag_dealloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_stag_dealloc {
-       struct c2wr_stag_dealloc_req req;
-       struct c2wr_stag_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_req {
-       struct c2wr_hdr hdr;
-       u64 va;
-       u32 rnic_handle;
-       u16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 stag_index;
-       u32 pd_id;
-       u32 pbl_depth;
-       u32 pbe_size;
-       u32 fbo;
-       u32 length;
-       u32 addrs_length;
-       u32 pad1;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       u64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_reregister {
-       struct c2wr_nsmr_reregister_req req;
-       struct c2wr_nsmr_reregister_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_req {
-       struct c2wr_hdr hdr;
-       u64 va;
-       u32 rnic_handle;
-       u16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 stag_index;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_rep {
-       struct c2wr_hdr hdr;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_smr_register {
-       struct c2wr_smr_register_req req;
-       struct c2wr_smr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_mw_alloc {
-       struct c2wr_mw_alloc_req req;
-       struct c2wr_mw_alloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ WRs -----------------------
- */
-
-struct c2wr_user_hdr {
-       struct c2wr_hdr hdr;            /* Has status and WR Type */
-} __attribute__((packed)) ;
-
-enum c2_qp_state {
-       C2_QP_STATE_IDLE = 0x01,
-       C2_QP_STATE_CONNECTING = 0x02,
-       C2_QP_STATE_RTS = 0x04,
-       C2_QP_STATE_CLOSING = 0x08,
-       C2_QP_STATE_TERMINATE = 0x10,
-       C2_QP_STATE_ERROR = 0x20,
-};
-
-/* Completion queue entry. */
-struct c2wr_ce {
-       struct c2wr_hdr hdr;            /* Has status and WR Type */
-       u64 qp_user_context;    /* c2_user_qp_t * */
-       u32 qp_state;           /* Current QP State */
-       u32 handle;             /* QPID or EP Handle */
-       __be32 bytes_rcvd;              /* valid for RECV WCs */
-       u32 stag;
-} __attribute__((packed)) ;
-
-
-/*
- * Flags used for all post-sq WRs.  These must fit in the flags
- * field of the struct c2wr_hdr (eight bits).
- */
-enum {
-       SQ_SIGNALED = 0x01,
-       SQ_READ_FENCE = 0x02,
-       SQ_FENCE = 0x04,
-};
-
-/*
- * Common fields for all post-sq WRs.  Namely the standard header and a
- * secondary header with fields common to all post-sq WRs.
- */
-struct c2_sq_hdr {
-       struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * Same as above but for post-rq WRs.
- */
-struct c2_rq_hdr {
-       struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * use the same struct for all sends.
- */
-struct c2wr_send_req {
-       struct c2_sq_hdr sq_hdr;
-       __be32 sge_len;
-       __be32 remote_stag;
-       u8 data[0];             /* SGE array */
-} __attribute__((packed));
-
-union c2wr_send {
-       struct c2wr_send_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_write_req {
-       struct c2_sq_hdr sq_hdr;
-       __be64 remote_to;
-       __be32 remote_stag;
-       __be32 sge_len;
-       u8 data[0];             /* SGE array */
-} __attribute__((packed));
-
-union c2wr_rdma_write {
-       struct c2wr_rdma_write_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_read_req {
-       struct c2_sq_hdr sq_hdr;
-       __be64 local_to;
-       __be64 remote_to;
-       __be32 local_stag;
-       __be32 remote_stag;
-       __be32 length;
-} __attribute__((packed));
-
-union c2wr_rdma_read {
-       struct c2wr_rdma_read_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_mw_bind_req {
-       struct c2_sq_hdr sq_hdr;
-       u64 va;
-       u8 stag_key;
-       u8 pad[3];
-       u32 mw_stag_index;
-       u32 mr_stag_index;
-       u32 length;
-       u32 flags;
-} __attribute__((packed));
-
-union c2wr_mw_bind {
-       struct c2wr_mw_bind_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_nsmr_fastreg_req {
-       struct c2_sq_hdr sq_hdr;
-       u64 va;
-       u8 stag_key;
-       u8 pad[3];
-       u32 stag_index;
-       u32 pbe_size;
-       u32 fbo;
-       u32 length;
-       u32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       u64 paddrs[0];
-} __attribute__((packed));
-
-union c2wr_nsmr_fastreg {
-       struct c2wr_nsmr_fastreg_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_stag_invalidate_req {
-       struct c2_sq_hdr sq_hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 stag_index;
-} __attribute__((packed));
-
-union c2wr_stag_invalidate {
-       struct c2wr_stag_invalidate_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-union c2wr_sqwr {
-       struct c2_sq_hdr sq_hdr;
-       struct c2wr_send_req send;
-       struct c2wr_send_req send_se;
-       struct c2wr_send_req send_inv;
-       struct c2wr_send_req send_se_inv;
-       struct c2wr_rdma_write_req rdma_write;
-       struct c2wr_rdma_read_req rdma_read;
-       struct c2wr_mw_bind_req mw_bind;
-       struct c2wr_nsmr_fastreg_req nsmr_fastreg;
-       struct c2wr_stag_invalidate_req stag_inv;
-} __attribute__((packed));
-
-
-/*
- * RQ WRs
- */
-struct c2wr_rqwr {
-       struct c2_rq_hdr rq_hdr;
-       u8 data[0];             /* array of SGEs */
-} __attribute__((packed));
-
-union c2wr_recv {
-       struct c2wr_rqwr req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-/*
- * All AEs start with this header.  Most AEs only need to convey the
- * information in the header.  Some, like LLP connection events, need
- * more info.  The union c2wr_ae below has all the possible AEs.
- *
- * hdr.context is the user_context from the rnic_open WR.  NULL if this
- * is not affiliated with an rnic.
- *
- * hdr.id is the AE identifier (e.g. CCAE_REMOTE_SHUTDOWN,
- * CCAE_LLP_CLOSE_COMPLETE)
- *
- * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
- *
- * user_context is the context passed down when the host created the resource.
- */
-struct c2wr_ae_hdr {
-       struct c2wr_hdr hdr;
-       u64 user_context;       /* user context for this res. */
-       __be32 resource_type;   /* see enum c2_resource_indicator */
-       __be32 resource;        /* handle for resource */
-       __be32 qp_state;        /* current QP State */
-} __attribute__((packed));
-
-/*
- * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
- * the adapter moves the QP into the RTS state.
- */
-struct c2wr_ae_active_connect_results {
-       struct c2wr_ae_hdr ae_hdr;
-       __be32 laddr;
-       __be32 raddr;
-       __be16 lport;
-       __be16 rport;
-       __be32 private_data_length;
-       u8 private_data[0];     /* data is in-line in the msg. */
-} __attribute__((packed));
-
-/*
- * When connections are established by the stack (and the private data
- * MPA frame is received), the adapter will generate an event to the host.
- * The details of the connection, any private data, and the new connection
- * request handle are passed up via the CCAE_CONNECTION_REQUEST msg on the
- * AE queue:
- */
-struct c2wr_ae_connection_request {
-       struct c2wr_ae_hdr ae_hdr;
-       u32 cr_handle;          /* connreq handle (sock ptr) */
-       __be32 laddr;
-       __be32 raddr;
-       __be16 lport;
-       __be16 rport;
-       __be32 private_data_length;
-       u8 private_data[0];     /* data is in-line in the msg. */
-} __attribute__((packed));
-
-union c2wr_ae {
-       struct c2wr_ae_hdr ae_generic;
-       struct c2wr_ae_active_connect_results ae_active_connect_results;
-       struct c2wr_ae_connection_request ae_connection_request;
-} __attribute__((packed));
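[Editorial note] Because every AE begins with struct c2wr_ae_hdr, a consumer can dispatch on the common id before touching type-specific fields. An illustrative sketch only (the handler names are hypothetical; c2_wr_get_id() is defined at the bottom of this header, and the two ids are the ones the comments above refer to):

static void example_handle_ae(union c2wr_ae *ae)
{
        switch (c2_wr_get_id(ae)) {             /* id lives in the common hdr */
        case CCAE_ACTIVE_CONNECT_RESULTS:
                handle_connect_results(&ae->ae_active_connect_results);
                break;
        case CCAE_CONNECTION_REQUEST:
                handle_connection_request(&ae->ae_connection_request);
                break;
        default:        /* generic AEs carry everything in the hdr */
                handle_generic_ae(&ae->ae_generic);
                break;
        }
}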
-
-struct c2wr_init_req {
-       struct c2wr_hdr hdr;
-       __be64 hint_count;
-       __be64 q0_host_shared;
-       __be64 q1_host_shared;
-       __be64 q1_host_msg_pool;
-       __be64 q2_host_shared;
-       __be64 q2_host_msg_pool;
-} __attribute__((packed));
-
-struct c2wr_init_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_init {
-       struct c2wr_init_req req;
-       struct c2wr_init_rep rep;
-} __attribute__((packed));
-
-/*
- * For upgrading flash.
- */
-
-struct c2wr_flash_init_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-struct c2wr_flash_init_rep {
-       struct c2wr_hdr hdr;
-       u32 adapter_flash_buf_offset;
-       u32 adapter_flash_len;
-} __attribute__((packed));
-
-union c2wr_flash_init {
-       struct c2wr_flash_init_req req;
-       struct c2wr_flash_init_rep rep;
-} __attribute__((packed));
-
-struct c2wr_flash_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 len;
-} __attribute__((packed));
-
-struct c2wr_flash_rep {
-       struct c2wr_hdr hdr;
-       u32 status;
-} __attribute__((packed));
-
-union c2wr_flash {
-       struct c2wr_flash_req req;
-       struct c2wr_flash_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 size;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 offset;             /* 0 if mem not available */
-       u32 size;               /* 0 if mem not available */
-} __attribute__((packed));
-
-union c2wr_buf_alloc {
-       struct c2wr_buf_alloc_req req;
-       struct c2wr_buf_alloc_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_free_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 offset;             /* Must match value from alloc */
-       u32 size;               /* Must match value from alloc */
-} __attribute__((packed));
-
-struct c2wr_buf_free_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_buf_free {
-       struct c2wr_buf_free_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_flash_write_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 offset;
-       u32 size;
-       u32 type;
-       u32 flags;
-} __attribute__((packed));
-
-struct c2wr_flash_write_rep {
-       struct c2wr_hdr hdr;
-       u32 status;
-} __attribute__((packed));
-
-union c2wr_flash_write {
-       struct c2wr_flash_write_req req;
-       struct c2wr_flash_write_rep rep;
-} __attribute__((packed));
-
-/*
- * Messages for LLP connection setup.
- */
-
-/*
- * Listen Request.  This allocates a listening endpoint to allow passive
- * connection setup.  Newly established LLP connections are passed up
- * via an AE; see struct c2wr_ae_connection_request.
- */
-struct c2wr_ep_listen_create_req {
-       struct c2wr_hdr hdr;
-       u64 user_context;       /* returned in AEs. */
-       u32 rnic_handle;
-       __be32 local_addr;              /* local addr, or 0  */
-       __be16 local_port;              /* 0 means "pick one" */
-       u16 pad;
-       __be32 backlog;         /* traditional tcp listen backlog */
-} __attribute__((packed));
-
-struct c2wr_ep_listen_create_rep {
-       struct c2wr_hdr hdr;
-       u32 ep_handle;          /* handle to new listening ep */
-       u16 local_port;         /* resulting port... */
-       u16 pad;
-} __attribute__((packed));
-
-union c2wr_ep_listen_create {
-       struct c2wr_ep_listen_create_req req;
-       struct c2wr_ep_listen_create_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_ep_listen_destroy {
-       struct c2wr_ep_listen_destroy_req req;
-       struct c2wr_ep_listen_destroy_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_query_rep {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-} __attribute__((packed));
-
-union c2wr_ep_query {
-       struct c2wr_ep_query_req req;
-       struct c2wr_ep_query_rep rep;
-} __attribute__((packed));
-
-
-/*
- * The host passes this down to indicate acceptance of a pending iWARP
- * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
- * AE passed up by the adapter.  See struct c2wr_ae_connection_request.
- */
-struct c2wr_cr_accept_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;          /* QP to bind to this LLP conn */
-       u32 ep_handle;          /* LLP  handle to accept */
-       __be32 private_data_length;
-       u8 private_data[0];     /* data in-line in msg. */
-} __attribute__((packed));
-
-/*
- * The adapter sends a reply when the private data has been successfully
- * submitted to the LLP.
- */
-struct c2wr_cr_accept_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_accept {
-       struct c2wr_cr_accept_req req;
-       struct c2wr_cr_accept_rep rep;
-} __attribute__((packed));
-
-/*
- * The host sends this down if a given iWARP connection request was
- * rejected by the consumer.  The cr_handle was obtained from a
- * previous struct c2wr_ae_connection_request AE sent by the adapter.
- */
-struct  c2wr_cr_reject_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;          /* LLP handle to reject */
-} __attribute__((packed));
-
-/*
- * Dunno if this is needed, but we'll add it for now.  The adapter will
- * send the reject_reply after the LLP endpoint has been destroyed.
- */
-struct  c2wr_cr_reject_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_reject {
-       struct c2wr_cr_reject_req req;
-       struct c2wr_cr_reject_rep rep;
-} __attribute__((packed));
-
-/*
- * Console command.  Used to implement a debug console over the verbs
- * request and reply queues.
- */
-
-/*
- * Console request message.  It contains:
- *     - message hdr with id = CCWR_CONSOLE
- *     - the physaddr/len of host memory to be used for the reply.
- *     - the command string, e.g. "netstat -s" or "zoneinfo"
- */
-struct c2wr_console_req {
-       struct c2wr_hdr hdr;            /* id = CCWR_CONSOLE */
-       u64 reply_buf;          /* pinned host buf for reply */
-       u32 reply_buf_len;      /* length of reply buffer */
-       u8 command[0];          /* NUL terminated ascii string */
-       /* containing the command req */
-} __attribute__((packed));
-
-/*
- * flags used in the console reply.
- */
-enum c2_console_flags {
-       CONS_REPLY_TRUNCATED = 0x00000001       /* reply was truncated */
-} __attribute__((packed));
-
-/*
- * Console reply message.
- * hdr.result contains the c2_status_t error if the reply was _not_ generated,
- * or C2_OK if the reply was generated.
- */
-struct c2wr_console_rep {
-       struct c2wr_hdr hdr;            /* id = CCWR_CONSOLE */
-       u32 flags;
-} __attribute__((packed));
-
-union c2wr_console {
-       struct c2wr_console_req req;
-       struct c2wr_console_rep rep;
-} __attribute__((packed));
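[Editorial note] The console request is variable length because the command string rides in-line after the fixed fields, so the poster must size the message to cover the string plus its terminating NUL. A minimal illustrative sketch:

static size_t example_console_req_size(const char *cmd)
{
        /* fixed fields plus the NUL-terminated command string */
        return sizeof(struct c2wr_console_req) + strlen(cmd) + 1;
}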
-
-
-/*
- * Giant union with all WRs.  Makes life easier...
- */
-union c2wr {
-       struct c2wr_hdr hdr;
-       struct c2wr_user_hdr user_hdr;
-       union c2wr_rnic_open rnic_open;
-       union c2wr_rnic_query rnic_query;
-       union c2wr_rnic_getconfig rnic_getconfig;
-       union c2wr_rnic_setconfig rnic_setconfig;
-       union c2wr_rnic_close rnic_close;
-       union c2wr_cq_create cq_create;
-       union c2wr_cq_modify cq_modify;
-       union c2wr_cq_destroy cq_destroy;
-       union c2wr_pd_alloc pd_alloc;
-       union c2wr_pd_dealloc pd_dealloc;
-       union c2wr_srq_create srq_create;
-       union c2wr_srq_destroy srq_destroy;
-       union c2wr_qp_create qp_create;
-       union c2wr_qp_query qp_query;
-       union c2wr_qp_modify qp_modify;
-       union c2wr_qp_destroy qp_destroy;
-       struct c2wr_qp_connect qp_connect;
-       union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
-       union c2wr_nsmr_register nsmr_register;
-       union c2wr_nsmr_pbl nsmr_pbl;
-       union c2wr_mr_query mr_query;
-       union c2wr_mw_query mw_query;
-       union c2wr_stag_dealloc stag_dealloc;
-       union c2wr_sqwr sqwr;
-       struct c2wr_rqwr rqwr;
-       struct c2wr_ce ce;
-       union c2wr_ae ae;
-       union c2wr_init init;
-       union c2wr_ep_listen_create ep_listen_create;
-       union c2wr_ep_listen_destroy ep_listen_destroy;
-       union c2wr_cr_accept cr_accept;
-       union c2wr_cr_reject cr_reject;
-       union c2wr_console console;
-       union c2wr_flash_init flash_init;
-       union c2wr_flash flash;
-       union c2wr_buf_alloc buf_alloc;
-       union c2wr_buf_free buf_free;
-       union c2wr_flash_write flash_write;
-} __attribute__((packed));
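[Editorial note] One practical consequence of the giant union: sizeof(union c2wr) bounds the fixed part of any WR, which is convenient when sizing message-queue elements. Variable-length tails (SGE arrays, in-line private data, paddr arrays) still come on top of this bound. For example:

static size_t example_max_fixed_wr_size(void)
{
        return sizeof(union c2wr);      /* upper bound on any fixed WR part */
}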
-
-
-/*
- * Accessors for the wr fields that are packed together tightly to
- * reduce the wr message size.  The wr arguments are void* so that
- * either a union c2wr *, a struct c2wr_hdr *, or a pointer to any of the
- * types in union c2wr can be passed in.
- */
-static __inline__ u8 c2_wr_get_id(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->id;
-}
-static __inline__ void c2_wr_set_id(void *wr, u8 id)
-{
-       ((struct c2wr_hdr *) wr)->id = id;
-}
-static __inline__ u8 c2_wr_get_result(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->result;
-}
-static __inline__ void c2_wr_set_result(void *wr, u8 result)
-{
-       ((struct c2wr_hdr *) wr)->result = result;
-}
-static __inline__ u8 c2_wr_get_flags(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->flags;
-}
-static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
-{
-       ((struct c2wr_hdr *) wr)->flags = flags;
-}
-static __inline__ u8 c2_wr_get_sge_count(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->sge_count;
-}
-static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
-{
-       ((struct c2wr_hdr *) wr)->sge_count = sge_count;
-}
-static __inline__ __be32 c2_wr_get_wqe_count(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->wqe_count;
-}
-static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
-{
-       ((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
-}
-
-#endif                         /* _C2_WR_H_ */
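[Editorial note] Taken together, the accessors make WR preparation uniform across message types. A purely illustrative sketch (SQ_SIGNALED is the flag defined above; CCWR_SEND is assumed to be the send opcode from the id enumeration earlier in this header):

static void example_prepare_send(struct c2wr_send_req *wr, u8 nsge)
{
        c2_wr_set_id(wr, CCWR_SEND);            /* works on any c2wr_* type */
        c2_wr_set_flags(wr, SQ_SIGNALED);       /* request a completion */
        c2_wr_set_sge_count(wr, nsge);          /* SGEs follow in wr->data[] */
}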
diff --git a/drivers/staging/rdma/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig
deleted file mode 100644 (file)
index 3fadd2a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-config INFINIBAND_EHCA
-       tristate "eHCA support"
-       depends on IBMEBUS
-       ---help---
-       This driver supports the deprecated IBM pSeries eHCA InfiniBand
-       adapter.
-
-       To compile the driver as a module, choose M here. The module
-       will be called ib_ehca.
-
diff --git a/drivers/staging/rdma/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile
deleted file mode 100644 (file)
index 74d284e..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#  Authors: Heiko J Schick <schickhj@de.ibm.com>
-#           Christoph Raisch <raisch@de.ibm.com>
-#           Joachim Fenkes <fenkes@de.ibm.com>
-#
-#  Copyright (c) 2005 IBM Corporation
-#
-#  All rights reserved.
-#
-#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
-
-obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
-
-ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
-               ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
-               ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
-
diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO
deleted file mode 100644 (file)
index 199a4a6..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-9/2015
-
-The ehca driver has been deprecated and moved to drivers/staging/rdma.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c
deleted file mode 100644 (file)
index 94e088c..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  address vector functions
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *av_cache;
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-                 enum ib_rate path_rate, u32 *ipd)
-{
-       int path = ib_rate_to_mult(path_rate);
-       int link, ret;
-       struct ib_port_attr pa;
-
-       if (path_rate == IB_RATE_PORT_CURRENT) {
-               *ipd = 0;
-               return 0;
-       }
-
-       if (unlikely(path < 0)) {
-               ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x",
-                        path_rate);
-               return -EINVAL;
-       }
-
-       ret = ehca_query_port(&shca->ib_device, port, &pa);
-       if (unlikely(ret < 0)) {
-               ehca_err(&shca->ib_device, "Failed to query port  ret=%i", ret);
-               return ret;
-       }
-
-       link = ib_width_enum_to_int(pa.active_width) * pa.active_speed;
-
-       if (path >= link)
-               /* no need to throttle if path faster than link */
-               *ipd = 0;
-       else
-               /* IPD = round((link / path) - 1) */
-               *ipd = ((link + (path >> 1)) / path) - 1;
-
-       return 0;
-}
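[Editorial note] The final line computes round(link/path) - 1 in integer arithmetic by adding half the divisor before dividing. A standalone check of that rounding, outside the driver:

#include <assert.h>

int main(void)
{
        int link = 12, path = 8;        /* path slower than link */
        int ipd = ((link + (path >> 1)) / path) - 1;
        assert(ipd == 1);               /* round(12/8) - 1 == 2 - 1 */
        return 0;
}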
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-       int ret;
-       struct ehca_av *av;
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-
-       av = kmem_cache_alloc(av_cache, GFP_KERNEL);
-       if (!av) {
-               ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
-                        pd, ah_attr);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       av->av.sl = ah_attr->sl;
-       av->av.dlid = ah_attr->dlid;
-       av->av.slid_path_bits = ah_attr->src_path_bits;
-
-       if (ehca_static_rate < 0) {
-               u32 ipd;
-
-               if (ehca_calc_ipd(shca, ah_attr->port_num,
-                                 ah_attr->static_rate, &ipd)) {
-                       ret = -EINVAL;
-                       goto create_ah_exit1;
-               }
-               av->av.ipd = ipd;
-       } else
-               av->av.ipd = ehca_static_rate;
-
-       av->av.lnh = ah_attr->ah_flags;
-       av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
-                                           ah_attr->grh.traffic_class);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-                                           ah_attr->grh.flow_label);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-                                           ah_attr->grh.hop_limit);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
-       /* set sgid in grh.word_1 */
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               int rc;
-               struct ib_port_attr port_attr;
-               union ib_gid gid;
-
-               memset(&port_attr, 0, sizeof(port_attr));
-               rc = ehca_query_port(pd->device, ah_attr->port_num,
-                                    &port_attr);
-               if (rc) { /* invalid port number */
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Invalid port number "
-                                "ehca_query_port() returned %x "
-                                "pd=%p ah_attr=%p", rc, pd, ah_attr);
-                       goto create_ah_exit1;
-               }
-               memset(&gid, 0, sizeof(gid));
-               rc = ehca_query_gid(pd->device,
-                                   ah_attr->port_num,
-                                   ah_attr->grh.sgid_index, &gid);
-               if (rc) {
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Failed to retrieve sgid "
-                                "ehca_query_gid() returned %x "
-                                "pd=%p ah_attr=%p", rc, pd, ah_attr);
-                       goto create_ah_exit1;
-               }
-               memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
-       }
-       av->av.pmtu = shca->max_mtu;
-
-       /* dgid comes in grh.word_3 */
-       memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
-              sizeof(ah_attr->grh.dgid));
-
-       return &av->ib_ah;
-
-create_ah_exit1:
-       kmem_cache_free(av_cache, av);
-
-       return ERR_PTR(ret);
-}
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-       struct ehca_av *av;
-       struct ehca_ud_av new_ehca_av;
-       struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
-                                             ib_device);
-
-       memset(&new_ehca_av, 0, sizeof(new_ehca_av));
-       new_ehca_av.sl = ah_attr->sl;
-       new_ehca_av.dlid = ah_attr->dlid;
-       new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
-       new_ehca_av.ipd = ah_attr->static_rate;
-       new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
-                                        (ah_attr->ah_flags & IB_AH_GRH) > 0);
-       new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
-                                               ah_attr->grh.traffic_class);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-                                                ah_attr->grh.flow_label);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-                                                ah_attr->grh.hop_limit);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
-
-       /* set sgid in grh.word_1 */
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               int rc;
-               struct ib_port_attr port_attr;
-               union ib_gid gid;
-
-               memset(&port_attr, 0, sizeof(port_attr));
-               rc = ehca_query_port(ah->device, ah_attr->port_num,
-                                    &port_attr);
-               if (rc) { /* invalid port number */
-                       ehca_err(ah->device, "Invalid port number "
-                                "ehca_query_port() returned %x "
-                                "ah=%p ah_attr=%p port_num=%x",
-                                rc, ah, ah_attr, ah_attr->port_num);
-                       return -EINVAL;
-               }
-               memset(&gid, 0, sizeof(gid));
-               rc = ehca_query_gid(ah->device,
-                                   ah_attr->port_num,
-                                   ah_attr->grh.sgid_index, &gid);
-               if (rc) {
-                       ehca_err(ah->device, "Failed to retrieve sgid "
-                                "ehca_query_gid() returned %x "
-                                "ah=%p ah_attr=%p port_num=%x "
-                                "sgid_index=%x",
-                                rc, ah, ah_attr, ah_attr->port_num,
-                                ah_attr->grh.sgid_index);
-                       return -EINVAL;
-               }
-               memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
-       }
-
-       new_ehca_av.pmtu = shca->max_mtu;
-
-       memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
-              sizeof(ah_attr->grh.dgid));
-
-       av = container_of(ah, struct ehca_av, ib_ah);
-       av->av = new_ehca_av;
-
-       return 0;
-}
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-       struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
-
-       memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
-              sizeof(ah_attr->grh.dgid));
-       ah_attr->sl = av->av.sl;
-
-       ah_attr->dlid = av->av.dlid;
-
-       ah_attr->src_path_bits = av->av.slid_path_bits;
-       ah_attr->static_rate = av->av.ipd;
-       ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
-       ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
-                                                   av->av.grh.word_0);
-       ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
-                                               av->av.grh.word_0);
-       ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
-                                                av->av.grh.word_0);
-
-       return 0;
-}
-
-int ehca_destroy_ah(struct ib_ah *ah)
-{
-       kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
-
-       return 0;
-}
-
-int ehca_init_av_cache(void)
-{
-       av_cache = kmem_cache_create("ehca_cache_av",
-                                  sizeof(struct ehca_av), 0,
-                                  SLAB_HWCACHE_ALIGN,
-                                  NULL);
-       if (!av_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_av_cache(void)
-{
-       kmem_cache_destroy(av_cache);
-}
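[Editorial note] ehca_init_av_cache() and ehca_cleanup_av_cache() follow the usual slab-cache lifecycle: create the cache once at load time, destroy it on unload. A sketch of how a module might pair them (error handling trimmed; in the real driver the calls are made from its main init/exit path):

static int __init example_init(void)
{
        int ret = ehca_init_av_cache(); /* create the AV slab cache once */
        if (ret)
                return ret;
        /* ... register the IB device ... */
        return 0;
}

static void __exit example_exit(void)
{
        ehca_cleanup_av_cache();        /* destroys the kmem_cache */
}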
diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
deleted file mode 100644 (file)
index bd45e0f..0000000
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Struct definition for eHCA internal structures
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_H__
-#define __EHCA_CLASSES_H__
-
-struct ehca_module;
-struct ehca_qp;
-struct ehca_cq;
-struct ehca_eq;
-struct ehca_mr;
-struct ehca_mw;
-struct ehca_pd;
-struct ehca_av;
-
-#include <linux/wait.h>
-#include <linux/mutex.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_user_verbs.h>
-
-#ifdef CONFIG_PPC64
-#include "ehca_classes_pSeries.h"
-#endif
-#include "ipz_pt_fn.h"
-#include "ehca_qes.h"
-#include "ehca_irq.h"
-
-#define EHCA_EQE_CACHE_SIZE 20
-#define EHCA_MAX_NUM_QUEUES 0xffff
-
-struct ehca_eqe_cache_entry {
-       struct ehca_eqe *eqe;
-       struct ehca_cq *cq;
-};
-
-struct ehca_eq {
-       u32 length;
-       struct ipz_queue ipz_queue;
-       struct ipz_eq_handle ipz_eq_handle;
-       struct work_struct work;
-       struct h_galpas galpas;
-       int is_initialized;
-       struct ehca_pfeq pf;
-       spinlock_t spinlock;
-       struct tasklet_struct interrupt_task;
-       u32 ist;
-       spinlock_t irq_spinlock;
-       struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
-};
-
-struct ehca_sma_attr {
-       u16 lid, lmc, sm_sl, sm_lid;
-       u16 pkey_tbl_len, pkeys[16];
-};
-
-struct ehca_sport {
-       struct ib_cq *ibcq_aqp1;
-       struct ib_qp *ibqp_sqp[2];
-       /* lock to serialize modify_qp() calls for sqp in normal
-        * and irq path (when event PORT_ACTIVE is received the first time)
-        */
-       spinlock_t mod_sqp_lock;
-       enum ib_port_state port_state;
-       struct ehca_sma_attr saved_attr;
-       u32 pma_qp_nr;
-};
-
-#define HCA_CAP_MR_PGSIZE_4K  0x80000000
-#define HCA_CAP_MR_PGSIZE_64K 0x40000000
-#define HCA_CAP_MR_PGSIZE_1M  0x20000000
-#define HCA_CAP_MR_PGSIZE_16M 0x10000000
-
-struct ehca_shca {
-       struct ib_device ib_device;
-       struct platform_device *ofdev;
-       u8 num_ports;
-       int hw_level;
-       struct list_head shca_list;
-       struct ipz_adapter_handle ipz_hca_handle;
-       struct ehca_sport sport[2];
-       struct ehca_eq eq;
-       struct ehca_eq neq;
-       struct ehca_mr *maxmr;
-       struct ehca_pd *pd;
-       struct h_galpas galpas;
-       struct mutex modify_mutex;
-       u64 hca_cap;
-       /* MR pgsize: bits 0-3 mean 4K, 64K, 1M, 16M respectively */
-       u32 hca_cap_mr_pgsize;
-       int max_mtu;
-       int max_num_qps;
-       int max_num_cqs;
-       atomic_t num_cqs;
-       atomic_t num_qps;
-};
-
-struct ehca_pd {
-       struct ib_pd ib_pd;
-       struct ipz_pd fw_pd;
-       /* small queue mgmt */
-       struct mutex lock;
-       struct list_head free[2];
-       struct list_head full[2];
-};
-
-enum ehca_ext_qp_type {
-       EQPT_NORMAL    = 0,
-       EQPT_LLQP      = 1,
-       EQPT_SRQBASE   = 2,
-       EQPT_SRQ       = 3,
-};
-
-/* struct to cache modify_qp()'s parms for GSI/SMI qp */
-struct ehca_mod_qp_parm {
-       int mask;
-       struct ib_qp_attr attr;
-};
-
-#define EHCA_MOD_QP_PARM_MAX 4
-
-#define QMAP_IDX_MASK 0xFFFFULL
-
-/* struct for tracking if cqes have been reported to the application */
-struct ehca_qmap_entry {
-       u16 app_wr_id;
-       u8 reported;
-       u8 cqe_req;
-};
-
-struct ehca_queue_map {
-       struct ehca_qmap_entry *map;
-       unsigned int entries;
-       unsigned int tail;
-       unsigned int left_to_poll;
-       unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
-};
-
-/* function to calculate the next index for the qmap */
-static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
-{
-       unsigned int temp = cur_index + 1;
-       return (temp == limit) ? 0 : temp;
-}
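[Editorial note] next_index() is a branch-based ring increment: it wraps to 0 when the incremented index reaches limit. A standalone check (the function body is copied verbatim so the snippet compiles on its own):

#include <assert.h>

static inline unsigned int next_index(unsigned int cur, unsigned int limit)
{
        unsigned int temp = cur + 1;
        return (temp == limit) ? 0 : temp;
}

int main(void)
{
        assert(next_index(2, 4) == 3);  /* ordinary increment */
        assert(next_index(3, 4) == 0);  /* wraps at limit */
        return 0;
}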
-
-struct ehca_qp {
-       union {
-               struct ib_qp ib_qp;
-               struct ib_srq ib_srq;
-       };
-       u32 qp_type;
-       enum ehca_ext_qp_type ext_type;
-       enum ib_qp_state state;
-       struct ipz_queue ipz_squeue;
-       struct ehca_queue_map sq_map;
-       struct ipz_queue ipz_rqueue;
-       struct ehca_queue_map rq_map;
-       struct h_galpas galpas;
-       u32 qkey;
-       u32 real_qp_num;
-       u32 token;
-       spinlock_t spinlock_s;
-       spinlock_t spinlock_r;
-       u32 sq_max_inline_data_size;
-       struct ipz_qp_handle ipz_qp_handle;
-       struct ehca_pfqp pf;
-       struct ib_qp_init_attr init_attr;
-       struct ehca_cq *send_cq;
-       struct ehca_cq *recv_cq;
-       unsigned int sqerr_purgeflag;
-       struct hlist_node list_entries;
-       /* array to cache modify_qp()'s parms for GSI/SMI qp */
-       struct ehca_mod_qp_parm *mod_qp_parm;
-       int mod_qp_parm_idx;
-       /* mmap counter for resources mapped into user space */
-       u32 mm_count_squeue;
-       u32 mm_count_rqueue;
-       u32 mm_count_galpa;
-       /* unsolicited ack circumvention */
-       int unsol_ack_circ;
-       int mtu_shift;
-       u32 message_count;
-       u32 packet_count;
-       atomic_t nr_events; /* events seen */
-       wait_queue_head_t wait_completion;
-       int mig_armed;
-       struct list_head sq_err_node;
-       struct list_head rq_err_node;
-};
-
-#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
-#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
-#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
-
-/* must be power of 2 */
-#define QP_HASHTAB_LEN 8
-
-struct ehca_cq {
-       struct ib_cq ib_cq;
-       struct ipz_queue ipz_queue;
-       struct h_galpas galpas;
-       spinlock_t spinlock;
-       u32 cq_number;
-       u32 token;
-       u32 nr_of_entries;
-       struct ipz_cq_handle ipz_cq_handle;
-       struct ehca_pfcq pf;
-       spinlock_t cb_lock;
-       struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
-       struct list_head entry;
-       u32 nr_callbacks;   /* #events assigned to cpu by scaling code */
-       atomic_t nr_events; /* #events seen */
-       wait_queue_head_t wait_completion;
-       spinlock_t task_lock;
-       /* mmap counter for resources mapped into user space */
-       u32 mm_count_queue;
-       u32 mm_count_galpa;
-       struct list_head sqp_err_list;
-       struct list_head rqp_err_list;
-};
-
-enum ehca_mr_flag {
-       EHCA_MR_FLAG_FMR = 0x80000000,   /* FMR, created with ehca_alloc_fmr */
-       EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
-};
-
-struct ehca_mr {
-       union {
-               struct ib_mr ib_mr;     /* must always be first in ehca_mr */
-               struct ib_fmr ib_fmr;   /* must always be first in ehca_mr */
-       } ib;
-       struct ib_umem *umem;
-       spinlock_t mrlock;
-
-       enum ehca_mr_flag flags;
-       u32 num_kpages;         /* number of kernel pages */
-       u32 num_hwpages;        /* number of hw pages to form MR */
-       u64 hwpage_size;        /* hw page size used for this MR */
-       int acl;                /* ACL (stored here for usage in reregister) */
-       u64 *start;             /* virtual start address (stored here for */
-                               /* usage in reregister) */
-       u64 size;               /* size (stored here for usage in reregister) */
-       u32 fmr_page_size;      /* page size for FMR */
-       u32 fmr_max_pages;      /* max pages for FMR */
-       u32 fmr_max_maps;       /* max outstanding maps for FMR */
-       u32 fmr_map_cnt;        /* map counter for FMR */
-       /* fw specific data */
-       struct ipz_mrmw_handle ipz_mr_handle;   /* MR handle for h-calls */
-       struct h_galpas galpas;
-};
-
-struct ehca_mw {
-       struct ib_mw ib_mw;     /* gen2 mw, must always be first in ehca_mw */
-       spinlock_t mwlock;
-
-       u8 never_bound;         /* indication MW was never bound */
-       struct ipz_mrmw_handle ipz_mw_handle;   /* MW handle for h-calls */
-       struct h_galpas galpas;
-};
-
-enum ehca_mr_pgi_type {
-       EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
-                                 * ehca_rereg_phys_mr,
-                                 * ehca_reg_internal_maxmr */
-       EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
-       EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
-};
-
-struct ehca_mr_pginfo {
-       enum ehca_mr_pgi_type type;
-       u64 num_kpages;
-       u64 kpage_cnt;
-       u64 hwpage_size;     /* hw page size used for this MR */
-       u64 num_hwpages;     /* number of hw pages */
-       u64 hwpage_cnt;      /* counter for hw pages */
-       u64 next_hwpage;     /* next hw page in buffer/chunk/listelem */
-
-       union {
-               struct { /* type EHCA_MR_PGI_PHYS section */
-                       int num_phys_buf;
-                       struct ib_phys_buf *phys_buf_array;
-                       u64 next_buf;
-               } phy;
-               struct { /* type EHCA_MR_PGI_USER section */
-                       struct ib_umem *region;
-                       struct scatterlist *next_sg;
-                       u64 next_nmap;
-               } usr;
-               struct { /* type EHCA_MR_PGI_FMR section */
-                       u64 fmr_pgsize;
-                       u64 *page_list;
-                       u64 next_listelem;
-               } fmr;
-       } u;
-};
-
-/* output parameters for MR/FMR hipz calls */
-struct ehca_mr_hipzout_parms {
-       struct ipz_mrmw_handle handle;
-       u32 lkey;
-       u32 rkey;
-       u64 len;
-       u64 vaddr;
-       u32 acl;
-};
-
-/* output parameters for MW hipz calls */
-struct ehca_mw_hipzout_parms {
-       struct ipz_mrmw_handle handle;
-       u32 rkey;
-};
-
-struct ehca_av {
-       struct ib_ah ib_ah;
-       struct ehca_ud_av av;
-};
-
-struct ehca_ucontext {
-       struct ib_ucontext ib_ucontext;
-};
-
-int ehca_init_pd_cache(void);
-void ehca_cleanup_pd_cache(void);
-int ehca_init_cq_cache(void);
-void ehca_cleanup_cq_cache(void);
-int ehca_init_qp_cache(void);
-void ehca_cleanup_qp_cache(void);
-int ehca_init_av_cache(void);
-void ehca_cleanup_av_cache(void);
-int ehca_init_mrmw_cache(void);
-void ehca_cleanup_mrmw_cache(void);
-int ehca_init_small_qp_cache(void);
-void ehca_cleanup_small_qp_cache(void);
-
-extern rwlock_t ehca_qp_idr_lock;
-extern rwlock_t ehca_cq_idr_lock;
-extern struct idr ehca_qp_idr;
-extern struct idr ehca_cq_idr;
-extern spinlock_t shca_list_lock;
-
-extern int ehca_static_rate;
-extern int ehca_port_act_time;
-extern bool ehca_use_hp_mr;
-extern bool ehca_scaling_code;
-extern int ehca_lock_hcalls;
-extern int ehca_nr_ports;
-extern int ehca_max_cq;
-extern int ehca_max_qp;
-
-struct ipzu_queue_resp {
-       u32 qe_size;      /* queue entry size */
-       u32 act_nr_of_sg;
-       u32 queue_length; /* queue length allocated in bytes */
-       u32 pagesize;
-       u32 toggle_state;
-       u32 offset; /* save offset within a page for small_qp */
-};
-
-struct ehca_create_cq_resp {
-       u32 cq_number;
-       u32 token;
-       struct ipzu_queue_resp ipz_queue;
-       u32 fw_handle_ofs;
-       u32 dummy;
-};
-
-struct ehca_create_qp_resp {
-       u32 qp_num;
-       u32 token;
-       u32 qp_type;
-       u32 ext_type;
-       u32 qkey;
-       /* qp_num assigned by ehca: sqp0/1 may have been given different numbers */
-       u32 real_qp_num;
-       u32 fw_handle_ofs;
-       u32 dummy;
-       struct ipzu_queue_resp ipz_squeue;
-       struct ipzu_queue_resp ipz_rqueue;
-};
-
-struct ehca_alloc_cq_parms {
-       u32 nr_cqe;
-       u32 act_nr_of_entries;
-       u32 act_pages;
-       struct ipz_eq_handle eq_handle;
-};
-
-enum ehca_service_type {
-       ST_RC  = 0,
-       ST_UC  = 1,
-       ST_RD  = 2,
-       ST_UD  = 3,
-};
-
-enum ehca_ll_comp_flags {
-       LLQP_SEND_COMP = 0x20,
-       LLQP_RECV_COMP = 0x40,
-       LLQP_COMP_MASK = 0x60,
-};
-
-struct ehca_alloc_queue_parms {
-       /* input parameters */
-       int max_wr;
-       int max_sge;
-       int page_size;
-       int is_small;
-
-       /* output parameters */
-       u16 act_nr_wqes;
-       u8  act_nr_sges;
-       u32 queue_size; /* bytes for small queues, pages otherwise */
-};
-
-struct ehca_alloc_qp_parms {
-       struct ehca_alloc_queue_parms squeue;
-       struct ehca_alloc_queue_parms rqueue;
-
-       /* input parameters */
-       enum ehca_service_type servicetype;
-       int qp_storage;
-       int sigtype;
-       enum ehca_ext_qp_type ext_type;
-       enum ehca_ll_comp_flags ll_comp_flags;
-       int ud_av_l_key_ctl;
-
-       u32 token;
-       struct ipz_eq_handle eq_handle;
-       struct ipz_pd pd;
-       struct ipz_cq_handle send_cq_handle, recv_cq_handle;
-
-       u32 srq_qpn, srq_token, srq_limit;
-
-       /* output parameters */
-       u32 real_qp_num;
-       struct ipz_qp_handle qp_handle;
-       struct h_galpas galpas;
-};
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h
deleted file mode 100644 (file)
index 689c357..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  pSeries interface definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_PSERIES_H__
-#define __EHCA_CLASSES_PSERIES_H__
-
-#include "hcp_phyp.h"
-#include "ipz_pt_fn.h"
-
-
-struct ehca_pfqp {
-       struct ipz_qpt sqpt;
-       struct ipz_qpt rqpt;
-};
-
-struct ehca_pfcq {
-       struct ipz_qpt qpt;
-       u32 cqnr;
-};
-
-struct ehca_pfeq {
-       struct ipz_qpt qpt;
-       struct h_galpa galpa;
-       u32 eqnr;
-};
-
-struct ipz_adapter_handle {
-       u64 handle;
-};
-
-struct ipz_cq_handle {
-       u64 handle;
-};
-
-struct ipz_eq_handle {
-       u64 handle;
-};
-
-struct ipz_qp_handle {
-       u64 handle;
-};
-struct ipz_mrmw_handle {
-       u64 handle;
-};
-
-struct ipz_pd {
-       u32 value;
-};
-
-struct hcp_modify_qp_control_block {
-       u32 qkey;                      /* 00 */
-       u32 rdd;                       /* reliable datagram domain */
-       u32 send_psn;                  /* 02 */
-       u32 receive_psn;               /* 03 */
-       u32 prim_phys_port;            /* 04 */
-       u32 alt_phys_port;             /* 05 */
-       u32 prim_p_key_idx;            /* 06 */
-       u32 alt_p_key_idx;             /* 07 */
-       u32 rdma_atomic_ctrl;          /* 08 */
-       u32 qp_state;                  /* 09 */
-       u32 reserved_10;               /* 10 */
-       u32 rdma_nr_atomic_resp_res;   /* 11 */
-       u32 path_migration_state;      /* 12 */
-       u32 rdma_atomic_outst_dest_qp; /* 13 */
-       u32 dest_qp_nr;                /* 14 */
-       u32 min_rnr_nak_timer_field;   /* 15 */
-       u32 service_level;             /* 16 */
-       u32 send_grh_flag;             /* 17 */
-       u32 retry_count;               /* 18 */
-       u32 timeout;                   /* 19 */
-       u32 path_mtu;                  /* 20 */
-       u32 max_static_rate;           /* 21 */
-       u32 dlid;                      /* 22 */
-       u32 rnr_retry_count;           /* 23 */
-       u32 source_path_bits;          /* 24 */
-       u32 traffic_class;             /* 25 */
-       u32 hop_limit;                 /* 26 */
-       u32 source_gid_idx;            /* 27 */
-       u32 flow_label;                /* 28 */
-       u32 reserved_29;               /* 29 */
-       union {                        /* 30 */
-               u64 dw[2];
-               u8 byte[16];
-       } dest_gid;
-       u32 service_level_al;          /* 34 */
-       u32 send_grh_flag_al;          /* 35 */
-       u32 retry_count_al;            /* 36 */
-       u32 timeout_al;                /* 37 */
-       u32 max_static_rate_al;        /* 38 */
-       u32 dlid_al;                   /* 39 */
-       u32 rnr_retry_count_al;        /* 40 */
-       u32 source_path_bits_al;       /* 41 */
-       u32 traffic_class_al;          /* 42 */
-       u32 hop_limit_al;              /* 43 */
-       u32 source_gid_idx_al;         /* 44 */
-       u32 flow_label_al;             /* 45 */
-       u32 reserved_46;               /* 46 */
-       u32 reserved_47;               /* 47 */
-       union {                        /* 48 */
-               u64 dw[2];
-               u8 byte[16];
-       } dest_gid_al;
-       u32 max_nr_outst_send_wr;      /* 52 */
-       u32 max_nr_outst_recv_wr;      /* 53 */
-       u32 disable_ete_credit_check;  /* 54 */
-       u32 qp_number;                 /* 55 */
-       u64 send_queue_handle;         /* 56 */
-       u64 recv_queue_handle;         /* 58 */
-       u32 actual_nr_sges_in_sq_wqe;  /* 60 */
-       u32 actual_nr_sges_in_rq_wqe;  /* 61 */
-       u32 qp_enable;                 /* 62 */
-       u32 curr_srq_limit;            /* 63 */
-       u64 qp_aff_asyn_ev_log_reg;    /* 64 */
-       u64 shared_rq_hndl;            /* 66 */
-       u64 trigg_doorbell_qp_hndl;    /* 68 */
-       u32 reserved_70_127[58];       /* 70 */
-};
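/*
 * Layout note (an illustrative sketch, not part of the deleted file):
 * the trailing numbers above are 32-bit word offsets into the control
 * block, so dest_gid sits at word 30 and the block is 70 + 58 = 128
 * words (512 bytes).  A compile-time check of that layout, assuming
 * the kernel's BUILD_BUG_ON, would read:
 *
 *	BUILD_BUG_ON(offsetof(struct hcp_modify_qp_control_block,
 *			      dest_gid) != 30 * sizeof(u32));
 *	BUILD_BUG_ON(sizeof(struct hcp_modify_qp_control_block) !=
 *		     128 * sizeof(u32));
 */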
-
-#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM( 0,  0)
-#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM( 2,  2)
-#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM( 3,  3)
-#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM( 4,  4)
-#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM( 5,  5)
-#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM( 6,  6)
-#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM( 7,  7)
-#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM( 8,  8)
-#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM( 9,  9)
-#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11, 11)
-#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12, 12)
-#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13, 13)
-#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14, 14)
-#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15, 15)
-#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16, 16)
-#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17, 17)
-#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18, 18)
-#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19, 19)
-#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20, 20)
-#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21, 21)
-#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22, 22)
-#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23, 23)
-#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24, 24)
-#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25, 25)
-#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26, 26)
-#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27, 27)
-#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28, 28)
-#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30, 30)
-#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31, 31)
-#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32, 32)
-#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33, 33)
-#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34, 34)
-#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35, 35)
-#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36, 36)
-#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37, 37)
-#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38, 38)
-#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39, 39)
-#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40, 40)
-#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41, 41)
-#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42, 42)
-#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44, 44)
-#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45, 45)
-#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46, 46)
-#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47, 47)
-#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48, 48)
-#define MQPCB_MASK_CURR_SRQ_LIMIT               EHCA_BMASK_IBM(49, 49)
-#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50, 50)
-#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51, 51)
-
-#endif /* __EHCA_CLASSES_PSERIES_H__ */
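/*
 * Usage sketch for the MQPCB_MASK_* bits above (assumes the
 * EHCA_BMASK_SET helper from ehca_tools.h, which is not shown in this
 * hunk): a caller ORs the per-attribute bits into the update mask it
 * hands to the modify-QP hcall, e.g.
 *
 *	u64 update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1) |
 *			  EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
 */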
diff --git a/drivers/staging/rdma/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c
deleted file mode 100644 (file)
index 1aa7931..0000000
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Completion queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *cq_cache;
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
-{
-       unsigned int qp_num = qp->real_qp_num;
-       unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-       hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-
-       ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
-                cq->cq_number, qp_num);
-
-       return 0;
-}
-
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
-{
-       int ret = -EINVAL;
-       unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-       struct hlist_node *iter;
-       struct ehca_qp *qp;
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-       hlist_for_each(iter, &cq->qp_hashtab[key]) {
-               qp = hlist_entry(iter, struct ehca_qp, list_entries);
-               if (qp->real_qp_num == real_qp_num) {
-                       hlist_del(iter);
-                       ehca_dbg(cq->ib_cq.device,
-                                "removed qp from cq .cq_num=%x real_qp_num=%x",
-                                cq->cq_number, real_qp_num);
-                       ret = 0;
-                       break;
-               }
-       }
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-       if (ret)
-               ehca_err(cq->ib_cq.device,
-                        "qp not found cq_num=%x real_qp_num=%x",
-                        cq->cq_number, real_qp_num);
-
-       return ret;
-}
-
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
-{
-       struct ehca_qp *ret = NULL;
-       unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-       struct hlist_node *iter;
-       struct ehca_qp *qp;
-       hlist_for_each(iter, &cq->qp_hashtab[key]) {
-               qp = hlist_entry(iter, struct ehca_qp, list_entries);
-               if (qp->real_qp_num == real_qp_num) {
-                       ret = qp;
-                       break;
-               }
-       }
-       return ret;
-}
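/*
 * Note: ehca_cq_get_qp() walks the chain without taking cq->spinlock,
 * so callers are presumably expected to hold it.  The key computation
 * qp_num & (QP_HASHTAB_LEN - 1) is only equivalent to a modulo when
 * QP_HASHTAB_LEN is a power of two; a sketch of a compile-time guard:
 *
 *	BUILD_BUG_ON(QP_HASHTAB_LEN & (QP_HASHTAB_LEN - 1));
 */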
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-                            const struct ib_cq_init_attr *attr,
-                            struct ib_ucontext *context,
-                            struct ib_udata *udata)
-{
-       int cqe = attr->cqe;
-       static const u32 additional_cqe = 20;
-       struct ib_cq *cq;
-       struct ehca_cq *my_cq;
-       struct ehca_shca *shca =
-               container_of(device, struct ehca_shca, ib_device);
-       struct ipz_adapter_handle adapter_handle;
-       struct ehca_alloc_cq_parms param; /* h_call's out parameters */
-       struct h_galpa gal;
-       void *vpage;
-       u32 counter;
-       u64 rpage, cqx_fec, h_ret;
-       int rc, i;
-       unsigned long flags;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
-               return ERR_PTR(-EINVAL);
-
-       if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
-               ehca_err(device, "Unable to create CQ, max number of %i "
-                       "CQs reached.", shca->max_num_cqs);
-               ehca_err(device, "To increase the maximum number of CQs "
-                       "use the number_of_cqs module parameter.\n");
-               return ERR_PTR(-ENOSPC);
-       }
-
-       my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
-       if (!my_cq) {
-               ehca_err(device, "Out of memory for ehca_cq struct device=%p",
-                        device);
-               atomic_dec(&shca->num_cqs);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
-
-       spin_lock_init(&my_cq->spinlock);
-       spin_lock_init(&my_cq->cb_lock);
-       spin_lock_init(&my_cq->task_lock);
-       atomic_set(&my_cq->nr_events, 0);
-       init_waitqueue_head(&my_cq->wait_completion);
-
-       cq = &my_cq->ib_cq;
-
-       adapter_handle = shca->ipz_hca_handle;
-       param.eq_handle = shca->eq.ipz_eq_handle;
-
-       idr_preload(GFP_KERNEL);
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       rc = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-       idr_preload_end();
-
-       if (rc < 0) {
-               cq = ERR_PTR(-ENOMEM);
-               ehca_err(device, "Can't allocate new idr entry. device=%p",
-                        device);
-               goto create_cq_exit1;
-       }
-       my_cq->token = rc;
-
-       /*
-        * CQ maximum depth is 4GB-64, but we need an additional 20
-        * entries as a buffer for receiving error CQEs.
-        */
-       param.nr_cqe = cqe + additional_cqe;
-       h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(device, "hipz_h_alloc_resource_cq() failed "
-                        "h_ret=%lli device=%p", h_ret, device);
-               cq = ERR_PTR(ehca2ib_return_code(h_ret));
-               goto create_cq_exit2;
-       }
-
-       rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
-                               EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
-       if (!rc) {
-               ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
-                        rc, device);
-               cq = ERR_PTR(-EINVAL);
-               goto create_cq_exit3;
-       }
-
-       for (counter = 0; counter < param.act_pages; counter++) {
-               vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-               if (!vpage) {
-                       ehca_err(device, "ipz_qpageit_get_inc() "
-                                "returns NULL device=%p", device);
-                       cq = ERR_PTR(-EAGAIN);
-                       goto create_cq_exit4;
-               }
-               rpage = __pa(vpage);
-
-               h_ret = hipz_h_register_rpage_cq(adapter_handle,
-                                                my_cq->ipz_cq_handle,
-                                                &my_cq->pf,
-                                                0,
-                                                0,
-                                                rpage,
-                                                1,
-                                                my_cq->galpas.kernel);
-
-               if (h_ret < H_SUCCESS) {
-                       ehca_err(device, "hipz_h_register_rpage_cq() failed "
-                                "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i "
-                                "act_pages=%i", my_cq, my_cq->cq_number,
-                                h_ret, counter, param.act_pages);
-                       cq = ERR_PTR(-EINVAL);
-                       goto create_cq_exit4;
-               }
-
-               if (counter == (param.act_pages - 1)) {
-                       vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-                       if ((h_ret != H_SUCCESS) || vpage) {
-                               ehca_err(device, "Registration of pages not "
-                                        "complete ehca_cq=%p cq_num=%x "
-                                        "h_ret=%lli", my_cq, my_cq->cq_number,
-                                        h_ret);
-                               cq = ERR_PTR(-EAGAIN);
-                               goto create_cq_exit4;
-                       }
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED) {
-                               ehca_err(device, "Registration of page failed "
-                                        "ehca_cq=%p cq_num=%x h_ret=%lli "
-                                        "counter=%i act_pages=%i",
-                                        my_cq, my_cq->cq_number,
-                                        h_ret, counter, param.act_pages);
-                               cq = ERR_PTR(-ENOMEM);
-                               goto create_cq_exit4;
-                       }
-               }
-       }
-
-       ipz_qeit_reset(&my_cq->ipz_queue);
-
-       gal = my_cq->galpas.kernel;
-       cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
-       ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx",
-                my_cq, my_cq->cq_number, cqx_fec);
-
-       my_cq->ib_cq.cqe = my_cq->nr_of_entries =
-               param.act_nr_of_entries - additional_cqe;
-       my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
-
-       for (i = 0; i < QP_HASHTAB_LEN; i++)
-               INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
-
-       INIT_LIST_HEAD(&my_cq->sqp_err_list);
-       INIT_LIST_HEAD(&my_cq->rqp_err_list);
-
-       if (context) {
-               struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
-               struct ehca_create_cq_resp resp;
-               memset(&resp, 0, sizeof(resp));
-               resp.cq_number = my_cq->cq_number;
-               resp.token = my_cq->token;
-               resp.ipz_queue.qe_size = ipz_queue->qe_size;
-               resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
-               resp.ipz_queue.queue_length = ipz_queue->queue_length;
-               resp.ipz_queue.pagesize = ipz_queue->pagesize;
-               resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
-               resp.fw_handle_ofs = (u32)
-                       (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
-               if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
-                       ehca_err(device, "Copy to udata failed.");
-                       cq = ERR_PTR(-EFAULT);
-                       goto create_cq_exit4;
-               }
-       }
-
-       return cq;
-
-create_cq_exit4:
-       ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-
-create_cq_exit3:
-       h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-       if (h_ret != H_SUCCESS)
-               ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
-                        "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret);
-
-create_cq_exit2:
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       idr_remove(&ehca_cq_idr, my_cq->token);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-create_cq_exit1:
-       kmem_cache_free(cq_cache, my_cq);
-
-       atomic_dec(&shca->num_cqs);
-       return cq;
-}
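/*
 * For context (a sketch against the 4.x verbs API, not code from this
 * driver): ehca_create_cq() is the ib_device .create_cq hook and is
 * reached through the IB core, roughly as
 *
 *	struct ib_cq_init_attr cq_attr = { .cqe = 256 };
 *	struct ib_cq *cq = ib_create_cq(ibdev, comp_handler, NULL,
 *					cq_context, &cq_attr);
 *
 * where comp_handler and cq_context are the consumer's callback and
 * cookie.
 */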
-
-int ehca_destroy_cq(struct ib_cq *cq)
-{
-       u64 h_ret;
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int cq_num = my_cq->cq_number;
-       struct ib_device *device = cq->device;
-       struct ehca_shca *shca = container_of(device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       unsigned long flags;
-
-       if (cq->uobject) {
-               if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
-                       ehca_err(device, "Resources still referenced in "
-                                "user space cq_num=%x", my_cq->cq_number);
-                       return -EINVAL;
-               }
-       }
-
-       /*
-        * remove the CQ from the idr first to make sure
-        * no more interrupt tasklets will touch this CQ
-        */
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       idr_remove(&ehca_cq_idr, my_cq->token);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-       /* now wait until all pending events have completed */
-       wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
-
-       /* nobody's using our CQ any longer -- we can destroy it */
-       h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
-       if (h_ret == H_R_STATE) {
-               /* cq in err: read err data and destroy it forcibly */
-               ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err "
-                        "state. Try to delete it forcibly.",
-                        my_cq, cq_num, my_cq->ipz_cq_handle.handle);
-               ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
-               h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-               if (h_ret == H_SUCCESS)
-                       ehca_dbg(device, "cq_num=%x deleted successfully.",
-                                cq_num);
-       }
-       if (h_ret != H_SUCCESS) {
-               ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli "
-                        "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
-               return ehca2ib_return_code(h_ret);
-       }
-       ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-       kmem_cache_free(cq_cache, my_cq);
-
-       atomic_dec(&shca->num_cqs);
-       return 0;
-}
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-       /* TODO: proper resize needs to be done */
-       ehca_err(cq->device, "not implemented yet");
-
-       return -EFAULT;
-}
-
-int ehca_init_cq_cache(void)
-{
-       cq_cache = kmem_cache_create("ehca_cache_cq",
-                                    sizeof(struct ehca_cq), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!cq_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_cq_cache(void)
-{
-       kmem_cache_destroy(cq_cache);
-}
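/*
 * Lifecycle note: ehca_init_cq_cache()/ehca_cleanup_cq_cache() are
 * paired with the driver's module init/exit paths (in ehca_main.c,
 * not shown in this diff); a minimal sketch of that pairing, with
 * hypothetical function names:
 *
 *	static int __init ehca_module_init(void)
 *	{
 *		return ehca_init_cq_cache();
 *	}
 *
 *	static void __exit ehca_module_exit(void)
 *	{
 *		ehca_cleanup_cq_cache();
 *	}
 */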
diff --git a/drivers/staging/rdma/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c
deleted file mode 100644 (file)
index 90da674..0000000
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Event queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_qes.h"
-#include "hcp_if.h"
-#include "ipz_pt_fn.h"
-
-int ehca_create_eq(struct ehca_shca *shca,
-                  struct ehca_eq *eq,
-                  const enum ehca_eq_type type, const u32 length)
-{
-       int ret;
-       u64 h_ret;
-       u32 nr_pages;
-       u32 i;
-       void *vpage;
-       struct ib_device *ib_dev = &shca->ib_device;
-
-       spin_lock_init(&eq->spinlock);
-       spin_lock_init(&eq->irq_spinlock);
-       eq->is_initialized = 0;
-
-       if (type != EHCA_EQ && type != EHCA_NEQ) {
-               ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
-               return -EINVAL;
-       }
-       if (!length) {
-               ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
-               return -EINVAL;
-       }
-
-       h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
-                                        &eq->pf,
-                                        type,
-                                        length,
-                                        &eq->ipz_eq_handle,
-                                        &eq->length,
-                                        &nr_pages, &eq->ist);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
-               return -EINVAL;
-       }
-
-       ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
-                            EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
-       if (!ret) {
-               ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
-               goto create_eq_exit1;
-       }
-
-       for (i = 0; i < nr_pages; i++) {
-               u64 rpage;
-
-               vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-               if (!vpage)
-                       goto create_eq_exit2;
-
-               rpage = __pa(vpage);
-               h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
-                                                eq->ipz_eq_handle,
-                                                &eq->pf,
-                                                0, 0, rpage, 1);
-
-               if (i == (nr_pages - 1)) {
-                       /* last page */
-                       vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-                       if (h_ret != H_SUCCESS || vpage)
-                               goto create_eq_exit2;
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED)
-                               goto create_eq_exit2;
-               }
-       }
-
-       ipz_qeit_reset(&eq->ipz_queue);
-
-       /* register interrupt handlers and initialize work queues */
-       if (type == EHCA_EQ) {
-               tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
-
-               ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
-                                         0, "ehca_eq",
-                                         (void *)shca);
-               if (ret < 0)
-                       ehca_err(ib_dev, "Can't map interrupt handler.");
-       } else if (type == EHCA_NEQ) {
-               tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
-
-               ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
-                                         0, "ehca_neq",
-                                         (void *)shca);
-               if (ret < 0)
-                       ehca_err(ib_dev, "Can't map interrupt handler.");
-       }
-
-       eq->is_initialized = 1;
-
-       return 0;
-
-create_eq_exit2:
-       ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-create_eq_exit1:
-       hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-       return -EINVAL;
-}
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-       unsigned long flags;
-       void *eqe;
-
-       spin_lock_irqsave(&eq->spinlock, flags);
-       eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
-       spin_unlock_irqrestore(&eq->spinlock, flags);
-
-       return eqe;
-}
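/*
 * ehca_poll_eq() serializes EQE consumption with eq->spinlock so the
 * IRQ tasklets and any other polling path can drain the same queue;
 * each call returns one valid entry or NULL once the queue is empty
 * (see the drain loop in ehca_tasklet_neq() further below).
 */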
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-       unsigned long flags;
-       u64 h_ret;
-
-       ibmebus_free_irq(eq->ist, (void *)shca);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       eq->is_initialized = 0;
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       tasklet_kill(&eq->interrupt_task);
-
-       h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't free EQ resources.");
-               return -EINVAL;
-       }
-       ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c
deleted file mode 100644 (file)
index e8b1bb6..0000000
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HCA query functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/gfp.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static unsigned int limit_uint(unsigned int value)
-{
-       return min_t(unsigned int, value, INT_MAX);
-}
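/*
 * limit_uint() clamps firmware-reported counts so they survive the
 * implicit conversion into the int fields of struct ib_device_attr,
 * e.g. limit_uint(0xffffffffU) == INT_MAX while limit_uint(16) == 16.
 */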
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                     struct ib_udata *uhw)
-{
-       int i, ret = 0;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_hca *rblock;
-
-       static const u32 cap_mapping[] = {
-               IB_DEVICE_RESIZE_MAX_WR,      HCA_CAP_WQE_RESIZE,
-               IB_DEVICE_BAD_PKEY_CNTR,      HCA_CAP_BAD_P_KEY_CTR,
-               IB_DEVICE_BAD_QKEY_CNTR,      HCA_CAP_Q_KEY_VIOL_CTR,
-               IB_DEVICE_RAW_MULTI,          HCA_CAP_RAW_PACKET_MCAST,
-               IB_DEVICE_AUTO_PATH_MIG,      HCA_CAP_AUTO_PATH_MIG,
-               IB_DEVICE_CHANGE_PHY_PORT,    HCA_CAP_SQD_RTS_PORT_CHANGE,
-               IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
-               IB_DEVICE_CURR_QP_STATE_MOD,  HCA_CAP_CUR_QP_STATE_MOD,
-               IB_DEVICE_SHUTDOWN_PORT,      HCA_CAP_SHUTDOWN_PORT,
-               IB_DEVICE_INIT_TYPE,          HCA_CAP_INIT_TYPE,
-               IB_DEVICE_PORT_ACTIVE_EVENT,  HCA_CAP_PORT_ACTIVE_EVENT,
-       };
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query device properties");
-               ret = -EINVAL;
-               goto query_device1;
-       }
-
-       memset(props, 0, sizeof(struct ib_device_attr));
-       props->page_size_cap   = shca->hca_cap_mr_pgsize;
-       props->fw_ver          = rblock->hw_ver;
-       props->max_mr_size     = rblock->max_mr_size;
-       props->vendor_id       = rblock->vendor_id >> 8;
-       props->vendor_part_id  = rblock->vendor_part_id >> 16;
-       props->hw_ver          = rblock->hw_ver;
-       props->max_qp          = limit_uint(rblock->max_qp);
-       props->max_qp_wr       = limit_uint(rblock->max_wqes_wq);
-       props->max_sge         = limit_uint(rblock->max_sge);
-       props->max_sge_rd      = limit_uint(rblock->max_sge_rd);
-       props->max_cq          = limit_uint(rblock->max_cq);
-       props->max_cqe         = limit_uint(rblock->max_cqe);
-       props->max_mr          = limit_uint(rblock->max_mr);
-       props->max_mw          = limit_uint(rblock->max_mw);
-       props->max_pd          = limit_uint(rblock->max_pd);
-       props->max_ah          = limit_uint(rblock->max_ah);
-       props->max_ee          = limit_uint(rblock->max_rd_ee_context);
-       props->max_rdd         = limit_uint(rblock->max_rd_domain);
-       props->max_fmr         = limit_uint(rblock->max_mr);
-       props->max_qp_rd_atom  = limit_uint(rblock->max_rr_qp);
-       props->max_ee_rd_atom  = limit_uint(rblock->max_rr_ee_context);
-       props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
-       props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp);
-       props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context);
-
-       if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-               props->max_srq         = limit_uint(props->max_qp);
-               props->max_srq_wr      = limit_uint(props->max_qp_wr);
-               props->max_srq_sge     = 3;
-       }
-
-       props->max_pkeys           = 16;
-       /* Some FW versions say 0 here; insert sensible value in that case */
-       props->local_ca_ack_delay  = rblock->local_ca_ack_delay ?
-               min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
-       props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
-       props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
-       props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
-       props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach);
-       props->max_total_mcast_qp_attach
-               = limit_uint(rblock->max_total_mcast_qp_attach);
-
-       /* translate device capabilities */
-       props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
-               IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
-       for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
-               if (rblock->hca_cap_indicators & cap_mapping[i + 1])
-                       props->device_cap_flags |= cap_mapping[i];
-
-query_device1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
-{
-       switch (fw_mtu) {
-       case 0x1:
-               return IB_MTU_256;
-       case 0x2:
-               return IB_MTU_512;
-       case 0x3:
-               return IB_MTU_1024;
-       case 0x4:
-               return IB_MTU_2048;
-       case 0x5:
-               return IB_MTU_4096;
-       default:
-               ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
-                        fw_mtu);
-               return 0;
-       }
-}
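/*
 * The firmware MTU codes happen to match enum ib_mtu (IB_MTU_256 == 1
 * through IB_MTU_4096 == 5), so the switch mainly guards against
 * codes a newer firmware might report; returning 0 signals "unknown"
 * to the caller.
 */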
-
-static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
-{
-       switch (vl_cap) {
-       case 0x1:
-               return 1;
-       case 0x2:
-               return 2;
-       case 0x3:
-               return 4;
-       case 0x4:
-               return 8;
-       case 0x5:
-               return 15;
-       default:
-               ehca_err(&shca->ib_device, "invalid Vl Capability: %x.",
-                        vl_cap);
-               return 0;
-       }
-}
-
-int ehca_query_port(struct ib_device *ibdev,
-                   u8 port, struct ib_port_attr *props)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_port *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_port1;
-       }
-
-       memset(props, 0, sizeof(struct ib_port_attr));
-
-       props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu);
-       props->port_cap_flags  = rblock->capability_mask;
-       props->gid_tbl_len     = rblock->gid_tbl_len;
-       if (rblock->max_msg_sz)
-               props->max_msg_sz      = rblock->max_msg_sz;
-       else
-               props->max_msg_sz      = 0x1U << 31;
-       props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
-       props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
-       props->pkey_tbl_len    = rblock->pkey_tbl_len;
-       props->lid             = rblock->lid;
-       props->sm_lid          = rblock->sm_lid;
-       props->lmc             = rblock->lmc;
-       props->sm_sl           = rblock->sm_sl;
-       props->subnet_timeout  = rblock->subnet_timeout;
-       props->init_type_reply = rblock->init_type_reply;
-       props->max_vl_num      = map_number_of_vls(shca, rblock->vl_cap);
-
-       if (rblock->state && rblock->phys_width) {
-               props->phys_state      = rblock->phys_pstate;
-               props->state           = rblock->phys_state;
-               props->active_width    = rblock->phys_width;
-               props->active_speed    = rblock->phys_speed;
-       } else {
-               /* old firmware releases don't report physical
-                * port info, so use default values
-                */
-               props->phys_state      = 5;
-               props->state           = rblock->state;
-               props->active_width    = IB_WIDTH_12X;
-               props->active_speed    = IB_SPEED_SDR;
-       }
-
-query_port1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_sma_attr(struct ehca_shca *shca,
-                       u8 port, struct ehca_sma_attr *attr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct hipz_query_port *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_sma_attr1;
-       }
-
-       memset(attr, 0, sizeof(struct ehca_sma_attr));
-
-       attr->lid    = rblock->lid;
-       attr->lmc    = rblock->lmc;
-       attr->sm_sl  = rblock->sm_sl;
-       attr->sm_lid = rblock->sm_lid;
-
-       attr->pkey_tbl_len = rblock->pkey_tbl_len;
-       memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
-
-query_sma_attr1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca;
-       struct hipz_query_port *rblock;
-
-       shca = container_of(ibdev, struct ehca_shca, ib_device);
-       if (index >= 16) {      /* valid P_Key indices are 0..15 */
-               ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-               return -EINVAL;
-       }
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_pkey1;
-       }
-
-       /* element-wise pointer arithmetic into the P_Key table */
-       memcpy(pkey, rblock->pkey_entries + index, sizeof(u16));
-
-query_pkey1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port,
-                  int index, union ib_gid *gid)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_port *rblock;
-
-       if (index < 0 || index > 255) {
-               ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-               return -EINVAL;
-       }
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_gid1;
-       }
-
-       memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
-       memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
-
-query_gid1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-static const u32 allowed_port_caps = (
-       IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
-       IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
-       IB_PORT_VENDOR_CLASS_SUP);
-
-int ehca_modify_port(struct ib_device *ibdev,
-                    u8 port, int port_modify_mask,
-                    struct ib_port_modify *props)
-{
-       int ret = 0;
-       struct ehca_shca *shca;
-       struct hipz_query_port *rblock;
-       u32 cap;
-       u64 hret;
-
-       shca = container_of(ibdev, struct ehca_shca, ib_device);
-       if ((props->set_port_cap_mask | props->clr_port_cap_mask)
-           & ~allowed_port_caps) {
-               ehca_err(&shca->ib_device, "Non-changeable bits set in masks  "
-                        "set=%x  clr=%x  allowed=%x", props->set_port_cap_mask,
-                        props->clr_port_cap_mask, allowed_port_caps);
-               return -EINVAL;
-       }
-
-       if (mutex_lock_interruptible(&shca->modify_mutex))
-               return -ERESTARTSYS;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-               ret = -ENOMEM;
-               goto modify_port1;
-       }
-
-       hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (hret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto modify_port2;
-       }
-
-       cap = (rblock->capability_mask | props->set_port_cap_mask)
-               & ~props->clr_port_cap_mask;
-
-       hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
-                                 cap, props->init_type, port_modify_mask);
-       if (hret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Modify port failed  h_ret=%lli",
-                        hret);
-               ret = -EINVAL;
-       }
-
-modify_port2:
-       ehca_free_fw_ctrlblock(rblock);
-
-modify_port1:
-       mutex_unlock(&shca->modify_mutex);
-
-       return ret;
-}
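/*
 * Concurrency note: modify_mutex turns the query/modify sequence in
 * ehca_modify_port() into one atomic read-modify-write of the port
 * capability mask; without it, two concurrent callers could both read
 * the old mask and silently drop each other's set/clear bits.
 */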
diff --git a/drivers/staging/rdma/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c
deleted file mode 100644 (file)
index 8615d7c..0000000
+++ /dev/null
@@ -1,870 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Functions for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <linux/smpboot.h>
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
-#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
-#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
-#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
-#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)
-
-#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
-#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
-#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
-#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
-#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
-#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)
-
-#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
-#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)
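/*
 * These masks use IBM bit numbering, where bit 0 is the most
 * significant bit of the u64, so EHCA_BMASK_IBM(32, 63) selects the
 * low 32 bits.  Decoding a token from an event-queue entry then
 * reads, as used throughout this file:
 *
 *	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe->entry);
 */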
-
-static void queue_comp_task(struct ehca_cq *__cq);
-
-static struct ehca_comp_pool *pool;
-
-static inline void comp_event_callback(struct ehca_cq *cq)
-{
-       if (!cq->ib_cq.comp_handler)
-               return;
-
-       spin_lock(&cq->cb_lock);
-       cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
-       spin_unlock(&cq->cb_lock);
-
-       return;
-}
-
-static void print_error_data(struct ehca_shca *shca, void *data,
-                            u64 *rblock, int length)
-{
-       u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
-       u64 resource = rblock[1];
-
-       switch (type) {
-       case 0x1: /* Queue Pair */
-       {
-               struct ehca_qp *qp = (struct ehca_qp *)data;
-
-               /* only print error data if AER is set */
-               if (rblock[6] == 0)
-                       return;
-
-               ehca_err(&shca->ib_device,
-                        "QP 0x%x (resource=%llx) has errors.",
-                        qp->ib_qp.qp_num, resource);
-               break;
-       }
-       case 0x4: /* Completion Queue */
-       {
-               struct ehca_cq *cq = (struct ehca_cq *)data;
-
-               ehca_err(&shca->ib_device,
-                        "CQ 0x%x (resource=%llx) has errors.",
-                        cq->cq_number, resource);
-               break;
-       }
-       default:
-               ehca_err(&shca->ib_device,
-                        "Unknown error type: %llx on %s.",
-                        type, shca->ib_device.name);
-               break;
-       }
-
-       ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
-       ehca_err(&shca->ib_device, "EHCA ----- error data begin "
-                "---------------------------------------------------");
-       ehca_dmp(rblock, length, "resource=%llx", resource);
-       ehca_err(&shca->ib_device, "EHCA ----- error data end "
-                "----------------------------------------------------");
-
-       return;
-}
-
-int ehca_error_data(struct ehca_shca *shca, void *data,
-                   u64 resource)
-{
-
-       unsigned long ret;
-       u64 *rblock;
-       unsigned long block_count;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
-               ret = -ENOMEM;
-               goto error_data1;
-       }
-
-       /* rblock must be 4K aligned and should be 4K in size */
-       ret = hipz_h_error_data(shca->ipz_hca_handle,
-                               resource,
-                               rblock,
-                               &block_count);
-
-       if (ret == H_R_STATE)
-               ehca_err(&shca->ib_device,
-                        "No error data is available: %llx.", resource);
-       else if (ret == H_SUCCESS) {
-               int length;
-
-               length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
-
-               if (length > EHCA_PAGESIZE)
-                       length = EHCA_PAGESIZE;
-
-               print_error_data(shca, data, rblock, length);
-       } else
-               ehca_err(&shca->ib_device,
-                        "Error data could not be fetched: %llx", resource);
-
-       ehca_free_fw_ctrlblock(rblock);
-
-error_data1:
-       return ret;
-
-}
-
-static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
-                             enum ib_event_type event_type)
-{
-       struct ib_event event;
-
-       /* PATH_MIG without the QP ever having been armed is false alarm */
-       if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
-               return;
-
-       event.device = &shca->ib_device;
-       event.event = event_type;
-
-       if (qp->ext_type == EQPT_SRQ) {
-               if (!qp->ib_srq.event_handler)
-                       return;
-
-               event.element.srq = &qp->ib_srq;
-               qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
-       } else {
-               if (!qp->ib_qp.event_handler)
-                       return;
-
-               event.element.qp = &qp->ib_qp;
-               qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
-       }
-}
-
-static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
-                             enum ib_event_type event_type, int fatal)
-{
-       struct ehca_qp *qp;
-       u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
-
-       read_lock(&ehca_qp_idr_lock);
-       qp = idr_find(&ehca_qp_idr, token);
-       if (qp)
-               atomic_inc(&qp->nr_events);
-       read_unlock(&ehca_qp_idr_lock);
-
-       if (!qp)
-               return;
-
-       if (fatal)
-               ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
-
-       dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
-                         IB_EVENT_SRQ_ERR : event_type);
-
-       /*
-        * eHCA only processes one WQE at a time for SRQ base QPs,
-        * so the last WQE has been processed as soon as the QP enters
-        * error state.
-        */
-       if (fatal && qp->ext_type == EQPT_SRQBASE)
-               dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
-
-       if (atomic_dec_and_test(&qp->nr_events))
-               wake_up(&qp->wait_completion);
-       return;
-}
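/*
 * The nr_events counter implements a small reference scheme: event
 * handlers atomic_inc() it under the idr lock before touching the
 * object and wake any waiter on the final atomic_dec_and_test(),
 * while destroy paths first remove the object from the idr and then
 * block until in-flight events drain, e.g. (from ehca_destroy_cq()
 * above):
 *
 *	wait_event(my_cq->wait_completion,
 *		   !atomic_read(&my_cq->nr_events));
 */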
-
-static void cq_event_callback(struct ehca_shca *shca,
-                             u64 eqe)
-{
-       struct ehca_cq *cq;
-       u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
-
-       read_lock(&ehca_cq_idr_lock);
-       cq = idr_find(&ehca_cq_idr, token);
-       if (cq)
-               atomic_inc(&cq->nr_events);
-       read_unlock(&ehca_cq_idr_lock);
-
-       if (!cq)
-               return;
-
-       ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
-
-       if (atomic_dec_and_test(&cq->nr_events))
-               wake_up(&cq->wait_completion);
-
-       return;
-}
-
-static void parse_identifier(struct ehca_shca *shca, u64 eqe)
-{
-       u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
-
-       switch (identifier) {
-       case 0x02: /* path migrated */
-               qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
-               break;
-       case 0x03: /* communication established */
-               qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
-               break;
-       case 0x04: /* send queue drained */
-               qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
-               break;
-       case 0x05: /* QP error */
-       case 0x06: /* QP error */
-               qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
-               break;
-       case 0x07: /* CQ error */
-       case 0x08: /* CQ error */
-               cq_event_callback(shca, eqe);
-               break;
-       case 0x09: /* MRMWPTE error */
-               ehca_err(&shca->ib_device, "MRMWPTE error.");
-               break;
-       case 0x0A: /* port event */
-               ehca_err(&shca->ib_device, "Port event.");
-               break;
-       case 0x0B: /* MR access error */
-               ehca_err(&shca->ib_device, "MR access error.");
-               break;
-       case 0x0C: /* EQ error */
-               ehca_err(&shca->ib_device, "EQ error.");
-               break;
-       case 0x0D: /* P/Q_Key mismatch */
-               ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
-               break;
-       case 0x10: /* sampling complete */
-               ehca_err(&shca->ib_device, "Sampling complete.");
-               break;
-       case 0x11: /* unaffiliated access error */
-               ehca_err(&shca->ib_device, "Unaffiliated access error.");
-               break;
-       case 0x12: /* path migrating */
-               ehca_err(&shca->ib_device, "Path migrating.");
-               break;
-       case 0x13: /* interface trace stopped */
-               ehca_err(&shca->ib_device, "Interface trace stopped.");
-               break;
-       case 0x14: /* first error capture info available */
-               ehca_info(&shca->ib_device, "First error capture available");
-               break;
-       case 0x15: /* SRQ limit reached */
-               qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
-                        identifier, shca->ib_device.name);
-               break;
-       }
-
-       return;
-}
-
-static void dispatch_port_event(struct ehca_shca *shca, int port_num,
-                               enum ib_event_type type, const char *msg)
-{
-       struct ib_event event;
-
-       ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
-       event.device = &shca->ib_device;
-       event.event = type;
-       event.element.port_num = port_num;
-       ib_dispatch_event(&event);
-}
-
-static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
-{
-       struct ehca_sma_attr  new_attr;
-       struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
-
-       ehca_query_sma_attr(shca, port_num, &new_attr);
-
-       if (new_attr.sm_sl  != old_attr->sm_sl ||
-           new_attr.sm_lid != old_attr->sm_lid)
-               dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
-                                   "SM changed");
-
-       if (new_attr.lid != old_attr->lid ||
-           new_attr.lmc != old_attr->lmc)
-               dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
-                                   "LID changed");
-
-       if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
-           memcmp(new_attr.pkeys, old_attr->pkeys,
-                  sizeof(u16) * new_attr.pkey_tbl_len))
-               dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
-                                   "P_Key changed");
-
-       *old_attr = new_attr;
-}
-
-/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
-static int replay_modify_qp(struct ehca_sport *sport)
-{
-       int aqp1_destroyed;
-       unsigned long flags;
-
-       spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-
-       aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
-
-       if (sport->ibqp_sqp[IB_QPT_SMI])
-               ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
-       if (!aqp1_destroyed)
-               ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
-
-       spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-
-       return aqp1_destroyed;
-}
-
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
-{
-       u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
-       u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
-       u8 spec_event;
-       struct ehca_sport *sport = &shca->sport[port - 1];
-
-       switch (ec) {
-       case 0x30: /* port availability change */
-               if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
-                       /* only replay modify_qp calls in autodetect mode;
-                        * if AQP1 was destroyed, the port is already down
-                        * again and we can drop the event.
-                        */
-                       if (ehca_nr_ports < 0)
-                               if (replay_modify_qp(sport))
-                                       break;
-
-                       sport->port_state = IB_PORT_ACTIVE;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-                                           "is active");
-                       ehca_query_sma_attr(shca, port, &sport->saved_attr);
-               } else {
-                       sport->port_state = IB_PORT_DOWN;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-                                           "is inactive");
-               }
-               break;
-       case 0x31:
-               /* port configuration change
-                * disruptive change is caused by
-                * LID, PKEY or SM change
-                */
-               if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
-                       ehca_warn(&shca->ib_device, "disruptive port "
-                                 "%d configuration change", port);
-
-                       sport->port_state = IB_PORT_DOWN;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-                                           "is inactive");
-
-                       sport->port_state = IB_PORT_ACTIVE;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-                                           "is active");
-                       ehca_query_sma_attr(shca, port,
-                                           &sport->saved_attr);
-               } else
-                       notify_port_conf_change(shca, port);
-               break;
-       case 0x32: /* adapter malfunction */
-               ehca_err(&shca->ib_device, "Adapter malfunction.");
-               break;
-       case 0x33:  /* trace stopped */
-               ehca_err(&shca->ib_device, "Trace stopped.");
-               break;
-       case 0x34: /* util async event */
-               spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
-               if (spec_event == 0x80) /* client reregister required */
-                       dispatch_port_event(shca, port,
-                                           IB_EVENT_CLIENT_REREGISTER,
-                                           "client reregister req.");
-               else
-                       ehca_warn(&shca->ib_device, "Unknown util async "
-                                 "event %x on port %x", spec_event, port);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
-                        ec, shca->ib_device.name);
-               break;
-       }
-
-       return;
-}
-
-static inline void reset_eq_pending(struct ehca_cq *cq)
-{
-       u64 CQx_EP;
-       struct h_galpa gal = cq->galpas.kernel;
-
-       hipz_galpa_store_cq(gal, cqx_ep, 0x0);
-       CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
-
-       return;
-}
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-       tasklet_hi_schedule(&shca->neq.interrupt_task);
-
-       return IRQ_HANDLED;
-}
-
-void ehca_tasklet_neq(unsigned long data)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)data;
-       struct ehca_eqe *eqe;
-       u64 ret;
-
-       eqe = ehca_poll_eq(shca, &shca->neq);
-
-       while (eqe) {
-               if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
-                       parse_ec(shca, eqe->entry);
-
-               eqe = ehca_poll_eq(shca, &shca->neq);
-       }
-
-       ret = hipz_h_reset_event(shca->ipz_hca_handle,
-                                shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
-
-       if (ret != H_SUCCESS)
-               ehca_err(&shca->ib_device, "Can't clear notification events.");
-
-       return;
-}
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-       tasklet_hi_schedule(&shca->eq.interrupt_task);
-
-       return IRQ_HANDLED;
-}
-
-
-static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
-{
-       u64 eqe_value;
-       u32 token;
-       struct ehca_cq *cq;
-
-       eqe_value = eqe->entry;
-       ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
-       if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-               ehca_dbg(&shca->ib_device, "Got completion event");
-               token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-               read_lock(&ehca_cq_idr_lock);
-               cq = idr_find(&ehca_cq_idr, token);
-               if (cq)
-                       atomic_inc(&cq->nr_events);
-               read_unlock(&ehca_cq_idr_lock);
-               if (cq == NULL) {
-                       ehca_err(&shca->ib_device,
-                                "Invalid eqe for non-existing cq token=%x",
-                                token);
-                       return;
-               }
-               reset_eq_pending(cq);
-               if (ehca_scaling_code)
-                       queue_comp_task(cq);
-               else {
-                       comp_event_callback(cq);
-                       if (atomic_dec_and_test(&cq->nr_events))
-                               wake_up(&cq->wait_completion);
-               }
-       } else {
-               ehca_dbg(&shca->ib_device, "Got non completion event");
-               parse_identifier(shca, eqe_value);
-       }
-}
-
-void ehca_process_eq(struct ehca_shca *shca, int is_irq)
-{
-       struct ehca_eq *eq = &shca->eq;
-       struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
-       u64 eqe_value, ret;
-       int eqe_cnt, i;
-       int eq_empty = 0;
-
-       spin_lock(&eq->irq_spinlock);
-       if (is_irq) {
-               const int max_query_cnt = 100;
-               int query_cnt = 0;
-               int int_state = 1;
-               do {
-                       int_state = hipz_h_query_int_state(
-                               shca->ipz_hca_handle, eq->ist);
-                       query_cnt++;
-                       iosync();
-               } while (int_state && query_cnt < max_query_cnt);
-               if (unlikely(query_cnt == max_query_cnt))
-                       ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
-                                int_state, query_cnt);
-       }
-
-       /* read out all eqes */
-       eqe_cnt = 0;
-       do {
-               u32 token;
-               eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
-               if (!eqe_cache[eqe_cnt].eqe)
-                       break;
-               eqe_value = eqe_cache[eqe_cnt].eqe->entry;
-               if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-                       token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-                       read_lock(&ehca_cq_idr_lock);
-                       eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
-                       if (eqe_cache[eqe_cnt].cq)
-                               atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
-                       read_unlock(&ehca_cq_idr_lock);
-                       if (!eqe_cache[eqe_cnt].cq) {
-                               ehca_err(&shca->ib_device,
-                                        "Invalid eqe for non-existing cq "
-                                        "token=%x", token);
-                               continue;
-                       }
-               } else
-                       eqe_cache[eqe_cnt].cq = NULL;
-               eqe_cnt++;
-       } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
-       if (!eqe_cnt) {
-               if (is_irq)
-                       ehca_dbg(&shca->ib_device,
-                                "No eqe found for irq event");
-               goto unlock_irq_spinlock;
-       } else if (!is_irq) {
-               ret = hipz_h_eoi(eq->ist);
-               if (ret != H_SUCCESS)
-                       ehca_err(&shca->ib_device,
-                                "bad return code EOI -rc = %lld\n", ret);
-               ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
-       }
-       if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
-               ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
-       /* enable irq for new packets */
-       for (i = 0; i < eqe_cnt; i++) {
-               if (eq->eqe_cache[i].cq)
-                       reset_eq_pending(eq->eqe_cache[i].cq);
-       }
-       /* check eq */
-       spin_lock(&eq->spinlock);
-       eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
-       spin_unlock(&eq->spinlock);
-       /* call completion handler for cached eqes */
-       for (i = 0; i < eqe_cnt; i++)
-               if (eq->eqe_cache[i].cq) {
-                       if (ehca_scaling_code)
-                               queue_comp_task(eq->eqe_cache[i].cq);
-                       else {
-                               struct ehca_cq *cq = eq->eqe_cache[i].cq;
-                               comp_event_callback(cq);
-                               if (atomic_dec_and_test(&cq->nr_events))
-                                       wake_up(&cq->wait_completion);
-                       }
-               } else {
-                       ehca_dbg(&shca->ib_device, "Got non completion event");
-                       parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
-               }
-       /* poll eq if not empty */
-       if (eq_empty)
-               goto unlock_irq_spinlock;
-       do {
-               struct ehca_eqe *eqe;
-               eqe = ehca_poll_eq(shca, &shca->eq);
-               if (!eqe)
-                       break;
-               process_eqe(shca, eqe);
-       } while (1);
-
-unlock_irq_spinlock:
-       spin_unlock(&eq->irq_spinlock);
-}
-
-void ehca_tasklet_eq(unsigned long data)
-{
-       ehca_process_eq((struct ehca_shca*)data, 1);
-}
-
-static int find_next_online_cpu(struct ehca_comp_pool *pool)
-{
-       int cpu;
-       unsigned long flags;
-
-       WARN_ON_ONCE(!in_interrupt());
-       if (ehca_debug_level >= 3)
-               ehca_dmp(cpu_online_mask, cpumask_size(), "");
-
-       spin_lock_irqsave(&pool->last_cpu_lock, flags);
-       do {
-               cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
-               if (cpu >= nr_cpu_ids)
-                       cpu = cpumask_first(cpu_online_mask);
-               pool->last_cpu = cpu;
-       } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
-       spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
-
-       return cpu;
-}
-
-static void __queue_comp_task(struct ehca_cq *__cq,
-                             struct ehca_cpu_comp_task *cct,
-                             struct task_struct *thread)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&cct->task_lock, flags);
-       spin_lock(&__cq->task_lock);
-
-       if (__cq->nr_callbacks == 0) {
-               __cq->nr_callbacks++;
-               list_add_tail(&__cq->entry, &cct->cq_list);
-               cct->cq_jobs++;
-               wake_up_process(thread);
-       } else
-               __cq->nr_callbacks++;
-
-       spin_unlock(&__cq->task_lock);
-       spin_unlock_irqrestore(&cct->task_lock, flags);
-}
-
-static void queue_comp_task(struct ehca_cq *__cq)
-{
-       int cpu_id;
-       struct ehca_cpu_comp_task *cct;
-       struct task_struct *thread;
-       int cq_jobs;
-       unsigned long flags;
-
-       cpu_id = find_next_online_cpu(pool);
-       BUG_ON(!cpu_online(cpu_id));
-
-       cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-       thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-       BUG_ON(!cct || !thread);
-
-       spin_lock_irqsave(&cct->task_lock, flags);
-       cq_jobs = cct->cq_jobs;
-       spin_unlock_irqrestore(&cct->task_lock, flags);
-       if (cq_jobs > 0) {
-               cpu_id = find_next_online_cpu(pool);
-               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-               thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-               BUG_ON(!cct || !thread);
-       }
-       __queue_comp_task(__cq, cct, thread);
-}
-
-static void run_comp_task(struct ehca_cpu_comp_task *cct)
-{
-       struct ehca_cq *cq;
-
-       while (!list_empty(&cct->cq_list)) {
-               cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-               spin_unlock_irq(&cct->task_lock);
-
-               comp_event_callback(cq);
-               if (atomic_dec_and_test(&cq->nr_events))
-                       wake_up(&cq->wait_completion);
-
-               spin_lock_irq(&cct->task_lock);
-               spin_lock(&cq->task_lock);
-               cq->nr_callbacks--;
-               if (!cq->nr_callbacks) {
-                       list_del_init(cct->cq_list.next);
-                       cct->cq_jobs--;
-               }
-               spin_unlock(&cq->task_lock);
-       }
-}
-
-static void comp_task_park(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-       struct ehca_cpu_comp_task *target;
-       struct task_struct *thread;
-       struct ehca_cq *cq, *tmp;
-       LIST_HEAD(list);
-
-       spin_lock_irq(&cct->task_lock);
-       cct->cq_jobs = 0;
-       cct->active = 0;
-       list_splice_init(&cct->cq_list, &list);
-       spin_unlock_irq(&cct->task_lock);
-
-       cpu = find_next_online_cpu(pool);
-       target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-       thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
-       spin_lock_irq(&target->task_lock);
-       list_for_each_entry_safe(cq, tmp, &list, entry) {
-               list_del(&cq->entry);
-               __queue_comp_task(cq, target, thread);
-       }
-       spin_unlock_irq(&target->task_lock);
-}
-
-static void comp_task_stop(unsigned int cpu, bool online)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-       spin_lock_irq(&cct->task_lock);
-       cct->cq_jobs = 0;
-       cct->active = 0;
-       WARN_ON(!list_empty(&cct->cq_list));
-       spin_unlock_irq(&cct->task_lock);
-}
-
-static int comp_task_should_run(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-       return cct->cq_jobs;
-}
-
-static void comp_task(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
-       int cql_empty;
-
-       spin_lock_irq(&cct->task_lock);
-       cql_empty = list_empty(&cct->cq_list);
-       if (!cql_empty) {
-               __set_current_state(TASK_RUNNING);
-               run_comp_task(cct);
-       }
-       spin_unlock_irq(&cct->task_lock);
-}
-
-static struct smp_hotplug_thread comp_pool_threads = {
-       .thread_should_run      = comp_task_should_run,
-       .thread_fn              = comp_task,
-       .thread_comm            = "ehca_comp/%u",
-       .cleanup                = comp_task_stop,
-       .park                   = comp_task_park,
-};
-
-int ehca_create_comp_pool(void)
-{
-       int cpu, ret = -ENOMEM;
-
-       if (!ehca_scaling_code)
-               return 0;
-
-       pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
-       if (pool == NULL)
-               return -ENOMEM;
-
-       spin_lock_init(&pool->last_cpu_lock);
-       pool->last_cpu = cpumask_any(cpu_online_mask);
-
-       pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
-       if (!pool->cpu_comp_tasks)
-               goto out_pool;
-
-       pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
-       if (!pool->cpu_comp_threads)
-               goto out_tasks;
-
-       for_each_present_cpu(cpu) {
-               struct ehca_cpu_comp_task *cct;
-
-               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-               spin_lock_init(&cct->task_lock);
-               INIT_LIST_HEAD(&cct->cq_list);
-       }
-
-       comp_pool_threads.store = pool->cpu_comp_threads;
-       ret = smpboot_register_percpu_thread(&comp_pool_threads);
-       if (ret)
-               goto out_threads;
-
-       pr_info("eHCA scaling code enabled\n");
-       return ret;
-
-out_threads:
-       free_percpu(pool->cpu_comp_threads);
-out_tasks:
-       free_percpu(pool->cpu_comp_tasks);
-out_pool:
-       kfree(pool);
-       return ret;
-}
-
-void ehca_destroy_comp_pool(void)
-{
-       if (!ehca_scaling_code)
-               return;
-
-       smpboot_unregister_percpu_thread(&comp_pool_threads);
-
-       free_percpu(pool->cpu_comp_threads);
-       free_percpu(pool->cpu_comp_tasks);
-       kfree(pool);
-}
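
The completion-task pool deleted above is built on the kernel's generic smpboot
per-CPU kthread facility. A minimal standalone sketch of that registration
pattern follows; it is not eHCA code, and the demo_* names are invented:

    #include <linux/smpboot.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(struct task_struct *, demo_threads);
    static DEFINE_PER_CPU(int, demo_pending);

    static int demo_should_run(unsigned int cpu)
    {
            return per_cpu(demo_pending, cpu);      /* run only when work is queued */
    }

    static void demo_fn(unsigned int cpu)
    {
            per_cpu(demo_pending, cpu) = 0;         /* consume the queued work */
    }

    static struct smp_hotplug_thread demo_smpboot = {
            .store                  = &demo_threads,
            .thread_should_run      = demo_should_run,
            .thread_fn              = demo_fn,
            .thread_comm            = "demo/%u",
    };

    /* smpboot_register_percpu_thread(&demo_smpboot) spawns one kthread per
     * CPU and parks/unparks them across CPU hotplug, which is why the
     * driver above only supplies comp_task_park()/comp_task_stop(). */
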
diff --git a/drivers/staging/rdma/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h
deleted file mode 100644 (file)
index 5370199..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions and structs for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IRQ_H
-#define __EHCA_IRQ_H
-
-
-struct ehca_shca;
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-
-int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id);
-void ehca_tasklet_neq(unsigned long data);
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
-void ehca_tasklet_eq(unsigned long data);
-void ehca_process_eq(struct ehca_shca *shca, int is_irq);
-
-struct ehca_cpu_comp_task {
-       struct list_head cq_list;
-       spinlock_t task_lock;
-       int cq_jobs;
-       int active;
-};
-
-struct ehca_comp_pool {
-       struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
-       struct task_struct * __percpu *cpu_comp_threads;
-       int last_cpu;
-       spinlock_t last_cpu_lock;
-};
-
-int ehca_create_comp_pool(void);
-void ehca_destroy_comp_pool(void);
-
-#endif
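
The interrupt entry points declared in this header follow the classic
hard-irq/tasklet split: the handler itself only schedules deferred work. A
minimal sketch of that pattern with the stock (pre-5.9) tasklet API, using
invented demo_* names rather than driver code:

    #include <linux/interrupt.h>

    static void demo_tasklet_fn(unsigned long data)
    {
            /* softirq context: drain the event queue here */
    }

    static DECLARE_TASKLET(demo_tasklet, demo_tasklet_fn, 0);

    static irqreturn_t demo_irq_handler(int irq, void *dev_id)
    {
            tasklet_hi_schedule(&demo_tasklet);     /* defer the heavy lifting */
            return IRQ_HANDLED;
    }
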
diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
deleted file mode 100644 (file)
index 80e6a3d..0000000
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions for internal functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Dietmar Decker <ddecker@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IVERBS_H__
-#define __EHCA_IVERBS_H__
-
-#include "ehca_classes.h"
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                     struct ib_udata *uhw);
-
-int ehca_query_port(struct ib_device *ibdev, u8 port,
-                   struct ib_port_attr *props);
-
-enum rdma_protocol_type
-ehca_query_protocol(struct ib_device *device, u8 port_num);
-
-int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
-                       struct ehca_sma_attr *attr);
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
-                  union ib_gid *gid);
-
-int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
-                    struct ib_port_modify *props);
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-                           struct ib_ucontext *context,
-                           struct ib_udata *udata);
-
-int ehca_dealloc_pd(struct ib_pd *pd);
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_destroy_ah(struct ib_ah *ah);
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
-
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *phys_buf_array,
-                              int num_phys_buf,
-                              int mr_access_flags, u64 *iova_start);
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                              u64 virt, int mr_access_flags,
-                              struct ib_udata *udata);
-
-int ehca_rereg_phys_mr(struct ib_mr *mr,
-                      int mr_rereg_mask,
-                      struct ib_pd *pd,
-                      struct ib_phys_buf *phys_buf_array,
-                      int num_phys_buf, int mr_access_flags, u64 *iova_start);
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
-int ehca_dereg_mr(struct ib_mr *mr);
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind);
-
-int ehca_dealloc_mw(struct ib_mw *mw);
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-                             int mr_access_flags,
-                             struct ib_fmr_attr *fmr_attr);
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-                     u64 *page_list, int list_len, u64 iova);
-
-int ehca_unmap_fmr(struct list_head *fmr_list);
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr);
-
-enum ehca_eq_type {
-       EHCA_EQ = 0, /* Event Queue              */
-       EHCA_NEQ     /* Notification Event Queue */
-};
-
-int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
-                  enum ehca_eq_type type, const u32 length);
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-                            const struct ib_cq_init_attr *attr,
-                            struct ib_ucontext *context,
-                            struct ib_udata *udata);
-
-int ehca_destroy_cq(struct ib_cq *cq);
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
-
-int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-                            struct ib_qp_init_attr *init_attr,
-                            struct ib_udata *udata);
-
-int ehca_destroy_qp(struct ib_qp *qp);
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-                  struct ib_udata *udata);
-
-int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
-                 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
-                  struct ib_send_wr **bad_send_wr);
-
-int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-                  struct ib_recv_wr **bad_recv_wr);
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-                      struct ib_recv_wr *recv_wr,
-                      struct ib_recv_wr **bad_recv_wr);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-                              struct ib_srq_init_attr *init_attr,
-                              struct ib_udata *udata);
-
-int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
-                   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-
-int ehca_destroy_srq(struct ib_srq *srq);
-
-u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
-                   struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-                                       struct ib_udata *udata);
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context);
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                    const struct ib_mad_hdr *in, size_t in_mad_size,
-                    struct ib_mad_hdr *out, size_t *out_mad_size,
-                    u16 *out_mad_pkey_index);
-
-void ehca_poll_eqs(unsigned long data);
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-                 enum ib_rate path_rate, u32 *ipd);
-
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
-
-#ifdef CONFIG_PPC_64K_PAGES
-void *ehca_alloc_fw_ctrlblock(gfp_t flags);
-void ehca_free_fw_ctrlblock(void *ptr);
-#else
-#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
-#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
-#endif
-
-void ehca_recover_sqp(struct ib_qp *sqp);
-
-#endif
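
A reading aid for the EHCA_BMASK_IBM()/EHCA_BMASK_GET() helpers used
throughout these files (they are defined in ehca_tools.h, which is not shown
here): the two arguments are IBM-style bit positions, where bit 0 is the most
significant bit of a 64-bit word. Under that assumption, a plain-C equivalent
of the field extraction looks like this:

    #include <stdint.h>

    /* Extract IBM-numbered bits [from..to] of a 64-bit word, where bit 0
     * is the MSB (assumes a field narrower than 64 bits). */
    static inline uint64_t ibm_bits_get(uint64_t word, int from, int to)
    {
            int shift = 63 - to;                    /* field distance from the LSB */
            uint64_t mask = (1ULL << (to - from + 1)) - 1;

            return (word >> shift) & mask;
    }

    /* e.g. EHCA_HCAAVER below is EHCA_BMASK_IBM(32, 39), so
     * EHCA_BMASK_GET(EHCA_HCAAVER, hw_ver) == ibm_bits_get(hw_ver, 32, 39). */
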
diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
deleted file mode 100644 (file)
index 860b974..0000000
+++ /dev/null
@@ -1,1122 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  module start stop, hca detection
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <linux/slab.h>
-#endif
-
-#include <linux/notifier.h>
-#include <linux/memory.h>
-#include <rdma/ib_mad.h>
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-#define HCAD_VERSION "0029"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
-MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION(HCAD_VERSION);
-
-static bool ehca_open_aqp1    = 0;
-static int ehca_hw_level      = 0;
-static bool ehca_poll_all_eqs = 1;
-
-int ehca_debug_level   = 0;
-int ehca_nr_ports      = -1;
-bool ehca_use_hp_mr    = 0;
-int ehca_port_act_time = 30;
-int ehca_static_rate   = -1;
-bool ehca_scaling_code = 0;
-int ehca_lock_hcalls   = -1;
-int ehca_max_cq        = -1;
-int ehca_max_qp        = -1;
-
-module_param_named(open_aqp1,     ehca_open_aqp1,     bool, S_IRUGO);
-module_param_named(debug_level,   ehca_debug_level,   int,  S_IRUGO);
-module_param_named(hw_level,      ehca_hw_level,      int,  S_IRUGO);
-module_param_named(nr_ports,      ehca_nr_ports,      int,  S_IRUGO);
-module_param_named(use_hp_mr,     ehca_use_hp_mr,     bool, S_IRUGO);
-module_param_named(port_act_time, ehca_port_act_time, int,  S_IRUGO);
-module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  bool, S_IRUGO);
-module_param_named(static_rate,   ehca_static_rate,   int,  S_IRUGO);
-module_param_named(scaling_code,  ehca_scaling_code,  bool, S_IRUGO);
-module_param_named(lock_hcalls,   ehca_lock_hcalls,   bint, S_IRUGO);
-module_param_named(number_of_cqs, ehca_max_cq,        int,  S_IRUGO);
-module_param_named(number_of_qps, ehca_max_qp,        int,  S_IRUGO);
-
-MODULE_PARM_DESC(open_aqp1,
-                "Open AQP1 on startup (default: no)");
-MODULE_PARM_DESC(debug_level,
-                "Amount of debug output (0: none (default), 1: traces, "
-                "2: some dumps, 3: lots)");
-MODULE_PARM_DESC(hw_level,
-                "Hardware level (0: autosensing (default), "
-                "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
-MODULE_PARM_DESC(nr_ports,
-                "number of connected ports (-1: autodetect (default), "
-                "1: port one only, 2: two ports)");
-MODULE_PARM_DESC(use_hp_mr,
-                "Use high performance MRs (default: no)");
-MODULE_PARM_DESC(port_act_time,
-                "Time to wait for port activation (default: 30 sec)");
-MODULE_PARM_DESC(poll_all_eqs,
-                "Poll all event queues periodically (default: yes)");
-MODULE_PARM_DESC(static_rate,
-                "Set permanent static rate (default: no static rate)");
-MODULE_PARM_DESC(scaling_code,
-                "Enable scaling code (default: no)");
-MODULE_PARM_DESC(lock_hcalls,
-                "Serialize all hCalls made by the driver "
-                "(default: autodetect)");
-MODULE_PARM_DESC(number_of_cqs,
-               "Max number of CQs which can be allocated "
-               "(default: autodetect)");
-MODULE_PARM_DESC(number_of_qps,
-               "Max number of QPs which can be allocated "
-               "(default: autodetect)");
-
-DEFINE_RWLOCK(ehca_qp_idr_lock);
-DEFINE_RWLOCK(ehca_cq_idr_lock);
-DEFINE_IDR(ehca_qp_idr);
-DEFINE_IDR(ehca_cq_idr);
-
-static LIST_HEAD(shca_list); /* list of all registered ehcas */
-DEFINE_SPINLOCK(shca_list_lock);
-
-static struct timer_list poll_eqs_timer;
-
-#ifdef CONFIG_PPC_64K_PAGES
-static struct kmem_cache *ctblk_cache;
-
-void *ehca_alloc_fw_ctrlblock(gfp_t flags)
-{
-       void *ret = kmem_cache_zalloc(ctblk_cache, flags);
-       if (!ret)
-               ehca_gen_err("Out of memory for ctblk");
-       return ret;
-}
-
-void ehca_free_fw_ctrlblock(void *ptr)
-{
-       if (ptr)
-               kmem_cache_free(ctblk_cache, ptr);
-
-}
-#endif
-
-int ehca2ib_return_code(u64 ehca_rc)
-{
-       switch (ehca_rc) {
-       case H_SUCCESS:
-               return 0;
-       case H_RESOURCE:             /* Resource in use */
-       case H_BUSY:
-               return -EBUSY;
-       case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-       case H_CONSTRAINED:          /* resource constraint */
-       case H_NO_MEM:
-               return -ENOMEM;
-       default:
-               return -EINVAL;
-       }
-}
-
-static int ehca_create_slab_caches(void)
-{
-       int ret;
-
-       ret = ehca_init_pd_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create PD SLAB cache.");
-               return ret;
-       }
-
-       ret = ehca_init_cq_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create CQ SLAB cache.");
-               goto create_slab_caches2;
-       }
-
-       ret = ehca_init_qp_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create QP SLAB cache.");
-               goto create_slab_caches3;
-       }
-
-       ret = ehca_init_av_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create AV SLAB cache.");
-               goto create_slab_caches4;
-       }
-
-       ret = ehca_init_mrmw_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create MR&MW SLAB cache.");
-               goto create_slab_caches5;
-       }
-
-       ret = ehca_init_small_qp_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create small queue SLAB cache.");
-               goto create_slab_caches6;
-       }
-
-#ifdef CONFIG_PPC_64K_PAGES
-       ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
-                                       EHCA_PAGESIZE, H_CB_ALIGNMENT,
-                                       SLAB_HWCACHE_ALIGN,
-                                       NULL);
-       if (!ctblk_cache) {
-               ehca_gen_err("Cannot create ctblk SLAB cache.");
-               ehca_cleanup_small_qp_cache();
-               ret = -ENOMEM;
-               goto create_slab_caches6;
-       }
-#endif
-       return 0;
-
-create_slab_caches6:
-       ehca_cleanup_mrmw_cache();
-
-create_slab_caches5:
-       ehca_cleanup_av_cache();
-
-create_slab_caches4:
-       ehca_cleanup_qp_cache();
-
-create_slab_caches3:
-       ehca_cleanup_cq_cache();
-
-create_slab_caches2:
-       ehca_cleanup_pd_cache();
-
-       return ret;
-}
-
-static void ehca_destroy_slab_caches(void)
-{
-       ehca_cleanup_small_qp_cache();
-       ehca_cleanup_mrmw_cache();
-       ehca_cleanup_av_cache();
-       ehca_cleanup_qp_cache();
-       ehca_cleanup_cq_cache();
-       ehca_cleanup_pd_cache();
-#ifdef CONFIG_PPC_64K_PAGES
-       kmem_cache_destroy(ctblk_cache);
-#endif
-}
-
-#define EHCA_HCAAVER  EHCA_BMASK_IBM(32, 39)
-#define EHCA_REVID    EHCA_BMASK_IBM(40, 63)
-
-static struct cap_descr {
-       u64 mask;
-       char *descr;
-} hca_cap_descr[] = {
-       { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
-       { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
-       { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
-       { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
-       { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
-       { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
-       { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
-       { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
-       { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
-       { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
-       { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
-       { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
-       { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
-       { HCA_CAP_SRQ, "HCA_CAP_SRQ" },
-       { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
-       { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
-       { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
-       { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
-};
-
-static int ehca_sense_attributes(struct ehca_shca *shca)
-{
-       int i, ret = 0;
-       u64 h_ret;
-       struct hipz_query_hca *rblock;
-       struct hipz_query_port *port;
-       const char *loc_code;
-
-       static const u32 pgsize_map[] = {
-               HCA_CAP_MR_PGSIZE_4K,  0x1000,
-               HCA_CAP_MR_PGSIZE_64K, 0x10000,
-               HCA_CAP_MR_PGSIZE_1M,  0x100000,
-               HCA_CAP_MR_PGSIZE_16M, 0x1000000,
-       };
-
-       ehca_gen_dbg("Probing adapter %s...",
-                    shca->ofdev->dev.of_node->full_name);
-       loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
-                                  NULL);
-       if (loc_code)
-               ehca_gen_dbg(" ... location lode=%s", loc_code);
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_gen_err("Cannot allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_gen_err("Cannot query device properties. h_ret=%lli",
-                            h_ret);
-               ret = -EPERM;
-               goto sense_attributes1;
-       }
-
-       if (ehca_nr_ports == 1)
-               shca->num_ports = 1;
-       else
-               shca->num_ports = (u8)rblock->num_ports;
-
-       ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
-
-       if (ehca_hw_level == 0) {
-               u32 hcaaver;
-               u32 revid;
-
-               hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
-               revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
-
-               ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
-
-               if (hcaaver == 1) {
-                       if (revid <= 3)
-                               shca->hw_level = 0x10 | (revid + 1);
-                       else
-                               shca->hw_level = 0x14;
-               } else if (hcaaver == 2) {
-                       if (revid == 0)
-                               shca->hw_level = 0x21;
-                       else if (revid == 0x10)
-                               shca->hw_level = 0x22;
-                       else if (revid == 0x20 || revid == 0x21)
-                               shca->hw_level = 0x23;
-               }
-
-               if (!shca->hw_level) {
-                       ehca_gen_warn("unknown hardware version"
-                                     " - assuming default level");
-                       shca->hw_level = 0x22;
-               }
-       } else
-               shca->hw_level = ehca_hw_level;
-       ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
-
-       shca->hca_cap = rblock->hca_cap_indicators;
-       ehca_gen_dbg(" ... HCA capabilities:");
-       for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
-               if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
-                       ehca_gen_dbg("   %s", hca_cap_descr[i].descr);
-
-       /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
-        * a firmware property, so it's valid across all adapters
-        */
-       if (ehca_lock_hcalls == -1)
-               ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
-                                       shca->hca_cap);
-
-       /* translate supported MR page sizes; always support 4K */
-       shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
-       for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
-               if (rblock->memory_page_size_supported & pgsize_map[i])
-                       shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
-
-       /* Set maximum number of CQs and QPs to calculate EQ size */
-       if (shca->max_num_qps == -1)
-               shca->max_num_qps = min_t(int, rblock->max_qp,
-                                         EHCA_MAX_NUM_QUEUES);
-       else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
-               ehca_gen_warn("The requested number of QPs is out of range "
-                             "(1 - %i) specified by HW. Value is set to %i",
-                             rblock->max_qp, rblock->max_qp);
-               shca->max_num_qps = rblock->max_qp;
-       }
-
-       if (shca->max_num_cqs == -1)
-               shca->max_num_cqs = min_t(int, rblock->max_cq,
-                                         EHCA_MAX_NUM_QUEUES);
-       else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
-               ehca_gen_warn("The requested number of CQs is out of range "
-                             "(1 - %i) specified by HW. Value is set to %i",
-                             rblock->max_cq, rblock->max_cq);
-       }
-
-       /* query max MTU from first port -- it's the same for all ports */
-       port = (struct hipz_query_port *)rblock;
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
-       if (h_ret != H_SUCCESS) {
-               ehca_gen_err("Cannot query port properties. h_ret=%lli",
-                            h_ret);
-               ret = -EPERM;
-               goto sense_attributes1;
-       }
-
-       shca->max_mtu = port->max_mtu;
-
-sense_attributes1:
-       ehca_free_fw_ctrlblock(rblock);
-       return ret;
-}
-
-static int init_node_guid(struct ehca_shca *shca)
-{
-       int ret = 0;
-       struct hipz_query_hca *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query device properties");
-               ret = -EINVAL;
-               goto init_node_guid1;
-       }
-
-       memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
-
-init_node_guid1:
-       ehca_free_fw_ctrlblock(rblock);
-       return ret;
-}
-
-static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num,
-                              struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = ehca_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-       return 0;
-}
-
-static int ehca_init_device(struct ehca_shca *shca)
-{
-       int ret;
-
-       ret = init_node_guid(shca);
-       if (ret)
-               return ret;
-
-       strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
-       shca->ib_device.owner               = THIS_MODULE;
-
-       shca->ib_device.uverbs_abi_ver      = 8;
-       shca->ib_device.uverbs_cmd_mask     =
-               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
-               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-
-       shca->ib_device.node_type           = RDMA_NODE_IB_CA;
-       shca->ib_device.phys_port_cnt       = shca->num_ports;
-       shca->ib_device.num_comp_vectors    = 1;
-       shca->ib_device.dma_device          = &shca->ofdev->dev;
-       shca->ib_device.query_device        = ehca_query_device;
-       shca->ib_device.query_port          = ehca_query_port;
-       shca->ib_device.query_gid           = ehca_query_gid;
-       shca->ib_device.query_pkey          = ehca_query_pkey;
-       /* shca->ib_device.modify_device    = ehca_modify_device    */
-       shca->ib_device.modify_port         = ehca_modify_port;
-       shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
-       shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
-       shca->ib_device.alloc_pd            = ehca_alloc_pd;
-       shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
-       shca->ib_device.create_ah           = ehca_create_ah;
-       /* shca->ib_device.modify_ah        = ehca_modify_ah;       */
-       shca->ib_device.query_ah            = ehca_query_ah;
-       shca->ib_device.destroy_ah          = ehca_destroy_ah;
-       shca->ib_device.create_qp           = ehca_create_qp;
-       shca->ib_device.modify_qp           = ehca_modify_qp;
-       shca->ib_device.query_qp            = ehca_query_qp;
-       shca->ib_device.destroy_qp          = ehca_destroy_qp;
-       shca->ib_device.post_send           = ehca_post_send;
-       shca->ib_device.post_recv           = ehca_post_recv;
-       shca->ib_device.create_cq           = ehca_create_cq;
-       shca->ib_device.destroy_cq          = ehca_destroy_cq;
-       shca->ib_device.resize_cq           = ehca_resize_cq;
-       shca->ib_device.poll_cq             = ehca_poll_cq;
-       /* shca->ib_device.peek_cq          = ehca_peek_cq;         */
-       shca->ib_device.req_notify_cq       = ehca_req_notify_cq;
-       /* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
-       shca->ib_device.get_dma_mr          = ehca_get_dma_mr;
-       shca->ib_device.reg_phys_mr         = ehca_reg_phys_mr;
-       shca->ib_device.reg_user_mr         = ehca_reg_user_mr;
-       shca->ib_device.query_mr            = ehca_query_mr;
-       shca->ib_device.dereg_mr            = ehca_dereg_mr;
-       shca->ib_device.rereg_phys_mr       = ehca_rereg_phys_mr;
-       shca->ib_device.alloc_mw            = ehca_alloc_mw;
-       shca->ib_device.bind_mw             = ehca_bind_mw;
-       shca->ib_device.dealloc_mw          = ehca_dealloc_mw;
-       shca->ib_device.alloc_fmr           = ehca_alloc_fmr;
-       shca->ib_device.map_phys_fmr        = ehca_map_phys_fmr;
-       shca->ib_device.unmap_fmr           = ehca_unmap_fmr;
-       shca->ib_device.dealloc_fmr         = ehca_dealloc_fmr;
-       shca->ib_device.attach_mcast        = ehca_attach_mcast;
-       shca->ib_device.detach_mcast        = ehca_detach_mcast;
-       shca->ib_device.process_mad         = ehca_process_mad;
-       shca->ib_device.mmap                = ehca_mmap;
-       shca->ib_device.dma_ops             = &ehca_dma_mapping_ops;
-       shca->ib_device.get_port_immutable  = ehca_port_immutable;
-
-       if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-               shca->ib_device.uverbs_cmd_mask |=
-                       (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
-
-               shca->ib_device.create_srq          = ehca_create_srq;
-               shca->ib_device.modify_srq          = ehca_modify_srq;
-               shca->ib_device.query_srq           = ehca_query_srq;
-               shca->ib_device.destroy_srq         = ehca_destroy_srq;
-               shca->ib_device.post_srq_recv       = ehca_post_srq_recv;
-       }
-
-       return ret;
-}
-
-static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
-{
-       struct ehca_sport *sport = &shca->sport[port - 1];
-       struct ib_cq *ibcq;
-       struct ib_qp *ibqp;
-       struct ib_qp_init_attr qp_init_attr;
-       struct ib_cq_init_attr cq_attr = {};
-       int ret;
-
-       if (sport->ibcq_aqp1) {
-               ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
-               return -EPERM;
-       }
-
-       cq_attr.cqe = 10;
-       ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1),
-                           &cq_attr);
-       if (IS_ERR(ibcq)) {
-               ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
-               return PTR_ERR(ibcq);
-       }
-       sport->ibcq_aqp1 = ibcq;
-
-       if (sport->ibqp_sqp[IB_QPT_GSI]) {
-               ehca_err(&shca->ib_device, "AQP1 QP is already created.");
-               ret = -EPERM;
-               goto create_aqp1;
-       }
-
-       memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
-       qp_init_attr.send_cq          = ibcq;
-       qp_init_attr.recv_cq          = ibcq;
-       qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
-       qp_init_attr.cap.max_send_wr  = 100;
-       qp_init_attr.cap.max_recv_wr  = 100;
-       qp_init_attr.cap.max_send_sge = 2;
-       qp_init_attr.cap.max_recv_sge = 1;
-       qp_init_attr.qp_type          = IB_QPT_GSI;
-       qp_init_attr.port_num         = port;
-       qp_init_attr.qp_context       = NULL;
-       qp_init_attr.event_handler    = NULL;
-       qp_init_attr.srq              = NULL;
-
-       ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
-       if (IS_ERR(ibqp)) {
-               ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
-               ret = PTR_ERR(ibqp);
-               goto create_aqp1;
-       }
-       sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
-
-       return 0;
-
-create_aqp1:
-       ib_destroy_cq(sport->ibcq_aqp1);
-       return ret;
-}
-
-static int ehca_destroy_aqp1(struct ehca_sport *sport)
-{
-       int ret;
-
-       ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
-       if (ret) {
-               ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
-               return ret;
-       }
-
-       ret = ib_destroy_cq(sport->ibcq_aqp1);
-       if (ret)
-               ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
-
-       return ret;
-}
-
-static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
-{
-       return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
-}
-
-static ssize_t ehca_store_debug_level(struct device_driver *ddp,
-                                     const char *buf, size_t count)
-{
-       int value = (*buf) - '0';
-       if (value >= 0 && value <= 9)
-               ehca_debug_level = value;
-       return count;
-}
-
-static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
-                  ehca_show_debug_level, ehca_store_debug_level);
-
-static struct attribute *ehca_drv_attrs[] = {
-       &driver_attr_debug_level.attr,
-       NULL
-};
-
-static struct attribute_group ehca_drv_attr_grp = {
-       .attrs = ehca_drv_attrs
-};
-
-static const struct attribute_group *ehca_drv_attr_groups[] = {
-       &ehca_drv_attr_grp,
-       NULL,
-};
-
-#define EHCA_RESOURCE_ATTR(name)                                           \
-static ssize_t  ehca_show_##name(struct device *dev,                       \
-                                struct device_attribute *attr,            \
-                                char *buf)                                \
-{                                                                         \
-       struct ehca_shca *shca;                                            \
-       struct hipz_query_hca *rblock;                                     \
-       int data;                                                          \
-                                                                          \
-       shca = dev_get_drvdata(dev);                                       \
-                                                                          \
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);                      \
-       if (!rblock) {                                                     \
-               dev_err(dev, "Can't allocate rblock memory.\n");           \
-               return 0;                                                  \
-       }                                                                  \
-                                                                          \
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
-               dev_err(dev, "Can't query device properties\n");           \
-               ehca_free_fw_ctrlblock(rblock);                            \
-               return 0;                                                  \
-       }                                                                  \
-                                                                          \
-       data = rblock->name;                                               \
-       ehca_free_fw_ctrlblock(rblock);                                    \
-                                                                          \
-       if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))     \
-               return snprintf(buf, 256, "1\n");                          \
-       else                                                               \
-               return snprintf(buf, 256, "%d\n", data);                   \
-                                                                          \
-}                                                                         \
-static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
-
-EHCA_RESOURCE_ATTR(num_ports);
-EHCA_RESOURCE_ATTR(hw_ver);
-EHCA_RESOURCE_ATTR(max_eq);
-EHCA_RESOURCE_ATTR(cur_eq);
-EHCA_RESOURCE_ATTR(max_cq);
-EHCA_RESOURCE_ATTR(cur_cq);
-EHCA_RESOURCE_ATTR(max_qp);
-EHCA_RESOURCE_ATTR(cur_qp);
-EHCA_RESOURCE_ATTR(max_mr);
-EHCA_RESOURCE_ATTR(cur_mr);
-EHCA_RESOURCE_ATTR(max_mw);
-EHCA_RESOURCE_ATTR(cur_mw);
-EHCA_RESOURCE_ATTR(max_pd);
-EHCA_RESOURCE_ATTR(max_ah);
-
-static ssize_t ehca_show_adapter_handle(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
-{
-       struct ehca_shca *shca = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle);
-
-}
-static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
-
-static struct attribute *ehca_dev_attrs[] = {
-       &dev_attr_adapter_handle.attr,
-       &dev_attr_num_ports.attr,
-       &dev_attr_hw_ver.attr,
-       &dev_attr_max_eq.attr,
-       &dev_attr_cur_eq.attr,
-       &dev_attr_max_cq.attr,
-       &dev_attr_cur_cq.attr,
-       &dev_attr_max_qp.attr,
-       &dev_attr_cur_qp.attr,
-       &dev_attr_max_mr.attr,
-       &dev_attr_cur_mr.attr,
-       &dev_attr_max_mw.attr,
-       &dev_attr_cur_mw.attr,
-       &dev_attr_max_pd.attr,
-       &dev_attr_max_ah.attr,
-       NULL
-};
-
-static struct attribute_group ehca_dev_attr_grp = {
-       .attrs = ehca_dev_attrs
-};
-
-static int ehca_probe(struct platform_device *dev)
-{
-       struct ehca_shca *shca;
-       const u64 *handle;
-       struct ib_pd *ibpd;
-       int ret, i, eq_size;
-       unsigned long flags;
-
-       handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL);
-       if (!handle) {
-               ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
-                            dev->dev.of_node->full_name);
-               return -ENODEV;
-       }
-
-       if (!(*handle)) {
-               ehca_gen_err("Wrong eHCA handle for adapter: %s.",
-                            dev->dev.of_node->full_name);
-               return -ENODEV;
-       }
-
-       shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
-       if (!shca) {
-               ehca_gen_err("Cannot allocate shca memory.");
-               return -ENOMEM;
-       }
-
-       mutex_init(&shca->modify_mutex);
-       atomic_set(&shca->num_cqs, 0);
-       atomic_set(&shca->num_qps, 0);
-       shca->max_num_qps = ehca_max_qp;
-       shca->max_num_cqs = ehca_max_cq;
-
-       for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
-               spin_lock_init(&shca->sport[i].mod_sqp_lock);
-
-       shca->ofdev = dev;
-       shca->ipz_hca_handle.handle = *handle;
-       dev_set_drvdata(&dev->dev, shca);
-
-       ret = ehca_sense_attributes(shca);
-       if (ret < 0) {
-               ehca_gen_err("Cannot sense eHCA attributes.");
-               goto probe1;
-       }
-
-       ret = ehca_init_device(shca);
-       if (ret) {
-               ehca_gen_err("Cannot init ehca device struct");
-               goto probe1;
-       }
-
-       eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
-       /* create event queues */
-       ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create EQ.");
-               goto probe1;
-       }
-
-       ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create NEQ.");
-               goto probe3;
-       }
-
-       /* create internal protection domain */
-       ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
-       if (IS_ERR(ibpd)) {
-               ehca_err(&shca->ib_device, "Cannot create internal PD.");
-               ret = PTR_ERR(ibpd);
-               goto probe4;
-       }
-
-       shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
-       shca->pd->ib_pd.device = &shca->ib_device;
-
-       /* create internal max MR */
-       ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
-
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
-                        ret);
-               goto probe5;
-       }
-
-       ret = ib_register_device(&shca->ib_device, NULL);
-       if (ret) {
-               ehca_err(&shca->ib_device,
-                        "ib_register_device() failed ret=%i", ret);
-               goto probe6;
-       }
-
-       /* create AQP1 for port 1 */
-       if (ehca_open_aqp1 == 1) {
-               shca->sport[0].port_state = IB_PORT_DOWN;
-               ret = ehca_create_aqp1(shca, 1);
-               if (ret) {
-                       ehca_err(&shca->ib_device,
-                                "Cannot create AQP1 for port 1.");
-                       goto probe7;
-               }
-       }
-
-       /* create AQP1 for port 2 */
-       if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
-               shca->sport[1].port_state = IB_PORT_DOWN;
-               ret = ehca_create_aqp1(shca, 2);
-               if (ret) {
-                       ehca_err(&shca->ib_device,
-                                "Cannot create AQP1 for port 2.");
-                       goto probe8;
-               }
-       }
-
-       ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-       if (ret) /* only complain; we can live without attributes */
-               ehca_err(&shca->ib_device,
-                        "Cannot create device attributes ret=%d", ret);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       list_add(&shca->shca_list, &shca_list);
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       return 0;
-
-probe8:
-       ret = ehca_destroy_aqp1(&shca->sport[0]);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy AQP1 for port 1. ret=%i", ret);
-
-probe7:
-       ib_unregister_device(&shca->ib_device);
-
-probe6:
-       ret = ehca_dereg_internal_maxmr(shca);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal MR. ret=%x", ret);
-
-probe5:
-       ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal PD. ret=%x", ret);
-
-probe4:
-       ret = ehca_destroy_eq(shca, &shca->neq);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy NEQ. ret=%x", ret);
-
-probe3:
-       ret = ehca_destroy_eq(shca, &shca->eq);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy EQ. ret=%x", ret);
-
-probe1:
-       ib_dealloc_device(&shca->ib_device);
-
-       return -EINVAL;
-}
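
ehca_probe() above is a textbook goto-unwind ladder: each probeN label releases exactly what was acquired before the failing step, in reverse acquisition order, so every error path frees everything once and nothing twice. The idiom, stripped to its skeleton (acquire_*/release_* are hypothetical helpers returning 0 on success):

        static int demo_probe(void)
        {
                int ret;

                ret = acquire_a();
                if (ret)
                        return ret;

                ret = acquire_b();
                if (ret)
                        goto err_a;

                ret = acquire_c();
                if (ret)
                        goto err_b;

                return 0;

        err_b:
                release_b();            /* undo step b */
        err_a:
                release_a();            /* undo step a */
                return ret;
        }
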
-
-static int ehca_remove(struct platform_device *dev)
-{
-       struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
-       unsigned long flags;
-       int ret;
-
-       sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-
-       if (ehca_open_aqp1 == 1) {
-               int i;
-               for (i = 0; i < shca->num_ports; i++) {
-                       ret = ehca_destroy_aqp1(&shca->sport[i]);
-                       if (ret)
-                               ehca_err(&shca->ib_device,
-                                        "Cannot destroy AQP1 for port %x "
-                                        "ret=%i", i, ret);
-               }
-       }
-
-       ib_unregister_device(&shca->ib_device);
-
-       ret = ehca_dereg_internal_maxmr(shca);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal MR. ret=%i", ret);
-
-       ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal PD. ret=%i", ret);
-
-       ret = ehca_destroy_eq(shca, &shca->eq);
-       if (ret)
-               ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
-
-       ret = ehca_destroy_eq(shca, &shca->neq);
-       if (ret)
-               ehca_err(&shca->ib_device, "Cannot destroy NEQ. ret=%i", ret);
-
-       ib_dealloc_device(&shca->ib_device);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       list_del(&shca->shca_list);
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       return ret;
-}
-
-static struct of_device_id ehca_device_table[] =
-{
-       {
-               .name       = "lhca",
-               .compatible = "IBM,lhca",
-       },
-       {},
-};
-MODULE_DEVICE_TABLE(of, ehca_device_table);
-
-static struct platform_driver ehca_driver = {
-       .probe       = ehca_probe,
-       .remove      = ehca_remove,
-       .driver = {
-               .name = "ehca",
-               .owner = THIS_MODULE,
-               .groups = ehca_drv_attr_groups,
-               .of_match_table = ehca_device_table,
-       },
-};
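
The of_device_id table ties the driver to device-tree nodes whose compatible string is "IBM,lhca"; once matched, ehca_probe() reads the "ibm,hca-handle" property from that node. Roughly, the binding looks like this (node shape is illustrative, not taken from a real tree):

        /* device tree (sketch):
         *
         *     lhca@0 {
         *             compatible     = "IBM,lhca";
         *             ibm,hca-handle = <...>;    // consumed by ehca_probe()
         *     };
         */
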
-
-void ehca_poll_eqs(unsigned long data)
-{
-       struct ehca_shca *shca;
-
-       spin_lock(&shca_list_lock);
-       list_for_each_entry(shca, &shca_list, shca_list) {
-               if (shca->eq.is_initialized) {
-                       /* call deadman proc only if eq ptr does not change */
-                       struct ehca_eq *eq = &shca->eq;
-                       int max = 3;
-                       volatile u64 q_ofs, q_ofs2;
-                       unsigned long flags;
-                       spin_lock_irqsave(&eq->spinlock, flags);
-                       q_ofs = eq->ipz_queue.current_q_offset;
-                       spin_unlock_irqrestore(&eq->spinlock, flags);
-                       do {
-                               spin_lock_irqsave(&eq->spinlock, flags);
-                               q_ofs2 = eq->ipz_queue.current_q_offset;
-                               spin_unlock_irqrestore(&eq->spinlock, flags);
-                               max--;
-                       } while (q_ofs == q_ofs2 && max > 0);
-                       if (q_ofs == q_ofs2)
-                               ehca_process_eq(shca, 0);
-               }
-       }
-       mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
-       spin_unlock(&shca_list_lock);
-}
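
ehca_poll_eqs() is a deadman check against lost interrupts: it samples the EQ consumer offset up to three times and only drives ehca_process_eq() by hand if the offset never moved, then re-arms itself one second out. The self-rearming timer pattern it relies on, in isolation (pre-4.15 timer API, matching this code; demo_* names are hypothetical):

        static struct timer_list demo_timer;

        static void demo_poll(unsigned long data)
        {
                /* ... periodic work ... */
                mod_timer(&demo_timer, round_jiffies(jiffies + HZ));
        }

        static void demo_timer_start(void)
        {
                init_timer(&demo_timer);
                demo_timer.function = demo_poll;
                demo_timer.expires = jiffies + HZ;
                add_timer(&demo_timer);
        }
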
-
-static int ehca_mem_notifier(struct notifier_block *nb,
-                            unsigned long action, void *data)
-{
-       static unsigned long ehca_dmem_warn_time;
-       unsigned long flags;
-
-       switch (action) {
-       case MEM_CANCEL_OFFLINE:
-       case MEM_CANCEL_ONLINE:
-       case MEM_ONLINE:
-       case MEM_OFFLINE:
-               return NOTIFY_OK;
-       case MEM_GOING_ONLINE:
-       case MEM_GOING_OFFLINE:
-               /* only ok if no hca is attached to the lpar */
-               spin_lock_irqsave(&shca_list_lock, flags);
-               if (list_empty(&shca_list)) {
-                       spin_unlock_irqrestore(&shca_list_lock, flags);
-                       return NOTIFY_OK;
-               } else {
-                       spin_unlock_irqrestore(&shca_list_lock, flags);
-                       if (printk_timed_ratelimit(&ehca_dmem_warn_time,
-                                                  30 * 1000))
-                               ehca_gen_err("DMEM operations are not allowed "
-                                            "in conjunction with eHCA");
-                       return NOTIFY_BAD;
-               }
-       }
-       return NOTIFY_OK;
-}
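
The notifier above vetoes memory hotplug while any adapter is bound: completed transitions are simply acknowledged, but MEM_GOING_ONLINE/OFFLINE returns NOTIFY_BAD unless shca_list is empty, with the complaint rate-limited to one per 30 seconds. Skeleton of such a vetoing callback (resource_in_use() is a hypothetical stand-in for the shca_list test):

        static int demo_mem_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
        {
                switch (action) {
                case MEM_GOING_ONLINE:
                case MEM_GOING_OFFLINE:
                        if (resource_in_use())
                                return NOTIFY_BAD;  /* veto the transition */
                        return NOTIFY_OK;
                default:
                        return NOTIFY_OK;
                }
        }
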
-
-static struct notifier_block ehca_mem_nb = {
-       .notifier_call = ehca_mem_notifier,
-};
-
-static int __init ehca_module_init(void)
-{
-       int ret;
-
-       printk(KERN_INFO "eHCA Infiniband Device Driver "
-              "(Version " HCAD_VERSION ")\n");
-
-       ret = ehca_create_comp_pool();
-       if (ret) {
-               ehca_gen_err("Cannot create comp pool.");
-               return ret;
-       }
-
-       ret = ehca_create_slab_caches();
-       if (ret) {
-               ehca_gen_err("Cannot create SLAB caches");
-               ret = -ENOMEM;
-               goto module_init1;
-       }
-
-       ret = ehca_create_busmap();
-       if (ret) {
-               ehca_gen_err("Cannot create busmap.");
-               goto module_init2;
-       }
-
-       ret = ibmebus_register_driver(&ehca_driver);
-       if (ret) {
-               ehca_gen_err("Cannot register eHCA device driver");
-               ret = -EINVAL;
-               goto module_init3;
-       }
-
-       ret = register_memory_notifier(&ehca_mem_nb);
-       if (ret) {
-               ehca_gen_err("Failed registering memory add/remove notifier");
-               goto module_init4;
-       }
-
-       if (ehca_poll_all_eqs != 1) {
-               ehca_gen_err("WARNING!!!");
-               ehca_gen_err("It is possible to lose interrupts.");
-       } else {
-               init_timer(&poll_eqs_timer);
-               poll_eqs_timer.function = ehca_poll_eqs;
-               poll_eqs_timer.expires = jiffies + HZ;
-               add_timer(&poll_eqs_timer);
-       }
-
-       return 0;
-
-module_init4:
-       ibmebus_unregister_driver(&ehca_driver);
-
-module_init3:
-       ehca_destroy_busmap();
-
-module_init2:
-       ehca_destroy_slab_caches();
-
-module_init1:
-       ehca_destroy_comp_pool();
-       return ret;
-}
-
-static void __exit ehca_module_exit(void)
-{
-       if (ehca_poll_all_eqs == 1)
-               del_timer_sync(&poll_eqs_timer);
-
-       ibmebus_unregister_driver(&ehca_driver);
-
-       unregister_memory_notifier(&ehca_mem_nb);
-
-       ehca_destroy_busmap();
-
-       ehca_destroy_slab_caches();
-
-       ehca_destroy_comp_pool();
-
-       idr_destroy(&ehca_cq_idr);
-       idr_destroy(&ehca_qp_idr);
-}
-
-module_init(ehca_module_init);
-module_exit(ehca_module_exit);
diff --git a/drivers/staging/rdma/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c
deleted file mode 100644 (file)
index cec1815..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  mcast  functions
- *
- *  Authors: Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define MAX_MC_LID 0xFFFE
-#define MIN_MC_LID 0xC000      /* Multicast limits */
-#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
-#define EHCA_VALID_MULTICAST_LID(lid) \
-       (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
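
Per the InfiniBand spec these macros encode that a multicast GID carries 0xFF in its top byte and that multicast LIDs occupy 0xC000-0xFFFE; attach and detach below apply both checks verbatim. For instance, lid = 0xC001 passes while 0xBFFF is rejected, and only a gid whose raw[0] == 0xFF is accepted.
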
-
-int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-                                             ib_device);
-       union ib_gid my_gid;
-       u64 subnet_prefix, interface_id, h_ret;
-
-       if (ibqp->qp_type != IB_QPT_UD) {
-               ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
-               return -EINVAL;
-       }
-
-       if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-               ehca_err(ibqp->device, "invalid multicast gid");
-               return -EINVAL;
-       } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-               ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
-               return -EINVAL;
-       }
-
-       memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-       subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-       interface_id = be64_to_cpu(my_gid.global.interface_id);
-       h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
-                                  my_qp->ipz_qp_handle,
-                                  my_qp->galpas.kernel,
-                                  lid, subnet_prefix, interface_id);
-       if (h_ret != H_SUCCESS)
-               ehca_err(ibqp->device,
-                        "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
-                        "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-       return ehca2ib_return_code(h_ret);
-}
-
-int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(ibqp->pd->device,
-                                             struct ehca_shca, ib_device);
-       union ib_gid my_gid;
-       u64 subnet_prefix, interface_id, h_ret;
-
-       if (ibqp->qp_type != IB_QPT_UD) {
-               ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
-               return -EINVAL;
-       }
-
-       if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-               ehca_err(ibqp->device, "invalid multicast gid");
-               return -EINVAL;
-       } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-               ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
-               return -EINVAL;
-       }
-
-       memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-       subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-       interface_id = be64_to_cpu(my_gid.global.interface_id);
-       h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
-                                  my_qp->ipz_qp_handle,
-                                  my_qp->galpas.kernel,
-                                  lid, subnet_prefix, interface_id);
-       if (h_ret != H_SUCCESS)
-               ehca_err(ibqp->device,
-                        "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
-                        "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-       return ehca2ib_return_code(h_ret);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
deleted file mode 100644 (file)
index 553e883..0000000
+++ /dev/null
@@ -1,2591 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <rdma/ib_umem.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "hcp_if.h"
-#include "hipz_hw.h"
-
-#define NUM_CHUNKS(length, chunk_size) \
-       (((length) + (chunk_size - 1)) / (chunk_size))
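
NUM_CHUNKS is plain ceiling division, NUM_CHUNKS(x, n) == ceil(x / n); the registration paths below feed it the intra-page offset plus the region length. Example: a 0x2000-byte region starting 0x100 into a 4K page needs NUM_CHUNKS(0x100 + 0x2000, 0x1000) = 3 kernel pages.
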
-
-/* max number of rpages (per hcall register_rpages) */
-#define MAX_RPAGES 512
-
-/* DMEM toleration management */
-#define EHCA_SECTSHIFT        SECTION_SIZE_BITS
-#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
-#define EHCA_HUGEPAGESHIFT     34
-#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
-#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
-#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
-#define EHCA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
-#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
-#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
-#define EHCA_TOP_MAP_SIZE (0x10000)               /* currently fixed map size */
-#define EHCA_DIR_MAP_SIZE (0x10000)
-#define EHCA_ENT_MAP_SIZE (0x10000)
-#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)
-
-static unsigned long ehca_mr_len;
-
-/*
- * Memory map data structures
- */
-struct ehca_dir_bmap {
-       u64 ent[EHCA_MAP_ENTRIES];
-};
-struct ehca_top_bmap {
-       struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
-};
-struct ehca_bmap {
-       struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
-};
-
-static struct ehca_bmap *ehca_bmap;
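
The busmap is a three-level radix tree over memory sections, with 13 index bits per level (EHCA_DIR_INDEX_SHIFT), i.e. 8192 slots each. A lookup implied by the shift/mask constants above would decompose a section number like this (sketch only, assuming all intermediate levels are populated):

        static u64 demo_bmap_lookup(struct ehca_bmap *bmap, unsigned long sec)
        {
                int top = (sec >> EHCA_TOP_INDEX_SHIFT) & EHCA_INDEX_MASK;
                int dir = (sec >> EHCA_DIR_INDEX_SHIFT) & EHCA_INDEX_MASK;
                int ent = sec & EHCA_INDEX_MASK;

                return bmap->top[top]->dir[dir]->ent[ent];
        }
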
-
-static struct kmem_cache *mr_cache;
-static struct kmem_cache *mw_cache;
-
-enum ehca_mr_pgsize {
-       EHCA_MR_PGSIZE4K  = 0x1000L,
-       EHCA_MR_PGSIZE64K = 0x10000L,
-       EHCA_MR_PGSIZE1M  = 0x100000L,
-       EHCA_MR_PGSIZE16M = 0x1000000L
-};
-
-#define EHCA_MR_PGSHIFT4K  12
-#define EHCA_MR_PGSHIFT64K 16
-#define EHCA_MR_PGSHIFT1M  20
-#define EHCA_MR_PGSHIFT16M 24
-
-static u64 ehca_map_vaddr(void *caddr);
-
-static u32 ehca_encode_hwpage_size(u32 pgsize)
-{
-       int log = ilog2(pgsize);
-       WARN_ON(log < 12 || log > 24 || log & 3);
-       return (log - 12) / 4;
-}
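
The firmware encoding is (log2(pgsize) - 12) / 4, mapping the four supported sizes onto 4K -> 0, 64K -> 1, 1M -> 2, 16M -> 3; the WARN_ON rejects anything off that grid. Worked through for 64K: ilog2(0x10000) = 16, and (16 - 12) / 4 = 1.
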
-
-static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
-{
-       return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
-}
-
-static struct ehca_mr *ehca_mr_new(void)
-{
-       struct ehca_mr *me;
-
-       me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
-       if (me)
-               spin_lock_init(&me->mrlock);
-       else
-               ehca_gen_err("alloc failed");
-
-       return me;
-}
-
-static void ehca_mr_delete(struct ehca_mr *me)
-{
-       kmem_cache_free(mr_cache, me);
-}
-
-static struct ehca_mw *ehca_mw_new(void)
-{
-       struct ehca_mw *me;
-
-       me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
-       if (me)
-               spin_lock_init(&me->mwlock);
-       else
-               ehca_gen_err("alloc failed");
-
-       return me;
-}
-
-static void ehca_mw_delete(struct ehca_mw *me)
-{
-       kmem_cache_free(mw_cache, me);
-}
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
-{
-       struct ib_mr *ib_mr;
-       int ret;
-       struct ehca_mr *e_maxmr;
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-
-       if (shca->maxmr) {
-               e_maxmr = ehca_mr_new();
-               if (!e_maxmr) {
-                       ehca_err(&shca->ib_device, "out of memory");
-                       ib_mr = ERR_PTR(-ENOMEM);
-                       goto get_dma_mr_exit0;
-               }
-
-               ret = ehca_reg_maxmr(shca, e_maxmr,
-                                    (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
-                                    mr_access_flags, e_pd,
-                                    &e_maxmr->ib.ib_mr.lkey,
-                                    &e_maxmr->ib.ib_mr.rkey);
-               if (ret) {
-                       ehca_mr_delete(e_maxmr);
-                       ib_mr = ERR_PTR(ret);
-                       goto get_dma_mr_exit0;
-               }
-               ib_mr = &e_maxmr->ib.ib_mr;
-       } else {
-               ehca_err(&shca->ib_device, "no internal max-MR exists!");
-               ib_mr = ERR_PTR(-EINVAL);
-               goto get_dma_mr_exit0;
-       }
-
-get_dma_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
-                        PTR_ERR(ib_mr), pd, mr_access_flags);
-       return ib_mr;
-} /* end ehca_get_dma_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *phys_buf_array,
-                              int num_phys_buf,
-                              int mr_access_flags,
-                              u64 *iova_start)
-{
-       struct ib_mr *ib_mr;
-       int ret;
-       struct ehca_mr *e_mr;
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-
-       u64 size;
-
-       if ((num_phys_buf <= 0) || !phys_buf_array) {
-               ehca_err(pd->device, "bad input values: num_phys_buf=%x "
-                        "phys_buf_array=%p", num_phys_buf, phys_buf_array);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-
-       /* check physical buffer list and calculate size */
-       ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
-                                           iova_start, &size);
-       if (ret) {
-               ib_mr = ERR_PTR(ret);
-               goto reg_phys_mr_exit0;
-       }
-       if ((size == 0) ||
-           (((u64)iova_start + size) < (u64)iova_start)) {
-               ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
-                        size, iova_start);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(pd->device, "out of memory");
-               ib_mr = ERR_PTR(-ENOMEM);
-               goto reg_phys_mr_exit0;
-       }
-
-       /* register MR on HCA */
-       if (ehca_mr_is_maxmr(size, iova_start)) {
-               e_mr->flags |= EHCA_MR_FLAG_MAXMR;
-               ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
-                                    e_pd, &e_mr->ib.ib_mr.lkey,
-                                    &e_mr->ib.ib_mr.rkey);
-               if (ret) {
-                       ib_mr = ERR_PTR(ret);
-                       goto reg_phys_mr_exit1;
-               }
-       } else {
-               struct ehca_mr_pginfo pginfo;
-               u32 num_kpages;
-               u32 num_hwpages;
-               u64 hw_pgsize;
-
-               num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
-                                       PAGE_SIZE);
-               /* for kernel space we try most possible pgsize */
-               hw_pgsize = ehca_get_max_hwpage_size(shca);
-               num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
-                                        hw_pgsize);
-               memset(&pginfo, 0, sizeof(pginfo));
-               pginfo.type = EHCA_MR_PGI_PHYS;
-               pginfo.num_kpages = num_kpages;
-               pginfo.hwpage_size = hw_pgsize;
-               pginfo.num_hwpages = num_hwpages;
-               pginfo.u.phy.num_phys_buf = num_phys_buf;
-               pginfo.u.phy.phys_buf_array = phys_buf_array;
-               pginfo.next_hwpage =
-                       ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-
-               ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
-                                 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
-                                 &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
-               if (ret) {
-                       ib_mr = ERR_PTR(ret);
-                       goto reg_phys_mr_exit1;
-               }
-       }
-
-       /* successful registration of all pages */
-       return &e_mr->ib.ib_mr;
-
-reg_phys_mr_exit1:
-       ehca_mr_delete(e_mr);
-reg_phys_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
-                        "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
-                        PTR_ERR(ib_mr), pd, phys_buf_array,
-                        num_phys_buf, mr_access_flags, iova_start);
-       return ib_mr;
-} /* end ehca_reg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                              u64 virt, int mr_access_flags,
-                              struct ib_udata *udata)
-{
-       struct ib_mr *ib_mr;
-       struct ehca_mr *e_mr;
-       struct ehca_shca *shca;
-       struct ehca_pd *e_pd;
-       struct ehca_mr_pginfo pginfo;
-       int ret, page_shift;
-       u32 num_kpages;
-       u32 num_hwpages;
-       u64 hwpage_size;
-
-       if (!pd) {
-               ehca_gen_err("bad pd=%p", pd);
-               return ERR_PTR(-EFAULT);
-       }
-
-       /* safe to dereference pd only after the NULL check above */
-       shca = container_of(pd->device, struct ehca_shca, ib_device);
-       e_pd = container_of(pd, struct ehca_pd, ib_pd);
-
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit0;
-       }
-
-       if (length == 0 || virt + length < virt) {
-               ehca_err(pd->device, "bad input values: length=%llx "
-                        "virt_base=%llx", length, virt);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(pd->device, "out of memory");
-               ib_mr = ERR_PTR(-ENOMEM);
-               goto reg_user_mr_exit0;
-       }
-
-       e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
-                                mr_access_flags, 0);
-       if (IS_ERR(e_mr->umem)) {
-               ib_mr = ERR_CAST(e_mr->umem);
-               goto reg_user_mr_exit1;
-       }
-
-       if (e_mr->umem->page_size != PAGE_SIZE) {
-               ehca_err(pd->device, "page size not supported, "
-                        "e_mr->umem->page_size=%x", e_mr->umem->page_size);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit2;
-       }
-
-       /* determine number of MR pages */
-       num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
-       /* select proper hw_pgsize */
-       page_shift = PAGE_SHIFT;
-       if (e_mr->umem->hugetlb) {
-               /* determine page_shift, clamp between 4K and 16M */
-               page_shift = (fls64(length - 1) + 3) & ~3;
-               page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
-                                EHCA_MR_PGSHIFT16M);
-       }
-       hwpage_size = 1UL << page_shift;
-
-       /* now that we have the desired page size, shift until it's
-        * supported, too. 4K is always supported, so this terminates.
-        */
-       while (!(hwpage_size & shca->hca_cap_mr_pgsize))
-               hwpage_size >>= 4;
-
-reg_user_mr_fallback:
-       num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
-       /* register MR on HCA */
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_USER;
-       pginfo.hwpage_size = hwpage_size;
-       pginfo.num_kpages = num_kpages;
-       pginfo.num_hwpages = num_hwpages;
-       pginfo.u.usr.region = e_mr->umem;
-       pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
-       pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
-       ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
-                         e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
-                         &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
-       if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
-               ehca_warn(pd->device, "failed to register mr "
-                         "with hwpage_size=%llx", hwpage_size);
-               ehca_info(pd->device, "try to register mr with "
-                         "kpage_size=%lx", PAGE_SIZE);
-               /*
-                * this means kpages are not contiguous for a hw page
-                * try kernel page size as fallback solution
-                */
-               hwpage_size = PAGE_SIZE;
-               goto reg_user_mr_fallback;
-       }
-       if (ret) {
-               ib_mr = ERR_PTR(ret);
-               goto reg_user_mr_exit2;
-       }
-
-       /* successful registration of all pages */
-       return &e_mr->ib.ib_mr;
-
-reg_user_mr_exit2:
-       ib_umem_release(e_mr->umem);
-reg_user_mr_exit1:
-       ehca_mr_delete(e_mr);
-reg_user_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
-                        PTR_ERR(ib_mr), pd, mr_access_flags, udata);
-       return ib_mr;
-} /* end ehca_reg_user_mr() */
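
For hugetlb regions, the page-shift heuristic in ehca_reg_user_mr() is worth unpacking: fls64(length - 1) is ceil(log2(length)), the "+ 3, & ~3" rounds that up to a multiple of 4, and the min/max clamp pins it to the supported 4K..16M grid before the capability loop shifts down in steps of 16x until the HCA's page-size mask accepts it. Example: a 5 MiB region gives fls64(length - 1) = 23, rounded up to 24, i.e. a 16M hardware page (assuming the HCA advertises that size).
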
-
-/*----------------------------------------------------------------------*/
-
-int ehca_rereg_phys_mr(struct ib_mr *mr,
-                      int mr_rereg_mask,
-                      struct ib_pd *pd,
-                      struct ib_phys_buf *phys_buf_array,
-                      int num_phys_buf,
-                      int mr_access_flags,
-                      u64 *iova_start)
-{
-       int ret;
-
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-       u64 new_size;
-       u64 *new_start;
-       u32 new_acl;
-       struct ehca_pd *new_pd;
-       u32 tmp_lkey, tmp_rkey;
-       unsigned long sl_flags;
-       u32 num_kpages = 0;
-       u32 num_hwpages = 0;
-       struct ehca_mr_pginfo pginfo;
-
-       if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
-               /* TODO not supported, because PHYP rereg hCall needs pages */
-               ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
-                        "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       if (mr_rereg_mask & IB_MR_REREG_PD) {
-               if (!pd) {
-                       ehca_err(mr->device, "rereg with bad pd, pd=%p "
-                                "mr_rereg_mask=%x", pd, mr_rereg_mask);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-       }
-
-       if ((mr_rereg_mask &
-            ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
-           (mr_rereg_mask == 0)) {
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       /* check other parameters */
-       if (e_mr == shca->maxmr) {
-               /* should be impossible, however reject to be sure */
-               ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
-                        "shca->maxmr=%p mr->lkey=%x",
-                        mr, shca->maxmr, mr->lkey);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
-               if (e_mr->flags & EHCA_MR_FLAG_FMR) {
-                       ehca_err(mr->device, "not supported for FMR, mr=%p "
-                                "flags=%x", mr, e_mr->flags);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-               if (!phys_buf_array || num_phys_buf <= 0) {
-                       ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
-                                " phys_buf_array=%p num_phys_buf=%x",
-                                mr_rereg_mask, phys_buf_array, num_phys_buf);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-       }
-       if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&     /* change ACL */
-           (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-            ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
-                        "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       /* set requested values dependent on rereg request */
-       spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-       new_start = e_mr->start;
-       new_size = e_mr->size;
-       new_acl = e_mr->acl;
-       new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
-
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
-
-               new_start = iova_start; /* change address */
-               /* check physical buffer list and calculate size */
-               ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
-                                                   num_phys_buf, iova_start,
-                                                   &new_size);
-               if (ret)
-                       goto rereg_phys_mr_exit1;
-               if ((new_size == 0) ||
-                   (((u64)iova_start + new_size) < (u64)iova_start)) {
-                       ehca_err(mr->device, "bad input values: new_size=%llx "
-                                "iova_start=%p", new_size, iova_start);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit1;
-               }
-               num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
-                                       new_size, PAGE_SIZE);
-               num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
-                                        new_size, hw_pgsize);
-               memset(&pginfo, 0, sizeof(pginfo));
-               pginfo.type = EHCA_MR_PGI_PHYS;
-               pginfo.num_kpages = num_kpages;
-               pginfo.hwpage_size = hw_pgsize;
-               pginfo.num_hwpages = num_hwpages;
-               pginfo.u.phy.num_phys_buf = num_phys_buf;
-               pginfo.u.phy.phys_buf_array = phys_buf_array;
-               pginfo.next_hwpage =
-                       ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               new_acl = mr_access_flags;
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               new_pd = container_of(pd, struct ehca_pd, ib_pd);
-
-       ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
-                           new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
-       if (ret)
-               goto rereg_phys_mr_exit1;
-
-       /* successful reregistration */
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mr->pd = pd;
-       mr->lkey = tmp_lkey;
-       mr->rkey = tmp_rkey;
-
-rereg_phys_mr_exit1:
-       spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-rereg_phys_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
-                        "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
-                        "iova_start=%p",
-                        ret, mr, mr_rereg_mask, pd, phys_buf_array,
-                        num_phys_buf, mr_access_flags, iova_start);
-       return ret;
-} /* end ehca_rereg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-       unsigned long sl_flags;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
-                        "e_mr->flags=%x", mr, e_mr, e_mr->flags);
-               ret = -EINVAL;
-               goto query_mr_exit0;
-       }
-
-       memset(mr_attr, 0, sizeof(struct ib_mr_attr));
-       spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-
-       h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
-                        "hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, mr, shca->ipz_hca_handle.handle,
-                        e_mr->ipz_mr_handle.handle, mr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto query_mr_exit1;
-       }
-       mr_attr->pd = mr->pd;
-       mr_attr->device_virt_addr = hipzout.vaddr;
-       mr_attr->size = hipzout.len;
-       mr_attr->lkey = hipzout.lkey;
-       mr_attr->rkey = hipzout.rkey;
-       ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
-
-query_mr_exit1:
-       spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-query_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
-                        ret, mr, mr_attr);
-       return ret;
-} /* end ehca_query_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_mr(struct ib_mr *mr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-
-       if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
-                        "e_mr->flags=%x", mr, e_mr, e_mr->flags);
-               ret = -EINVAL;
-               goto dereg_mr_exit0;
-       } else if (e_mr == shca->maxmr) {
-               /* should be impossible, however reject to be sure */
-               ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
-                        "shca->maxmr=%p mr->lkey=%x",
-                        mr, shca->maxmr, mr->lkey);
-               ret = -EINVAL;
-               goto dereg_mr_exit0;
-       }
-
-       /* TODO: BUSY: MR still has bound window(s) */
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
-                        "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
-                        h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
-                        e_mr->ipz_mr_handle.handle, mr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto dereg_mr_exit0;
-       }
-
-       if (e_mr->umem)
-               ib_umem_release(e_mr->umem);
-
-       /* successful deregistration */
-       ehca_mr_delete(e_mr);
-
-dereg_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
-       return ret;
-} /* end ehca_dereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
-       struct ib_mw *ib_mw;
-       u64 h_ret;
-       struct ehca_mw *e_mw;
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_mw_hipzout_parms hipzout;
-
-       if (type != IB_MW_TYPE_1)
-               return ERR_PTR(-EINVAL);
-
-       e_mw = ehca_mw_new();
-       if (!e_mw) {
-               ib_mw = ERR_PTR(-ENOMEM);
-               goto alloc_mw_exit0;
-       }
-
-       h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
-                                        e_pd->fw_pd, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
-                        "shca=%p hca_hndl=%llx mw=%p",
-                        h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
-               ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
-               goto alloc_mw_exit1;
-       }
-       /* successful MW allocation */
-       e_mw->ipz_mw_handle = hipzout.handle;
-       e_mw->ib_mw.rkey    = hipzout.rkey;
-       return &e_mw->ib_mw;
-
-alloc_mw_exit1:
-       ehca_mw_delete(e_mw);
-alloc_mw_exit0:
-       if (IS_ERR(ib_mw))
-               ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
-       return ib_mw;
-} /* end ehca_alloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_bind_mw(struct ib_qp *qp,
-                struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind)
-{
-       /* TODO: not supported up to now */
-       ehca_gen_err("bind MW currently not supported by HCAD");
-
-       return -EPERM;
-} /* end ehca_bind_mw() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_mw(struct ib_mw *mw)
-{
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mw->device, struct ehca_shca, ib_device);
-       struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
-
-       h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
-                        "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
-                        h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
-                        e_mw->ipz_mw_handle.handle);
-               return ehca2ib_return_code(h_ret);
-       }
-       /* successful deallocation */
-       ehca_mw_delete(e_mw);
-       return 0;
-} /* end ehca_dealloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-                             int mr_access_flags,
-                             struct ib_fmr_attr *fmr_attr)
-{
-       struct ib_fmr *ib_fmr;
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_mr *e_fmr;
-       int ret;
-       u32 tmp_lkey, tmp_rkey;
-       struct ehca_mr_pginfo pginfo;
-       u64 hw_pgsize;
-
-       /* check other parameters */
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-       if (mr_access_flags & IB_ACCESS_MW_BIND) {
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-       if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
-               ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
-                        "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
-                        fmr_attr->max_pages, fmr_attr->max_maps,
-                        fmr_attr->page_shift);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-
-       hw_pgsize = 1 << fmr_attr->page_shift;
-       if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
-               ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
-                        fmr_attr->page_shift);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-
-       e_fmr = ehca_mr_new();
-       if (!e_fmr) {
-               ib_fmr = ERR_PTR(-ENOMEM);
-               goto alloc_fmr_exit0;
-       }
-       e_fmr->flags |= EHCA_MR_FLAG_FMR;
-
-       /* register MR on HCA */
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.hwpage_size = hw_pgsize;
-       /*
-        * pginfo.num_hwpages==0, ie register_rpages() will not be called
-        * but deferred to map_phys_fmr()
-        */
-       ret = ehca_reg_mr(shca, e_fmr, NULL,
-                         fmr_attr->max_pages * (1 << fmr_attr->page_shift),
-                         mr_access_flags, e_pd, &pginfo,
-                         &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
-       if (ret) {
-               ib_fmr = ERR_PTR(ret);
-               goto alloc_fmr_exit1;
-       }
-
-       /* successful */
-       e_fmr->hwpage_size = hw_pgsize;
-       e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
-       e_fmr->fmr_max_pages = fmr_attr->max_pages;
-       e_fmr->fmr_max_maps = fmr_attr->max_maps;
-       e_fmr->fmr_map_cnt = 0;
-       return &e_fmr->ib.ib_fmr;
-
-alloc_fmr_exit1:
-       ehca_mr_delete(e_fmr);
-alloc_fmr_exit0:
-       return ib_fmr;
-} /* end ehca_alloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-                     u64 *page_list,
-                     int list_len,
-                     u64 iova)
-{
-       int ret;
-       struct ehca_shca *shca =
-               container_of(fmr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-       struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
-       struct ehca_mr_pginfo pginfo;
-       u32 tmp_lkey, tmp_rkey;
-
-       if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-                        e_fmr, e_fmr->flags);
-               ret = -EINVAL;
-               goto map_phys_fmr_exit0;
-       }
-       ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
-       if (ret)
-               goto map_phys_fmr_exit0;
-       if (iova % e_fmr->fmr_page_size) {
-               /* only whole-numbered pages */
-               ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
-                        iova, e_fmr->fmr_page_size);
-               ret = -EINVAL;
-               goto map_phys_fmr_exit0;
-       }
-       if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
-               /* HCAD does not limit the maps; however, trace this anyway */
-               ehca_info(fmr->device, "map limit exceeded, fmr=%p "
-                         "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
-                         fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
-       }
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_FMR;
-       pginfo.num_kpages = list_len;
-       pginfo.hwpage_size = e_fmr->hwpage_size;
-       pginfo.num_hwpages =
-               list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
-       pginfo.u.fmr.page_list = page_list;
-       pginfo.next_hwpage =
-               (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
-       pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
-
-       ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
-                           list_len * e_fmr->fmr_page_size,
-                           e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
-       if (ret)
-               goto map_phys_fmr_exit0;
-
-       /* successful reregistration */
-       e_fmr->fmr_map_cnt++;
-       e_fmr->ib.ib_fmr.lkey = tmp_lkey;
-       e_fmr->ib.ib_fmr.rkey = tmp_rkey;
-       return 0;
-
-map_phys_fmr_exit0:
-       if (ret)
-               ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
-                        "iova=%llx", ret, fmr, page_list, list_len, iova);
-       return ret;
-} /* end ehca_map_phys_fmr() */
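
Mapping an FMR is just a reregistration with fresh pages: num_hwpages = list_len * fmr_page_size / hwpage_size re-expresses the caller's page list in hardware pages, and next_hwpage captures the iova's offset inside its first FMR page. Since ehca_alloc_fmr() sets hwpage_size equal to the FMR page size, the ratio here is 1; e.g. list_len = 8 pages of 64K yields 8 * 0x10000 / 0x10000 = 8 hardware pages.
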
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_fmr(struct list_head *fmr_list)
-{
-       int ret = 0;
-       struct ib_fmr *ib_fmr;
-       struct ehca_shca *shca = NULL;
-       struct ehca_shca *prev_shca;
-       struct ehca_mr *e_fmr;
-       u32 num_fmr = 0;
-       u32 unmap_fmr_cnt = 0;
-
-       /* check all FMR belong to same SHCA, and check internal flag */
-       list_for_each_entry(ib_fmr, fmr_list, list) {
-               prev_shca = shca;
-               shca = container_of(ib_fmr->device, struct ehca_shca,
-                                   ib_device);
-               e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-               if ((shca != prev_shca) && prev_shca) {
-                       ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
-                                "prev_shca=%p e_fmr=%p",
-                                shca, prev_shca, e_fmr);
-                       ret = -EINVAL;
-                       goto unmap_fmr_exit0;
-               }
-               if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-                       ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
-                                "e_fmr->flags=%x", e_fmr, e_fmr->flags);
-                       ret = -EINVAL;
-                       goto unmap_fmr_exit0;
-               }
-               num_fmr++;
-       }
-
-       /* loop over all FMRs to unmap */
-       list_for_each_entry(ib_fmr, fmr_list, list) {
-               unmap_fmr_cnt++;
-               e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-               shca = container_of(ib_fmr->device, struct ehca_shca,
-                                   ib_device);
-               ret = ehca_unmap_one_fmr(shca, e_fmr);
-               if (ret) {
-                       /* unmap failed, stop unmapping of rest of FMRs */
-                       ehca_err(&shca->ib_device, "unmap of one FMR failed, "
-                                "stop rest, e_fmr=%p num_fmr=%x "
-                                "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
-                                unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
-                       goto unmap_fmr_exit0;
-               }
-       }
-
-unmap_fmr_exit0:
-       if (ret)
-               ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
-                            ret, fmr_list, num_fmr, unmap_fmr_cnt);
-       return ret;
-} /* end ehca_unmap_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr)
-{
-       int ret;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(fmr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-
-       if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-                        e_fmr, e_fmr->flags);
-               ret = -EINVAL;
-               goto free_fmr_exit0;
-       }
-
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
-                        "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle, fmr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto free_fmr_exit0;
-       }
-       /* successful deregistration */
-       ehca_mr_delete(e_fmr);
-       return 0;
-
-free_fmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
-       return ret;
-} /* end ehca_dealloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-                                  struct ehca_mr *e_mr,
-                                  struct ehca_mr_pginfo *pginfo);
-
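-/*
- * register an MR with the HCA: allocate the MR resource via
- * hipz_h_alloc_resource_mr(), then register its pages either through
- * the bus map (EHCA_REG_BUSMAP_MR) or page-by-page (EHCA_REG_MR)
- */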
-int ehca_reg_mr(struct ehca_shca *shca,
-               struct ehca_mr *e_mr,
-               u64 *iova_start,
-               u64 size,
-               int acl,
-               struct ehca_pd *e_pd,
-               struct ehca_mr_pginfo *pginfo,
-               u32 *lkey, /*OUT*/
-               u32 *rkey, /*OUT*/
-               enum ehca_reg_type reg_type)
-{
-       int ret;
-       u64 h_ret;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-       if (ehca_use_hp_mr == 1)
-               hipz_acl |= 0x00000001;
-
-       h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
-                                        (u64)iova_start, size, hipz_acl,
-                                        e_pd->fw_pd, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
-                        "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_reg_mr_exit0;
-       }
-
-       e_mr->ipz_mr_handle = hipzout.handle;
-
-       if (reg_type == EHCA_REG_BUSMAP_MR)
-               ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
-       else if (reg_type == EHCA_REG_MR)
-               ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
-       else
-               ret = -EINVAL;
-
-       if (ret)
-               goto ehca_reg_mr_exit1;
-
-       /* successful registration */
-       e_mr->num_kpages = pginfo->num_kpages;
-       e_mr->num_hwpages = pginfo->num_hwpages;
-       e_mr->hwpage_size = pginfo->hwpage_size;
-       e_mr->start = iova_start;
-       e_mr->size = size;
-       e_mr->acl = acl;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-
-ehca_reg_mr_exit1:
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
-                        "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
-                        h_ret, shca, e_mr, iova_start, size, acl, e_pd,
-                        hipzout.lkey, pginfo, pginfo->num_kpages,
-                        pginfo->num_hwpages, ret);
-               ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
-                        "not recoverable");
-       }
-ehca_reg_mr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-                        "num_kpages=%llx num_hwpages=%llx",
-                        ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
-                        pginfo->num_kpages, pginfo->num_hwpages);
-       return ret;
-} /* end ehca_reg_mr() */
-
-/*----------------------------------------------------------------------*/
-
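-/*
- * register the hardware pages of an MR with the HCA, at most
- * MAX_RPAGES pages per hipz_h_register_rpage_mr() hCall
- */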
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-                      struct ehca_mr *e_mr,
-                      struct ehca_mr_pginfo *pginfo)
-{
-       int ret = 0;
-       u64 h_ret;
-       u32 rnum;
-       u64 rpage;
-       u32 i;
-       u64 *kpage;
-
-       if (!pginfo->num_hwpages) /* in case of fmr */
-               return 0;
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               ret = -ENOMEM;
-               goto ehca_reg_mr_rpages_exit0;
-       }
-
-       /* at most MAX_RPAGES eHCA MR pages per register call */
-       for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
-
-               if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-                       rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
-                       if (rnum == 0)
-                               rnum = MAX_RPAGES;      /* last shot is full */
-               } else
-                       rnum = MAX_RPAGES;
-
-               ret = ehca_set_pagebuf(pginfo, rnum, kpage);
-               if (ret) {
-                       ehca_err(&shca->ib_device, "ehca_set_pagebuf "
-                                "bad rc, ret=%i rnum=%x kpage=%p",
-                                ret, rnum, kpage);
-                       goto ehca_reg_mr_rpages_exit1;
-               }
-
-               if (rnum > 1) {
-                       rpage = __pa(kpage);
-                       if (!rpage) {
-                               ehca_err(&shca->ib_device, "kpage=%p i=%x",
-                                        kpage, i);
-                               ret = -EFAULT;
-                               goto ehca_reg_mr_rpages_exit1;
-                       }
-               } else
-                       rpage = *kpage;
-
-               h_ret = hipz_h_register_rpage_mr(
-                       shca->ipz_hca_handle, e_mr,
-                       ehca_encode_hwpage_size(pginfo->hwpage_size),
-                       0, rpage, rnum);
-
-               if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-                       /*
-                        * check for 'registration complete'==H_SUCCESS
-                        * and for 'page registered'==H_PAGE_REGISTERED
-                        */
-                       if (h_ret != H_SUCCESS) {
-                               ehca_err(&shca->ib_device, "last "
-                                        "hipz_reg_rpage_mr failed, h_ret=%lli "
-                                        "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
-                                        " lkey=%x", h_ret, e_mr, i,
-                                        shca->ipz_hca_handle.handle,
-                                        e_mr->ipz_mr_handle.handle,
-                                        e_mr->ib.ib_mr.lkey);
-                               ret = ehca2ib_return_code(h_ret);
-                               break;
-                       } else
-                               ret = 0;
-               } else if (h_ret != H_PAGE_REGISTERED) {
-                       ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
-                                "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
-                                "mr_hndl=%llx", h_ret, e_mr, i,
-                                e_mr->ib.ib_mr.lkey,
-                                shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle);
-                       ret = ehca2ib_return_code(h_ret);
-                       break;
-               } else
-                       ret = 0;
-       } /* end for(i) */
-
-ehca_reg_mr_rpages_exit1:
-       ehca_free_fw_ctrlblock(kpage);
-ehca_reg_mr_rpages_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
-                        "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
-                        pginfo, pginfo->num_kpages, pginfo->num_hwpages);
-       return ret;
-} /* end ehca_reg_mr_rpages() */
-
-/*----------------------------------------------------------------------*/
-
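-/*
- * fast-path reregistration with a single hipz_h_reregister_pmr()
- * hCall; returns -EAGAIN if the caller must fall back to the
- * 3-hCall path
- */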
-inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
-                               struct ehca_mr *e_mr,
-                               u64 *iova_start,
-                               u64 size,
-                               u32 acl,
-                               struct ehca_pd *e_pd,
-                               struct ehca_mr_pginfo *pginfo,
-                               u32 *lkey, /*OUT*/
-                               u32 *rkey) /*OUT*/
-{
-       int ret;
-       u64 h_ret;
-       u32 hipz_acl;
-       u64 *kpage;
-       u64 rpage;
-       struct ehca_mr_pginfo pginfo_save;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               ret = -ENOMEM;
-               goto ehca_rereg_mr_rereg1_exit0;
-       }
-
-       pginfo_save = *pginfo;
-       ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
-       if (ret) {
-               ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
-                        "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
-                        "kpage=%p", e_mr, pginfo, pginfo->type,
-                        pginfo->num_kpages, pginfo->num_hwpages, kpage);
-               goto ehca_rereg_mr_rereg1_exit1;
-       }
-       rpage = __pa(kpage);
-       if (!rpage) {
-               ehca_err(&shca->ib_device, "kpage=%p", kpage);
-               ret = -EFAULT;
-               goto ehca_rereg_mr_rereg1_exit1;
-       }
-       h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
-                                     (u64)iova_start, size, hipz_acl,
-                                     e_pd->fw_pd, rpage, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               /*
-                * reregistration unsuccessful; retry with the 3-hCall path,
-                * which is required e.g. in case of H_MR_CONDITION
-                * (MW bound or MR is shared)
-                */
-               ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
-                         "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
-               *pginfo = pginfo_save;
-               ret = -EAGAIN;
-       } else if ((u64 *)hipzout.vaddr != iova_start) {
-               ehca_err(&shca->ib_device, "PHYP changed iova_start in "
-                        "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
-                        "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
-                        hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
-                        e_mr->ib.ib_mr.lkey, hipzout.lkey);
-               ret = -EFAULT;
-       } else {
-               /*
-                * successful reregistration
-                * note: start and start_out are identical for eServer HCAs
-                */
-               e_mr->num_kpages = pginfo->num_kpages;
-               e_mr->num_hwpages = pginfo->num_hwpages;
-               e_mr->hwpage_size = pginfo->hwpage_size;
-               e_mr->start = iova_start;
-               e_mr->size = size;
-               e_mr->acl = acl;
-               *lkey = hipzout.lkey;
-               *rkey = hipzout.rkey;
-       }
-
-ehca_rereg_mr_rereg1_exit1:
-       ehca_free_fw_ctrlblock(kpage);
-ehca_rereg_mr_rereg1_exit0:
-       if (ret && (ret != -EAGAIN))
-               ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
-                        "pginfo=%p num_kpages=%llx num_hwpages=%llx",
-                        ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
-                        pginfo->num_hwpages);
-       return ret;
-} /* end ehca_rereg_mr_rereg1() */
-
-/*----------------------------------------------------------------------*/
-
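-/*
- * reregister an MR: try the 1-hCall path first where applicable;
- * otherwise deregister the old MR and register it anew via the
- * 3-hCall path
- */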
-int ehca_rereg_mr(struct ehca_shca *shca,
-                 struct ehca_mr *e_mr,
-                 u64 *iova_start,
-                 u64 size,
-                 int acl,
-                 struct ehca_pd *e_pd,
-                 struct ehca_mr_pginfo *pginfo,
-                 u32 *lkey,
-                 u32 *rkey)
-{
-       int ret = 0;
-       u64 h_ret;
-       int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
-       int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
-
-       /* first determine reregistration hCall(s) */
-       if ((pginfo->num_hwpages > MAX_RPAGES) ||
-           (e_mr->num_hwpages > MAX_RPAGES) ||
-           (pginfo->num_hwpages > e_mr->num_hwpages)) {
-               ehca_dbg(&shca->ib_device, "Rereg3 case, "
-                        "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
-                        pginfo->num_hwpages, e_mr->num_hwpages);
-               rereg_1_hcall = 0;
-               rereg_3_hcall = 1;
-       }
-
-       if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
-               rereg_1_hcall = 0;
-               rereg_3_hcall = 1;
-               e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
-               ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
-                        e_mr);
-       }
-
-       if (rereg_1_hcall) {
-               ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
-                                          acl, e_pd, pginfo, lkey, rkey);
-               if (ret) {
-                       if (ret == -EAGAIN)
-                               rereg_3_hcall = 1;
-                       else
-                               goto ehca_rereg_mr_exit0;
-               }
-       }
-
-       if (rereg_3_hcall) {
-               struct ehca_mr save_mr;
-
-               /* first deregister old MR */
-               h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-               if (h_ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-                                "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
-                                "mr->lkey=%x",
-                                h_ret, e_mr, shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle,
-                                e_mr->ib.ib_mr.lkey);
-                       ret = ehca2ib_return_code(h_ret);
-                       goto ehca_rereg_mr_exit0;
-               }
-               /* clean ehca_mr_t, without changing struct ib_mr and lock */
-               save_mr = *e_mr;
-               ehca_mr_deletenew(e_mr);
-
-               /* set some MR values */
-               e_mr->flags = save_mr.flags;
-               e_mr->hwpage_size = save_mr.hwpage_size;
-               e_mr->fmr_page_size = save_mr.fmr_page_size;
-               e_mr->fmr_max_pages = save_mr.fmr_max_pages;
-               e_mr->fmr_max_maps = save_mr.fmr_max_maps;
-               e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
-
-               ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
-                                 e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
-               if (ret) {
-                       u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
-                       memcpy(&e_mr->flags, &(save_mr.flags),
-                              sizeof(struct ehca_mr) - offset);
-                       goto ehca_rereg_mr_exit0;
-               }
-       }
-
-ehca_rereg_mr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-                        "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
-                        "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
-                        acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
-                        rereg_1_hcall, rereg_3_hcall);
-       return ret;
-} /* end ehca_rereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
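-/*
- * unmap one FMR: if it fits in MAX_RPAGES, try a reregistration with
- * len=0 first; otherwise, or on failure, free the FMR resource and
- * register it anew
- */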
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-                      struct ehca_mr *e_fmr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_pd *e_pd =
-               container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
-       struct ehca_mr save_fmr;
-       u32 tmp_lkey, tmp_rkey;
-       struct ehca_mr_pginfo pginfo;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
-               /*
-               /*
-                * note: after a rereg hCall with len=0, the rereg hCall
-                * must be used again to register the pages
-                */
-               h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
-                                             0, 0, e_pd->fw_pd, 0, &hipzout);
-               if (h_ret == H_SUCCESS) {
-                       /* successful reregistration */
-                       e_fmr->start = NULL;
-                       e_fmr->size = 0;
-                       tmp_lkey = hipzout.lkey;
-                       tmp_rkey = hipzout.rkey;
-                       return 0;
-               }
-               /*
-                * should not happen: the length was checked above, FMRs
-                * are not shared, and no MW is bound to FMRs
-                */
-               ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
-                        "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
-                        "mr_hndl=%llx lkey=%x lkey_out=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle,
-                        e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
-               /* try free and rereg */
-       }
-
-       /* first free old FMR */
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-                        "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
-                        "lkey=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle,
-                        e_fmr->ib.ib_fmr.lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_unmap_one_fmr_exit0;
-       }
-       /* clean ehca_mr_t, without changing lock */
-       save_fmr = *e_fmr;
-       ehca_mr_deletenew(e_fmr);
-
-       /* set some MR values */
-       e_fmr->flags = save_fmr.flags;
-       e_fmr->hwpage_size = save_fmr.hwpage_size;
-       e_fmr->fmr_page_size = save_fmr.fmr_page_size;
-       e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
-       e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
-       e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
-       e_fmr->acl = save_fmr.acl;
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_FMR;
-       ret = ehca_reg_mr(shca, e_fmr, NULL,
-                         (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
-                         e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
-                         &tmp_rkey, EHCA_REG_MR);
-       if (ret) {
-               u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
-               memcpy(&e_fmr->flags, &(save_fmr.flags),
-                      sizeof(struct ehca_mr) - offset);
-       }
-
-ehca_unmap_one_fmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
-                        "fmr_max_pages=%x",
-                        ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
-       return ret;
-} /* end ehca_unmap_one_fmr() */
-
-/*----------------------------------------------------------------------*/
-
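-/*
- * register a shared MR (SMR) that shares the registered pages of an
- * existing MR, via hipz_h_register_smr()
- */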
-int ehca_reg_smr(struct ehca_shca *shca,
-                struct ehca_mr *e_origmr,
-                struct ehca_mr *e_newmr,
-                u64 *iova_start,
-                int acl,
-                struct ehca_pd *e_pd,
-                u32 *lkey, /*OUT*/
-                u32 *rkey) /*OUT*/
-{
-       int ret = 0;
-       u64 h_ret;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-       h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-                                   (u64)iova_start, hipz_acl, e_pd->fw_pd,
-                                   &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-                        "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
-                        "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
-                        shca->ipz_hca_handle.handle,
-                        e_origmr->ipz_mr_handle.handle,
-                        e_origmr->ib.ib_mr.lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_reg_smr_exit0;
-       }
-       /* successful registration */
-       e_newmr->num_kpages = e_origmr->num_kpages;
-       e_newmr->num_hwpages = e_origmr->num_hwpages;
-       e_newmr->hwpage_size   = e_origmr->hwpage_size;
-       e_newmr->start = iova_start;
-       e_newmr->size = e_origmr->size;
-       e_newmr->acl = acl;
-       e_newmr->ipz_mr_handle = hipzout.handle;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-
-ehca_reg_smr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
-                        "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
-                        ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
-       return ret;
-} /* end ehca_reg_smr() */
-
-/*----------------------------------------------------------------------*/
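-
-/*
- * compute the kernel virtual start address of the memory section
- * selected by the (top, dir, idx) bus-map indices
- */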
-static inline void *ehca_calc_sectbase(int top, int dir, int idx)
-{
-       unsigned long ret = idx;
-       ret |= dir << EHCA_DIR_INDEX_SHIFT;
-       ret |= top << EHCA_TOP_INDEX_SHIFT;
-       return __va(ret << SECTION_SIZE_BITS);
-}
-
-#define ehca_bmap_valid(entry) \
-       ((u64)entry != (u64)EHCA_INVAL_ADDR)
-
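-/*
- * register all hardware pages of one memory section with the HCA,
- * at most MAX_RPAGES pages per hCall
- */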
-static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
-                              struct ehca_shca *shca, struct ehca_mr *mr,
-                              struct ehca_mr_pginfo *pginfo)
-{
-       u64 h_ret = 0;
-       unsigned long page = 0;
-       u64 rpage = __pa(kpage);
-       int page_count;
-
-       void *sectbase = ehca_calc_sectbase(top, dir, idx);
-       if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
-               ehca_err(&shca->ib_device, "reg_mr_section will probably fail: "
-                                          "hwpage_size does not match the "
-                                          "section start address");
-       }
-       page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
-
-       while (page < page_count) {
-               u64 rnum;
-               for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
-                    rnum++) {
-                       void *pg = sectbase + ((page++) * pginfo->hwpage_size);
-                       kpage[rnum] = __pa(pg);
-               }
-
-               h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
-                       ehca_encode_hwpage_size(pginfo->hwpage_size),
-                       0, rpage, rnum);
-
-               if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
-                       ehca_err(&shca->ib_device, "register_rpage_mr failed");
-                       return h_ret;
-               }
-       }
-       return h_ret;
-}
-
-static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
-                               struct ehca_shca *shca, struct ehca_mr *mr,
-                               struct ehca_mr_pginfo *pginfo)
-{
-       u64 hret = H_SUCCESS;
-       int idx;
-
-       for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
-                       continue;
-
-               hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
-                                          pginfo);
-               if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-                               return hret;
-       }
-       return hret;
-}
-
-static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
-                                   struct ehca_mr *mr,
-                                   struct ehca_mr_pginfo *pginfo)
-{
-       u64 hret = H_SUCCESS;
-       int dir;
-
-       for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-                       continue;
-
-               hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
-               if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-                               return hret;
-       }
-       return hret;
-}
-
-/* register internal max-MR to internal SHCA */
-int ehca_reg_internal_maxmr(
-       struct ehca_shca *shca,
-       struct ehca_pd *e_pd,
-       struct ehca_mr **e_maxmr)  /*OUT*/
-{
-       int ret;
-       struct ehca_mr *e_mr;
-       u64 *iova_start;
-       u64 size_maxmr;
-       struct ehca_mr_pginfo pginfo;
-       struct ib_phys_buf ib_pbuf;
-       u32 num_kpages;
-       u32 num_hwpages;
-       u64 hw_pgsize;
-
-       if (!ehca_bmap) {
-               ret = -EFAULT;
-               goto ehca_reg_internal_maxmr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(&shca->ib_device, "out of memory");
-               ret = -ENOMEM;
-               goto ehca_reg_internal_maxmr_exit0;
-       }
-       e_mr->flags |= EHCA_MR_FLAG_MAXMR;
-
-       /* register internal max-MR on HCA */
-       size_maxmr = ehca_mr_len;
-       iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
-       ib_pbuf.addr = 0;
-       ib_pbuf.size = size_maxmr;
-       num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
-                               PAGE_SIZE);
-       hw_pgsize = ehca_get_max_hwpage_size(shca);
-       num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
-                                hw_pgsize);
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_PHYS;
-       pginfo.num_kpages = num_kpages;
-       pginfo.num_hwpages = num_hwpages;
-       pginfo.hwpage_size = hw_pgsize;
-       pginfo.u.phy.num_phys_buf = 1;
-       pginfo.u.phy.phys_buf_array = &ib_pbuf;
-
-       ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
-                         &pginfo, &e_mr->ib.ib_mr.lkey,
-                         &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
-       if (ret) {
-               ehca_err(&shca->ib_device, "reg of internal max MR failed, "
-                        "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
-                        "num_hwpages=%x", e_mr, iova_start, size_maxmr,
-                        num_kpages, num_hwpages);
-               goto ehca_reg_internal_maxmr_exit1;
-       }
-
-       /* successful registration of all pages */
-       e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
-       e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
-       e_mr->ib.ib_mr.uobject = NULL;
-       atomic_inc(&(e_pd->ib_pd.usecnt));
-       atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
-       *e_maxmr = e_mr;
-       return 0;
-
-ehca_reg_internal_maxmr_exit1:
-       ehca_mr_delete(e_mr);
-ehca_reg_internal_maxmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
-                        ret, shca, e_pd, e_maxmr);
-       return ret;
-} /* end ehca_reg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-                  struct ehca_mr *e_newmr,
-                  u64 *iova_start,
-                  int acl,
-                  struct ehca_pd *e_pd,
-                  u32 *lkey,
-                  u32 *rkey)
-{
-       u64 h_ret;
-       struct ehca_mr *e_origmr = shca->maxmr;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-       h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-                                   (u64)iova_start, hipz_acl, e_pd->fw_pd,
-                                   &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-                        "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, e_origmr, shca->ipz_hca_handle.handle,
-                        e_origmr->ipz_mr_handle.handle,
-                        e_origmr->ib.ib_mr.lkey);
-               return ehca2ib_return_code(h_ret);
-       }
-       /* successful registration */
-       e_newmr->num_kpages = e_origmr->num_kpages;
-       e_newmr->num_hwpages = e_origmr->num_hwpages;
-       e_newmr->hwpage_size = e_origmr->hwpage_size;
-       e_newmr->start = iova_start;
-       e_newmr->size = e_origmr->size;
-       e_newmr->acl = acl;
-       e_newmr->ipz_mr_handle = hipzout.handle;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-} /* end ehca_reg_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
-{
-       int ret;
-       struct ehca_mr *e_maxmr;
-       struct ib_pd *ib_pd;
-
-       if (!shca->maxmr) {
-               ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
-               ret = -EINVAL;
-               goto ehca_dereg_internal_maxmr_exit0;
-       }
-
-       e_maxmr = shca->maxmr;
-       ib_pd = e_maxmr->ib.ib_mr.pd;
-       shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
-
-       ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
-       if (ret) {
-               ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
-                        "ret=%i e_maxmr=%p shca=%p lkey=%x",
-                        ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
-               shca->maxmr = e_maxmr;
-               goto ehca_dereg_internal_maxmr_exit0;
-       }
-
-       atomic_dec(&ib_pd->usecnt);
-
-ehca_dereg_internal_maxmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
-                        ret, shca, shca->maxmr);
-       return ret;
-} /* end ehca_dereg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * check the physical buffer array of MR verbs for validity and
- * calculate the MR size
- */
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
-                                 int num_phys_buf,
-                                 u64 *iova_start,
-                                 u64 *size)
-{
-       struct ib_phys_buf *pbuf = phys_buf_array;
-       u64 size_count = 0;
-       u32 i;
-
-       if (num_phys_buf == 0) {
-               ehca_gen_err("bad phys buf array len, num_phys_buf=0");
-               return -EINVAL;
-       }
-       /* check first buffer */
-       if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
-               ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
-                            "pbuf->addr=%llx pbuf->size=%llx",
-                            iova_start, pbuf->addr, pbuf->size);
-               return -EINVAL;
-       }
-       if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
-           (num_phys_buf > 1)) {
-               ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
-                            "pbuf->size=%llx", pbuf->addr, pbuf->size);
-               return -EINVAL;
-       }
-
-       for (i = 0; i < num_phys_buf; i++) {
-               if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
-                       ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
-                                    "pbuf->size=%llx",
-                                    i, pbuf->addr, pbuf->size);
-                       return -EINVAL;
-               }
-               if (((i > 0) && /* not 1st */
-                    (i < (num_phys_buf - 1)) &&        /* not last */
-                    (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
-                       ehca_gen_err("bad size, i=%x pbuf->size=%llx",
-                                    i, pbuf->size);
-                       return -EINVAL;
-               }
-               size_count += pbuf->size;
-               pbuf++;
-       }
-
-       *size = size_count;
-       return 0;
-} /* end ehca_mr_chk_buf_and_calc_size() */
-
-/*----------------------------------------------------------------------*/
-
-/* check the page list of the map FMR verb for validity */
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-                            u64 *page_list,
-                            int list_len)
-{
-       u32 i;
-       u64 *page;
-
-       if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
-               ehca_gen_err("bad list_len, list_len=%x "
-                            "e_fmr->fmr_max_pages=%x fmr=%p",
-                            list_len, e_fmr->fmr_max_pages, e_fmr);
-               return -EINVAL;
-       }
-
-       /* each page must be aligned */
-       page = page_list;
-       for (i = 0; i < list_len; i++) {
-               if (*page % e_fmr->fmr_page_size) {
-                       ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
-                                    "fmr_page_size=%x", i, *page, page, e_fmr,
-                                    e_fmr->fmr_page_size);
-                       return -EINVAL;
-               }
-               page++;
-       }
-
-       return 0;
-} /* end ehca_fmr_check_page_list() */
-
-/*----------------------------------------------------------------------*/
-
-/* PAGE_SIZE >= pginfo->hwpage_size */
-static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
-                                 u32 number,
-                                 u64 *kpage)
-{
-       int ret = 0;
-       u64 pgaddr;
-       u32 j = 0;
-       int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-       struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-       while (*sg != NULL) {
-               pgaddr = page_to_pfn(sg_page(*sg))
-                       << PAGE_SHIFT;
-               *kpage = pgaddr + (pginfo->next_hwpage *
-                                  pginfo->hwpage_size);
-               if (!(*kpage)) {
-                       ehca_gen_err("pgaddr=%llx "
-                                    "sg_dma_address=%llx "
-                                    "entry=%llx next_hwpage=%llx",
-                                    pgaddr, (u64)sg_dma_address(*sg),
-                                    pginfo->u.usr.next_nmap,
-                                    pginfo->next_hwpage);
-                       return -EFAULT;
-               }
-               (pginfo->hwpage_cnt)++;
-               (pginfo->next_hwpage)++;
-               kpage++;
-               if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
-                       (pginfo->kpage_cnt)++;
-                       (pginfo->u.usr.next_nmap)++;
-                       pginfo->next_hwpage = 0;
-                       *sg = sg_next(*sg);
-               }
-               j++;
-               if (j >= number)
-                       break;
-       }
-
-       return ret;
-}
-
-/*
- * check the given pages for contiguous layout;
- * the last page address is returned in prev_pgaddr for further checks
- */
-static int ehca_check_kpages_per_ate(struct scatterlist **sg,
-                                    int num_pages,
-                                    u64 *prev_pgaddr)
-{
-       for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
-               u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
-               if (ehca_debug_level >= 3)
-                       ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
-                                    *(u64 *)__va(pgaddr));
-               if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
-                       ehca_gen_err("non-contiguous page found pgaddr=%llx "
-                                    "prev_pgaddr=%llx entries_left_in_hwpage=%x",
-                                    pgaddr, *prev_pgaddr, num_pages);
-                       return -EINVAL;
-               }
-               *prev_pgaddr = pgaddr;
-       }
-       return 0;
-}
-
-/* PAGE_SIZE < pginfo->hwpage_size */
-static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
-                                 u32 number,
-                                 u64 *kpage)
-{
-       int ret = 0;
-       u64 pgaddr, prev_pgaddr;
-       u32 j = 0;
-       int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
-       int nr_kpages = kpages_per_hwpage;
-       struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-       while (*sg != NULL) {
-
-               if (nr_kpages == kpages_per_hwpage) {
-                       pgaddr = (page_to_pfn(sg_page(*sg))
-                                  << PAGE_SHIFT);
-                       *kpage = pgaddr;
-                       if (!(*kpage)) {
-                               ehca_gen_err("pgaddr=%llx entry=%llx",
-                                            pgaddr, pginfo->u.usr.next_nmap);
-                               ret = -EFAULT;
-                               return ret;
-                       }
-                       /*
-                        * The first page in a hwpage must be aligned;
-                        * the first MR page is exempt from this rule.
-                        */
-                       if (pgaddr & (pginfo->hwpage_size - 1)) {
-                               if (pginfo->hwpage_cnt) {
-                                       ehca_gen_err(
-                                               "invalid alignment "
-                                               "pgaddr=%llx entry=%llx "
-                                               "mr_pgsize=%llx",
-                                               pgaddr, pginfo->u.usr.next_nmap,
-                                               pginfo->hwpage_size);
-                                       ret = -EFAULT;
-                                       return ret;
-                               }
-                               /* first MR page */
-                               pginfo->kpage_cnt =
-                                       (pgaddr &
-                                        (pginfo->hwpage_size - 1)) >>
-                                       PAGE_SHIFT;
-                               nr_kpages -= pginfo->kpage_cnt;
-                               *kpage = pgaddr &
-                                        ~(pginfo->hwpage_size - 1);
-                       }
-                       if (ehca_debug_level >= 3) {
-                               u64 val = *(u64 *)__va(pgaddr);
-                               ehca_gen_dbg("kpage=%llx page=%llx "
-                                            "value=%016llx",
-                                            *kpage, pgaddr, val);
-                       }
-                       prev_pgaddr = pgaddr;
-                       *sg = sg_next(*sg);
-                       pginfo->kpage_cnt++;
-                       pginfo->u.usr.next_nmap++;
-                       nr_kpages--;
-                       if (!nr_kpages)
-                               goto next_kpage;
-                       continue;
-               }
-
-               ret = ehca_check_kpages_per_ate(sg, nr_kpages,
-                                               &prev_pgaddr);
-               if (ret)
-                       return ret;
-               pginfo->kpage_cnt += nr_kpages;
-               pginfo->u.usr.next_nmap += nr_kpages;
-
-next_kpage:
-               nr_kpages = kpages_per_hwpage;
-               (pginfo->hwpage_cnt)++;
-               kpage++;
-               j++;
-               if (j >= number)
-                       break;
-       }
-
-       return ret;
-}
-
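-/*
- * fill the kpage array with hardware page addresses taken from the
- * phys_buf_array entries of the page info
- */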
-static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
-                                u32 number, u64 *kpage)
-{
-       int ret = 0;
-       struct ib_phys_buf *pbuf;
-       u64 num_hw, offs_hw;
-       u32 i = 0;
-
-       /* loop over desired phys_buf_array entries */
-       while (i < number) {
-               pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
-               num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
-                                    pbuf->size, pginfo->hwpage_size);
-               offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
-                       pginfo->hwpage_size;
-               while (pginfo->next_hwpage < offs_hw + num_hw) {
-                       /* sanity check */
-                       if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
-                           (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
-                               ehca_gen_err("kpage_cnt >= num_kpages, "
-                                            "kpage_cnt=%llx num_kpages=%llx "
-                                            "hwpage_cnt=%llx "
-                                            "num_hwpages=%llx i=%x",
-                                            pginfo->kpage_cnt,
-                                            pginfo->num_kpages,
-                                            pginfo->hwpage_cnt,
-                                            pginfo->num_hwpages, i);
-                               return -EFAULT;
-                       }
-                       *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
-                                (pginfo->next_hwpage * pginfo->hwpage_size);
-                       if (!(*kpage) && pbuf->addr) {
-                               ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
-                                            "next_hwpage=%llx", pbuf->addr,
-                                            pbuf->size, pginfo->next_hwpage);
-                               return -EFAULT;
-                       }
-                       (pginfo->hwpage_cnt)++;
-                       (pginfo->next_hwpage)++;
-                       if (PAGE_SIZE >= pginfo->hwpage_size) {
-                               if (pginfo->next_hwpage %
-                                   (PAGE_SIZE / pginfo->hwpage_size) == 0)
-                                       (pginfo->kpage_cnt)++;
-                       } else
-                               pginfo->kpage_cnt += pginfo->hwpage_size /
-                                       PAGE_SIZE;
-                       kpage++;
-                       i++;
-                       if (i >= number)
-                               break;
-               }
-               if (pginfo->next_hwpage >= offs_hw + num_hw) {
-                       (pginfo->u.phy.next_buf)++;
-                       pginfo->next_hwpage = 0;
-               }
-       }
-       return ret;
-}
-
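-/*
- * fill the kpage array with hardware page addresses taken from the
- * FMR page list; where several FMR pages make up one hardware page,
- * their addresses are checked for contiguity
- */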
-static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
-                               u32 number, u64 *kpage)
-{
-       int ret = 0;
-       u64 *fmrlist;
-       u32 i;
-
-       /* loop over desired page_list entries */
-       fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
-       for (i = 0; i < number; i++) {
-               *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
-                          pginfo->next_hwpage * pginfo->hwpage_size;
-               if (!(*kpage)) {
-                       ehca_gen_err("*fmrlist=%llx fmrlist=%p "
-                                    "next_listelem=%llx next_hwpage=%llx",
-                                    *fmrlist, fmrlist,
-                                    pginfo->u.fmr.next_listelem,
-                                    pginfo->next_hwpage);
-                       return -EFAULT;
-               }
-               (pginfo->hwpage_cnt)++;
-               if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
-                       if (pginfo->next_hwpage %
-                           (pginfo->u.fmr.fmr_pgsize /
-                            pginfo->hwpage_size) == 0) {
-                               (pginfo->kpage_cnt)++;
-                               (pginfo->u.fmr.next_listelem)++;
-                               fmrlist++;
-                               pginfo->next_hwpage = 0;
-                       } else
-                               (pginfo->next_hwpage)++;
-               } else {
-                       unsigned int cnt_per_hwpage = pginfo->hwpage_size /
-                               pginfo->u.fmr.fmr_pgsize;
-                       unsigned int j;
-                       u64 prev = *kpage;
-                       /* check if addresses are contiguous */
-                       for (j = 1; j < cnt_per_hwpage; j++) {
-                               u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
-                               if (prev + pginfo->u.fmr.fmr_pgsize != p) {
-                                       ehca_gen_err("non-contiguous fmr pages "
-                                                    "found prev=%llx p=%llx "
-                                                    "idx=%x", prev, p, i + j);
-                                       return -EINVAL;
-                               }
-                               prev = p;
-                       }
-                       pginfo->kpage_cnt += cnt_per_hwpage;
-                       pginfo->u.fmr.next_listelem += cnt_per_hwpage;
-                       fmrlist += cnt_per_hwpage;
-               }
-               kpage++;
-       }
-       return ret;
-}
-
-/* setup page buffer from page info */
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-                    u32 number,
-                    u64 *kpage)
-{
-       int ret;
-
-       switch (pginfo->type) {
-       case EHCA_MR_PGI_PHYS:
-               ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
-               break;
-       case EHCA_MR_PGI_USER:
-               ret = PAGE_SIZE >= pginfo->hwpage_size ?
-                       ehca_set_pagebuf_user1(pginfo, number, kpage) :
-                       ehca_set_pagebuf_user2(pginfo, number, kpage);
-               break;
-       case EHCA_MR_PGI_FMR:
-               ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
-               break;
-       default:
-               ehca_gen_err("bad pginfo->type=%x", pginfo->type);
-               ret = -EFAULT;
-               break;
-       }
-       return ret;
-} /* end ehca_set_pagebuf() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * check whether an MR covers all of memory, i.e. is a max-MR;
- * returns 1 if it is a max-MR, else 0
- */
-int ehca_mr_is_maxmr(u64 size,
-                    u64 *iova_start)
-{
-       /* an MR is treated as a max-MR only if it matches the following: */
-       if ((size == ehca_mr_len) &&
-           (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
-               ehca_gen_dbg("this is a max-MR");
-               return 1;
-       } else
-               return 0;
-} /* end ehca_mr_is_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/* map IB access control to hipz access control; used for both MR and MW */
-void ehca_mrmw_map_acl(int ib_acl,
-                      u32 *hipz_acl)
-{
-       *hipz_acl = 0;
-       if (ib_acl & IB_ACCESS_REMOTE_READ)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
-       if (ib_acl & IB_ACCESS_REMOTE_WRITE)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
-       if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
-       if (ib_acl & IB_ACCESS_LOCAL_WRITE)
-               *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
-       if (ib_acl & IB_ACCESS_MW_BIND)
-               *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
-} /* end ehca_mrmw_map_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/* sets page size in hipz access control for MR/MW. */
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
-{
-       *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
-} /* end ehca_mrmw_set_pgsize_hipz_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * reverse map hipz access control to IB access control;
- * used for both MR and MW
- */
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-                              int *ib_acl) /*OUT*/
-{
-       *ib_acl = 0;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
-               *ib_acl |= IB_ACCESS_REMOTE_READ;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
-               *ib_acl |= IB_ACCESS_REMOTE_WRITE;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
-               *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
-               *ib_acl |= IB_ACCESS_LOCAL_WRITE;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
-               *ib_acl |= IB_ACCESS_MW_BIND;
-} /* end ehca_mrmw_reverse_map_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * MR destructor and constructor;
- * used by the Reregister MR verb: sets all fields of ehca_mr_t to 0,
- * except struct ib_mr and the spinlock
- */
-void ehca_mr_deletenew(struct ehca_mr *mr)
-{
-       mr->flags = 0;
-       mr->num_kpages = 0;
-       mr->num_hwpages = 0;
-       mr->acl = 0;
-       mr->start = NULL;
-       mr->fmr_page_size = 0;
-       mr->fmr_max_pages = 0;
-       mr->fmr_max_maps = 0;
-       mr->fmr_map_cnt = 0;
-       memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
-       memset(&mr->galpas, 0, sizeof(mr->galpas));
-} /* end ehca_mr_deletenew() */
-
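-/* create the slab caches for MR and MW objects */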
-int ehca_init_mrmw_cache(void)
-{
-       mr_cache = kmem_cache_create("ehca_cache_mr",
-                                    sizeof(struct ehca_mr), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!mr_cache)
-               return -ENOMEM;
-       mw_cache = kmem_cache_create("ehca_cache_mw",
-                                    sizeof(struct ehca_mw), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!mw_cache) {
-               kmem_cache_destroy(mr_cache);
-               mr_cache = NULL;
-               return -ENOMEM;
-       }
-       return 0;
-}
-
-void ehca_cleanup_mrmw_cache(void)
-{
-       kmem_cache_destroy(mr_cache);
-       kmem_cache_destroy(mw_cache);
-}
-
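-/*
- * allocate the entry-level map of one direction on demand;
- * new blocks are marked invalid (0xFF, EHCA_INVAL_ADDR)
- */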
-static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
-                                    int dir)
-{
-       if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
-               ehca_top_bmap->dir[dir] =
-                       kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
-               if (!ehca_top_bmap->dir[dir])
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
-       }
-       return 0;
-}
-
-static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
-{
-       if (!ehca_bmap_valid(ehca_bmap->top[top])) {
-               ehca_bmap->top[top] =
-                       kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
-               if (!ehca_bmap->top[top])
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
-       }
-       return ehca_init_top_bmap(ehca_bmap->top[top], dir);
-}
-
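-/* extract one bus-map index from section number i at shift s */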
-static inline int ehca_calc_index(unsigned long i, unsigned long s)
-{
-       return (i >> s) & EHCA_INDEX_MASK;
-}
-
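-/* free all allocated levels of the bus map */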
-void ehca_destroy_busmap(void)
-{
-       int top, dir;
-
-       if (!ehca_bmap)
-               return;
-
-       for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]))
-                       continue;
-               for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-                       if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-                               continue;
-
-                       kfree(ehca_bmap->top[top]->dir[dir]);
-               }
-
-               kfree(ehca_bmap->top[top]);
-       }
-
-       kfree(ehca_bmap);
-       ehca_bmap = NULL;
-}
-
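-/*
- * enter the sections covering pfn..pfn+nr_pages into the bus map and
- * assign each new section the next free bus offset (ehca_mr_len)
- */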
-static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
-{
-       unsigned long i, start_section, end_section;
-       int top, dir, idx;
-
-       if (!nr_pages)
-               return 0;
-
-       if (!ehca_bmap) {
-               ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
-               if (!ehca_bmap)
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
-       }
-
-       start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
-       end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
-       for (i = start_section; i < end_section; i++) {
-               int ret;
-               top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
-               dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
-               idx = i & EHCA_INDEX_MASK;
-
-               ret = ehca_init_bmap(ehca_bmap, top, dir);
-               if (ret) {
-                       ehca_destroy_busmap();
-                       return ret;
-               }
-               ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
-               ehca_mr_len += EHCA_SECTSIZE;
-       }
-       return 0;
-}
-
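-/* check whether pfn starts a hugepage of order EHCA_HUGEPAGESHIFT */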
-static int ehca_is_hugepage(unsigned long pfn)
-{
-       int page_order;
-
-       if (pfn & EHCA_HUGEPAGE_PFN_MASK)
-               return 0;
-
-       page_order = compound_order(pfn_to_page(pfn));
-       if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
-               return 0;
-
-       return 1;
-}
-
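-/*
- * walk_system_ram_range() callback: enter one RAM chunk into the
- * bus map, skipping any hugepages it contains
- */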
-static int ehca_create_busmap_callback(unsigned long initial_pfn,
-                                      unsigned long total_nr_pages, void *arg)
-{
-       int ret;
-       unsigned long pfn, start_pfn, end_pfn, nr_pages;
-
-       if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
-               return ehca_update_busmap(initial_pfn, total_nr_pages);
-
-       /* Given chunk is >= 16GB -> check for hugepages */
-       start_pfn = initial_pfn;
-       end_pfn = initial_pfn + total_nr_pages;
-       pfn = start_pfn;
-
-       while (pfn < end_pfn) {
-               if (ehca_is_hugepage(pfn)) {
-                       /* Add mem found in front of the hugepage */
-                       nr_pages = pfn - start_pfn;
-                       ret = ehca_update_busmap(start_pfn, nr_pages);
-                       if (ret)
-                               return ret;
-                       /* Skip the hugepage */
-                       pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
-                       start_pfn = pfn;
-               } else
-                       pfn += (EHCA_SECTSIZE / PAGE_SIZE);
-       }
-
-       /* Add mem found behind the hugepage(s) */
-       nr_pages = pfn - start_pfn;
-       return ehca_update_busmap(start_pfn, nr_pages);
-}
-
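-/* build the bus map by walking all of system RAM */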
-int ehca_create_busmap(void)
-{
-       int ret;
-
-       ehca_mr_len = 0;
-       ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
-                                  ehca_create_busmap_callback);
-       return ret;
-}
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-                                  struct ehca_mr *e_mr,
-                                  struct ehca_mr_pginfo *pginfo)
-{
-       int top;
-       u64 hret = H_SUCCESS, *kpage;
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               return -ENOMEM;
-       }
-       for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]))
-                       continue;
-               hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
-               if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
-                       break;
-       }
-
-       ehca_free_fw_ctrlblock(kpage);
-
-       if (hret == H_SUCCESS)
-               return 0; /* Everything is fine */
-       else {
-               ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
-                                "h_ret=%lli e_mr=%p top=%x lkey=%x "
-                                "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
-                                e_mr->ib.ib_mr.lkey,
-                                shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle);
-               return ehca2ib_return_code(hret);
-       }
-}
-
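-/*
- * translate a kernel virtual address into an eHCA bus address by
- * looking it up in the bus map; returns EHCA_INVAL_ADDR if the
- * address is not covered
- */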
-static u64 ehca_map_vaddr(void *caddr)
-{
-       int top, dir, idx;
-       unsigned long abs_addr, offset;
-       u64 entry;
-
-       if (!ehca_bmap)
-               return EHCA_INVAL_ADDR;
-
-       abs_addr = __pa(caddr);
-       top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
-       if (!ehca_bmap_valid(ehca_bmap->top[top]))
-               return EHCA_INVAL_ADDR;
-
-       dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
-       if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-               return EHCA_INVAL_ADDR;
-
-       idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
-
-       entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
-       if (ehca_bmap_valid(entry)) {
-               offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
-               return entry | offset;
-       } else
-               return EHCA_INVAL_ADDR;
-}
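
ehca_map_vaddr() above resolves a kernel virtual address through a three-level table (top/dir/ent) indexed by fields of the physical address, with the low EHCA_SECTSHIFT bits kept as the offset inside the section. Assuming ehca_calc_index() is the usual shift-and-mask (its real definition lives elsewhere in the driver), the decomposition looks roughly like:

/* Assumed equivalent of ehca_calc_index(); hypothetical, not the
 * driver's definition. */
static inline int calc_index_sketch(unsigned long addr, unsigned long shift,
                                    unsigned long index_mask)
{
        return (addr >> shift) & index_mask;
}

/*
 * Physical address layout implied by the lookup above:
 *
 *   | ... | top index | dir index | section index | section offset |
 *                                                 |<- EHCA_SECTSHIFT ->|
 */
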
-
-static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == EHCA_INVAL_ADDR;
-}
-
-static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
-                              size_t size, enum dma_data_direction direction)
-{
-       if (cpu_addr)
-               return ehca_map_vaddr(cpu_addr);
-       else
-               return EHCA_INVAL_ADDR;
-}
-
-static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
-                                 enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
-                            unsigned long offset, size_t size,
-                            enum dma_data_direction direction)
-{
-       u64 addr;
-
-       if (offset + size > PAGE_SIZE)
-               return EHCA_INVAL_ADDR;
-
-       addr = ehca_map_vaddr(page_address(page));
-       if (!ehca_dma_mapping_error(dev, addr))
-               addr += offset;
-
-       return addr;
-}
-
-static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
-                               enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                          int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       int i;
-
-       for_each_sg(sgl, sg, nents, i) {
-               u64 addr;
-               addr = ehca_map_vaddr(sg_virt(sg));
-               if (ehca_dma_mapping_error(dev, addr))
-                       return 0;
-
-               sg->dma_address = addr;
-               sg->dma_length = sg->length;
-       }
-       return nents;
-}
-
-static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
-                             int nents, enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
-                                        size_t size,
-                                        enum dma_data_direction dir)
-{
-       dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
-}
-
-static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
-                                           size_t size,
-                                           enum dma_data_direction dir)
-{
-       dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                    u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-       u64 dma_addr;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p) {
-               addr = page_address(p);
-               dma_addr = ehca_map_vaddr(addr);
-               if (ehca_dma_mapping_error(dev, dma_addr)) {
-                       free_pages((unsigned long)addr, get_order(size));
-                       return NULL;
-               }
-               if (dma_handle)
-                       *dma_handle = dma_addr;
-               return addr;
-       }
-       return NULL;
-}
-
-static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
-                                  void *cpu_addr, u64 dma_handle)
-{
-       if (cpu_addr && size)
-               free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-
-struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
-       .mapping_error          = ehca_dma_mapping_error,
-       .map_single             = ehca_dma_map_single,
-       .unmap_single           = ehca_dma_unmap_single,
-       .map_page               = ehca_dma_map_page,
-       .unmap_page             = ehca_dma_unmap_page,
-       .map_sg                 = ehca_dma_map_sg,
-       .unmap_sg               = ehca_dma_unmap_sg,
-       .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
-       .sync_single_for_device = ehca_dma_sync_single_for_device,
-       .alloc_coherent         = ehca_dma_alloc_coherent,
-       .free_coherent          = ehca_dma_free_coherent,
-};
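
This ops table is what lets ehca substitute its own bus-address translation for the generic DMA path: on kernels of this vintage the IB core checks ib_device->dma_ops before falling back to the real DMA API. A sketch of that dispatch (not the verbatim ib_verbs.h helper):

static inline u64 ib_dma_map_single_sketch(struct ib_device *dev,
                                           void *cpu_addr, size_t size,
                                           enum dma_data_direction dir)
{
        if (dev->dma_ops)       /* ehca installs ehca_dma_mapping_ops here */
                return dev->dma_ops->map_single(dev, cpu_addr, size, dir);
        return dma_map_single(dev->dma_device, cpu_addr, size, dir);
}
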
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
deleted file mode 100644 (file)
index 50d8b51..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW declarations and inline functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _EHCA_MRMW_H_
-#define _EHCA_MRMW_H_
-
-enum ehca_reg_type {
-       EHCA_REG_MR,
-       EHCA_REG_BUSMAP_MR
-};
-
-int ehca_reg_mr(struct ehca_shca *shca,
-               struct ehca_mr *e_mr,
-               u64 *iova_start,
-               u64 size,
-               int acl,
-               struct ehca_pd *e_pd,
-               struct ehca_mr_pginfo *pginfo,
-               u32 *lkey,
-               u32 *rkey,
-               enum ehca_reg_type reg_type);
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-                      struct ehca_mr *e_mr,
-                      struct ehca_mr_pginfo *pginfo);
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-                 struct ehca_mr *e_mr,
-                 u64 *iova_start,
-                 u64 size,
-                 int mr_access_flags,
-                 struct ehca_pd *e_pd,
-                 struct ehca_mr_pginfo *pginfo,
-                 u32 *lkey,
-                 u32 *rkey);
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-                      struct ehca_mr *e_fmr);
-
-int ehca_reg_smr(struct ehca_shca *shca,
-                struct ehca_mr *e_origmr,
-                struct ehca_mr *e_newmr,
-                u64 *iova_start,
-                int acl,
-                struct ehca_pd *e_pd,
-                u32 *lkey,
-                u32 *rkey);
-
-int ehca_reg_internal_maxmr(struct ehca_shca *shca,
-                           struct ehca_pd *e_pd,
-                           struct ehca_mr **maxmr);
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-                  struct ehca_mr *e_newmr,
-                  u64 *iova_start,
-                  int acl,
-                  struct ehca_pd *e_pd,
-                  u32 *lkey,
-                  u32 *rkey);
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
-
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
-                                 int num_phys_buf,
-                                 u64 *iova_start,
-                                 u64 *size);
-
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-                            u64 *page_list,
-                            int list_len);
-
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-                    u32 number,
-                    u64 *kpage);
-
-int ehca_mr_is_maxmr(u64 size,
-                    u64 *iova_start);
-
-void ehca_mrmw_map_acl(int ib_acl,
-                      u32 *hipz_acl);
-
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
-
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-                              int *ib_acl);
-
-void ehca_mr_deletenew(struct ehca_mr *mr);
-
-int ehca_create_busmap(void);
-
-void ehca_destroy_busmap(void);
-
-extern struct ib_dma_mapping_ops ehca_dma_mapping_ops;
-#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c
deleted file mode 100644 (file)
index 2a8aae4..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  PD functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-
-static struct kmem_cache *pd_cache;
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-                           struct ib_ucontext *context, struct ib_udata *udata)
-{
-       struct ehca_pd *pd;
-       int i;
-
-       pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
-       if (!pd) {
-               ehca_err(device, "device=%p context=%p out of memory",
-                        device, context);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       for (i = 0; i < 2; i++) {
-               INIT_LIST_HEAD(&pd->free[i]);
-               INIT_LIST_HEAD(&pd->full[i]);
-       }
-       mutex_init(&pd->lock);
-
-       /*
-        * Kernel PD: context == NULL
-        * User   PD: context != NULL
-        */
-       if (!context) {
-               /*
-                * Kernel PDs created after init always reuse
-                * the one created in ehca_shca_reopen()
-                */
-               struct ehca_shca *shca = container_of(device, struct ehca_shca,
-                                                     ib_device);
-               pd->fw_pd.value = shca->pd->fw_pd.value;
-       } else
-               pd->fw_pd.value = (u64)pd;
-
-       return &pd->ib_pd;
-}
-
-int ehca_dealloc_pd(struct ib_pd *pd)
-{
-       struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-       int i, leftovers = 0;
-       struct ipz_small_queue_page *page, *tmp;
-
-       for (i = 0; i < 2; i++) {
-               list_splice(&my_pd->full[i], &my_pd->free[i]);
-               list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
-                       leftovers = 1;
-                       free_page(page->page);
-                       kmem_cache_free(small_qp_cache, page);
-               }
-       }
-
-       if (leftovers)
-               ehca_warn(pd->device,
-                         "Some small queue pages were not freed");
-
-       kmem_cache_free(pd_cache, my_pd);
-
-       return 0;
-}
-
-int ehca_init_pd_cache(void)
-{
-       pd_cache = kmem_cache_create("ehca_cache_pd",
-                                    sizeof(struct ehca_pd), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!pd_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_pd_cache(void)
-{
-       kmem_cache_destroy(pd_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h
deleted file mode 100644 (file)
index 90c4efa..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Hardware request structures
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _EHCA_QES_H_
-#define _EHCA_QES_H_
-
-#include "ehca_tools.h"
-
-/* virtual scatter gather entry to specify remote addresses with length */
-struct ehca_vsgentry {
-       u64 vaddr;
-       u32 lkey;
-       u32 length;
-};
-
-#define GRH_FLAG_MASK        EHCA_BMASK_IBM( 7,  7)
-#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM( 0,  3)
-#define GRH_TCLASS_MASK      EHCA_BMASK_IBM( 4, 12)
-#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13, 31)
-#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32, 47)
-#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48, 55)
-#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56, 63)
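
The GRH masks above use IBM bit numbering, where bit 0 is the most significant bit of the 64-bit word. EHCA_BMASK_IBM() itself is defined elsewhere in the driver (ehca_tools.h, not part of this hunk); a plain mask with the same bit coverage could be built like this hypothetical helper:

/* Hypothetical helper, not the driver's EHCA_BMASK_IBM() macro: builds a
 * u64 mask covering IBM-numbered bits from..to, where bit 0 is the MSB.
 * E.g. ibm_bits_mask(56, 63) == 0x00000000000000ffULL (the hop limit). */
static inline unsigned long long ibm_bits_mask(int from, int to)
{
        int width = to - from + 1;
        unsigned long long ones = (width == 64) ? ~0ULL : (1ULL << width) - 1;

        return ones << (63 - to);
}
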
-
-/*
- * Unreliable Datagram Address Vector Format
- * see IBTA Vol1 chapter 8.3 Global Routing Header
- */
-struct ehca_ud_av {
-       u8 sl;
-       u8 lnh;
-       u16 dlid;
-       u8 reserved1;
-       u8 reserved2;
-       u8 reserved3;
-       u8 slid_path_bits;
-       u8 reserved4;
-       u8 ipd;
-       u8 reserved5;
-       u8 pmtu;
-       u32 reserved6;
-       u64 reserved7;
-       union {
-               struct {
-                       u64 word_0; /* always set to 6  */
-                       /*should be 0x1B for IB transport */
-                       u64 word_1;
-                       u64 word_2;
-                       u64 word_3;
-                       u64 word_4;
-               } grh;
-               struct {
-                       u32 wd_0;
-                       u32 wd_1;
-                       /* DWord_1 --> SGID */
-
-                       u32 sgid_wd3;
-                       u32 sgid_wd2;
-
-                       u32 sgid_wd1;
-                       u32 sgid_wd0;
-                       /* DWord_3 --> DGID */
-
-                       u32 dgid_wd3;
-                       u32 dgid_wd2;
-
-                       u32 dgid_wd1;
-                       u32 dgid_wd0;
-               } grh_l;
-       };
-};
-
-/* maximum number of sg entries allowed in a WQE */
-#define MAX_WQE_SG_ENTRIES 252
-
-#define WQE_OPTYPE_SEND             0x80
-#define WQE_OPTYPE_RDMAREAD         0x40
-#define WQE_OPTYPE_RDMAWRITE        0x20
-#define WQE_OPTYPE_CMPSWAP          0x10
-#define WQE_OPTYPE_FETCHADD         0x08
-#define WQE_OPTYPE_BIND             0x04
-
-#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
-#define WQE_WRFLAG_FENCE            0x40
-#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
-#define WQE_WRFLAG_SOLIC_EVENT      0x10
-
-#define WQEF_CACHE_HINT             0x80
-#define WQEF_CACHE_HINT_RD_WR       0x40
-#define WQEF_TIMED_WQE              0x20
-#define WQEF_PURGE                  0x08
-#define WQEF_HIGH_NIBBLE            0xF0
-
-#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
-#define MW_BIND_ACCESSCTRL_R_READ    0x20
-#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
-
-struct ehca_wqe {
-       u64 work_request_id;
-       u8 optype;
-       u8 wr_flag;
-       u16 pkeyi;
-       u8 wqef;
-       u8 nr_of_data_seg;
-       u16 wqe_provided_slid;
-       u32 destination_qp_number;
-       u32 resync_psn_sqp;
-       u32 local_ee_context_qkey;
-       u32 immediate_data;
-       union {
-               struct {
-                       u64 remote_virtual_address;
-                       u32 rkey;
-                       u32 reserved;
-                       u64 atomic_1st_op_dma_len;
-                       u64 atomic_2nd_op;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-
-               } nud;
-               struct {
-                       u64 ehca_ud_av_ptr;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 reserved3;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-               } ud_avp;
-               struct {
-                       struct ehca_ud_av ud_av;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
-                                                    2];
-               } ud_av;
-               struct {
-                       u64 reserved0;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 reserved3;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-               } all_rcv;
-
-               struct {
-                       u64 reserved;
-                       u32 rkey;
-                       u32 old_rkey;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 virtual_address;
-                       u32 reserved3;
-                       u32 length;
-                       u32 reserved4;
-                       u16 reserved5;
-                       u8 reserved6;
-                       u8 lr_ctl;
-                       u32 lkey;
-                       u32 reserved7;
-                       u64 reserved8;
-                       u64 reserved9;
-                       u64 reserved10;
-                       u64 reserved11;
-               } bind;
-               struct {
-                       u64 reserved12;
-                       u64 reserved13;
-                       u32 size;
-                       u32 start;
-               } inline_data;
-       } u;
-
-};
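
Which union member of struct ehca_wqe is live depends on the QP type and the operation: nud presumably carries the non-UD (RC/UC) send/RDMA/atomic layout, ud_av/ud_avp the UD address vector variants, bind the memory-window bind, and all_rcv the receive format. A hypothetical snippet filling the RDMA-write variant (remote_va, rkey, local_va, lkey and len are assumed variables; real WQEs live in the ipz queue pages, not on the stack):

struct ehca_wqe wqe = { 0 };

wqe.optype         = WQE_OPTYPE_RDMAWRITE;
wqe.wr_flag        = WQE_WRFLAG_REQ_SIGNAL_COM;   /* request a completion */
wqe.nr_of_data_seg = 1;
wqe.u.nud.remote_virtual_address = remote_va;
wqe.u.nud.rkey                   = rkey;
wqe.u.nud.sg_list[0] = (struct ehca_vsgentry) {
        .vaddr = local_va, .lkey = lkey, .length = len,
};
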
-
-#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
-#define WC_IMM_DATA     EHCA_BMASK_IBM(1, 1)
-#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2, 2)
-#define WC_SE_BIT       EHCA_BMASK_IBM(3, 3)
-#define WC_STATUS_ERROR_BIT 0x80000000
-#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
-#define WC_STATUS_PURGE_BIT 0x10
-#define WC_SEND_RECEIVE_BIT 0x80
-
-struct ehca_cqe {
-       u64 work_request_id;
-       u8 optype;
-       u8 w_completion_flags;
-       u16 reserved1;
-       u32 nr_bytes_transferred;
-       u32 immediate_data;
-       u32 local_qp_number;
-       u8 freed_resource_count;
-       u8 service_level;
-       u16 wqe_count;
-       u32 qp_token;
-       u32 qkey_ee_token;
-       u32 remote_qp_number;
-       u16 dlid;
-       u16 rlid;
-       u16 reserved2;
-       u16 pkey_index;
-       u32 cqe_timestamp;
-       u32 wqe_timestamp;
-       u8 wqe_timestamp_valid;
-       u8 reserved3;
-       u8 reserved4;
-       u8 cqe_flags;
-       u32 status;
-};
-
-struct ehca_eqe {
-       u64 entry;
-};
-
-struct ehca_mrte {
-       u64 starting_va;
-       u64 length; /* length of memory region in bytes*/
-       u32 pd;
-       u8 key_instance;
-       u8 pagesize;
-       u8 mr_control;
-       u8 local_remote_access_ctrl;
-       u8 reserved[0x20 - 0x18];
-       u64 at_pointer[4];
-};
-#endif /*_EHCA_QES_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c
deleted file mode 100644 (file)
index 896c01f..0000000
+++ /dev/null
@@ -1,2256 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  QP functions
- *
- *  Authors: Joachim Fenkes <fenkes@de.ibm.com>
- *           Stefan Roscher <stefan.roscher@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-static struct kmem_cache *qp_cache;
-
-/*
- * attributes not supported by query qp
- */
-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS       | \
-                                    IB_QP_EN_SQD_ASYNC_NOTIFY)
-
-/*
- * ehca (internal) qp state values
- */
-enum ehca_qp_state {
-       EHCA_QPS_RESET = 1,
-       EHCA_QPS_INIT = 2,
-       EHCA_QPS_RTR = 3,
-       EHCA_QPS_RTS = 5,
-       EHCA_QPS_SQD = 6,
-       EHCA_QPS_SQE = 8,
-       EHCA_QPS_ERR = 128
-};
-
-/*
- * qp state transitions as defined by IB Arch Rel 1.1 page 431
- */
-enum ib_qp_statetrans {
-       IB_QPST_ANY2RESET,
-       IB_QPST_ANY2ERR,
-       IB_QPST_RESET2INIT,
-       IB_QPST_INIT2RTR,
-       IB_QPST_INIT2INIT,
-       IB_QPST_RTR2RTS,
-       IB_QPST_RTS2SQD,
-       IB_QPST_RTS2RTS,
-       IB_QPST_SQD2RTS,
-       IB_QPST_SQE2RTS,
-       IB_QPST_SQD2SQD,
-       IB_QPST_MAX     /* nr of transitions, this must be last!!! */
-};
-
-/*
- * ib2ehca_qp_state maps IB to ehca qp_state
- * returns ehca qp state corresponding to given ib qp state
- */
-static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
-{
-       switch (ib_qp_state) {
-       case IB_QPS_RESET:
-               return EHCA_QPS_RESET;
-       case IB_QPS_INIT:
-               return EHCA_QPS_INIT;
-       case IB_QPS_RTR:
-               return EHCA_QPS_RTR;
-       case IB_QPS_RTS:
-               return EHCA_QPS_RTS;
-       case IB_QPS_SQD:
-               return EHCA_QPS_SQD;
-       case IB_QPS_SQE:
-               return EHCA_QPS_SQE;
-       case IB_QPS_ERR:
-               return EHCA_QPS_ERR;
-       default:
-               ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
-               return -EINVAL;
-       }
-}
-
-/*
- * ehca2ib_qp_state maps ehca to IB qp_state
- * returns ib qp state corresponding to given ehca qp state
- */
-static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
-                                               ehca_qp_state)
-{
-       switch (ehca_qp_state) {
-       case EHCA_QPS_RESET:
-               return IB_QPS_RESET;
-       case EHCA_QPS_INIT:
-               return IB_QPS_INIT;
-       case EHCA_QPS_RTR:
-               return IB_QPS_RTR;
-       case EHCA_QPS_RTS:
-               return IB_QPS_RTS;
-       case EHCA_QPS_SQD:
-               return IB_QPS_SQD;
-       case EHCA_QPS_SQE:
-               return IB_QPS_SQE;
-       case EHCA_QPS_ERR:
-               return IB_QPS_ERR;
-       default:
-               ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
-               return -EINVAL;
-       }
-}
-
-/*
- * ehca_qp_type used as index for req_attr and opt_attr of
- * struct ehca_modqp_statetrans
- */
-enum ehca_qp_type {
-       QPT_RC = 0,
-       QPT_UC = 1,
-       QPT_UD = 2,
-       QPT_SQP = 3,
-       QPT_MAX
-};
-
-/*
- * ib2ehcaqptype maps IB to ehca qp_type
- * returns ehca qp type corresponding to ib qp type
- */
-static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
-{
-       switch (ibqptype) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               return QPT_SQP;
-       case IB_QPT_RC:
-               return QPT_RC;
-       case IB_QPT_UC:
-               return QPT_UC;
-       case IB_QPT_UD:
-               return QPT_UD;
-       default:
-               ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-               return -EINVAL;
-       }
-}
-
-static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
-                                                        int ib_tostate)
-{
-       int index = -EINVAL;
-       switch (ib_tostate) {
-       case IB_QPS_RESET:
-               index = IB_QPST_ANY2RESET;
-               break;
-       case IB_QPS_INIT:
-               switch (ib_fromstate) {
-               case IB_QPS_RESET:
-                       index = IB_QPST_RESET2INIT;
-                       break;
-               case IB_QPS_INIT:
-                       index = IB_QPST_INIT2INIT;
-                       break;
-               }
-               break;
-       case IB_QPS_RTR:
-               if (ib_fromstate == IB_QPS_INIT)
-                       index = IB_QPST_INIT2RTR;
-               break;
-       case IB_QPS_RTS:
-               switch (ib_fromstate) {
-               case IB_QPS_RTR:
-                       index = IB_QPST_RTR2RTS;
-                       break;
-               case IB_QPS_RTS:
-                       index = IB_QPST_RTS2RTS;
-                       break;
-               case IB_QPS_SQD:
-                       index = IB_QPST_SQD2RTS;
-                       break;
-               case IB_QPS_SQE:
-                       index = IB_QPST_SQE2RTS;
-                       break;
-               }
-               break;
-       case IB_QPS_SQD:
-               if (ib_fromstate == IB_QPS_RTS)
-                       index = IB_QPST_RTS2SQD;
-               break;
-       case IB_QPS_SQE:
-               break;
-       case IB_QPS_ERR:
-               index = IB_QPST_ANY2ERR;
-               break;
-       default:
-               break;
-       }
-       return index;
-}
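
get_modqp_statetrans() collapses a (from, to) state pair into one of the IB_QPST_* indices and yields -EINVAL for any transition IB Arch Rel 1.1 does not define. For illustration (hypothetical caller, not driver code):

int idx;

idx = get_modqp_statetrans(IB_QPS_INIT, IB_QPS_RTR);  /* IB_QPST_INIT2RTR */
idx = get_modqp_statetrans(IB_QPS_RESET, IB_QPS_RTR); /* -EINVAL: not a
                                                       * legal transition */
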
-
-/*
- * ibqptype2servicetype returns hcp service type corresponding to given
- * ib qp type used by create_qp()
- */
-static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
-{
-       switch (ibqptype) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               return ST_UD;
-       case IB_QPT_RC:
-               return ST_RC;
-       case IB_QPT_UC:
-               return ST_UC;
-       case IB_QPT_UD:
-               return ST_UD;
-       case IB_QPT_RAW_IPV6:
-               return -EINVAL;
-       case IB_QPT_RAW_ETHERTYPE:
-               return -EINVAL;
-       default:
-               ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-               return -EINVAL;
-       }
-}
-
-/*
- * init userspace queue info from ipz_queue data
- */
-static inline void queue2resp(struct ipzu_queue_resp *resp,
-                             struct ipz_queue *queue)
-{
-       resp->qe_size = queue->qe_size;
-       resp->act_nr_of_sg = queue->act_nr_of_sg;
-       resp->queue_length = queue->queue_length;
-       resp->pagesize = queue->pagesize;
-       resp->toggle_state = queue->toggle_state;
-       resp->offset = queue->offset;
-}
-
-/*
- * init_qp_queue initializes/constructs r/squeue and registers queue pages.
- */
-static inline int init_qp_queue(struct ehca_shca *shca,
-                               struct ehca_pd *pd,
-                               struct ehca_qp *my_qp,
-                               struct ipz_queue *queue,
-                               int q_type,
-                               u64 expected_hret,
-                               struct ehca_alloc_queue_parms *parms,
-                               int wqe_size)
-{
-       int ret, cnt, ipz_rc, nr_q_pages;
-       void *vpage;
-       u64 rpage, h_ret;
-       struct ib_device *ib_dev = &shca->ib_device;
-       struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
-
-       if (!parms->queue_size)
-               return 0;
-
-       if (parms->is_small) {
-               nr_q_pages = 1;
-               ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-                                       128 << parms->page_size,
-                                       wqe_size, parms->act_nr_sges, 1);
-       } else {
-               nr_q_pages = parms->queue_size;
-               ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-                                       EHCA_PAGESIZE, wqe_size,
-                                       parms->act_nr_sges, 0);
-       }
-
-       if (!ipz_rc) {
-               ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
-                        ipz_rc);
-               return -EBUSY;
-       }
-
-       /* register queue pages */
-       for (cnt = 0; cnt < nr_q_pages; cnt++) {
-               vpage = ipz_qpageit_get_inc(queue);
-               if (!vpage) {
-                       ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-                                "failed p_vpage= %p", vpage);
-                       ret = -EINVAL;
-                       goto init_qp_queue1;
-               }
-               rpage = __pa(vpage);
-
-               h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
-                                                my_qp->ipz_qp_handle,
-                                                NULL, 0, q_type,
-                                                rpage, parms->is_small ? 0 : 1,
-                                                my_qp->galpas.kernel);
-               if (cnt == (nr_q_pages - 1)) {  /* last page! */
-                       if (h_ret != expected_hret) {
-                               ehca_err(ib_dev, "hipz_qp_register_rpage() "
-                                        "h_ret=%lli", h_ret);
-                               ret = ehca2ib_return_code(h_ret);
-                               goto init_qp_queue1;
-                       }
-                       vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
-                       if (vpage) {
-                               ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-                                        "should not succeed vpage=%p", vpage);
-                               ret = -EINVAL;
-                               goto init_qp_queue1;
-                       }
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED) {
-                               ehca_err(ib_dev, "hipz_qp_register_rpage() "
-                                        "h_ret=%lli", h_ret);
-                               ret = ehca2ib_return_code(h_ret);
-                               goto init_qp_queue1;
-                       }
-               }
-       }
-
-       ipz_qeit_reset(queue);
-
-       return 0;
-
-init_qp_queue1:
-       ipz_queue_dtor(pd, queue);
-       return ret;
-}
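
The registration loop above encodes a hypervisor-call ordering contract: every page except the last must come back as H_PAGE_REGISTERED, and the last page must match expected_hret, which the callers below choose depending on whether the other queue of the same QP still has pages to register. Informally:

/*
 * Contract implied by the loop above (informal summary, not driver code):
 *
 *   pages 0 .. n-2 : hipz_h_register_rpage_qp() must return
 *                    H_PAGE_REGISTERED
 *   page  n-1      : must return expected_hret, i.e. H_PAGE_REGISTERED
 *                    while the receive queue still has pages to come,
 *                    or H_SUCCESS for the final page of the QP overall
 */
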
-
-static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
-{
-       if (is_llqp)
-               return 128 << act_nr_sge;
-       else
-               return offsetof(struct ehca_wqe,
-                               u.nud.sg_list[act_nr_sge]);
-}
-
-static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
-                                      int req_nr_sge, int is_llqp)
-{
-       u32 wqe_size, q_size;
-       int act_nr_sge = req_nr_sge;
-
-       if (!is_llqp)
-               /* round up #SGEs so WQE size is a power of 2 */
-               for (act_nr_sge = 4; act_nr_sge <= 252;
-                    act_nr_sge = 4 + 2 * act_nr_sge)
-                       if (act_nr_sge >= req_nr_sge)
-                               break;
-
-       wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
-       q_size = wqe_size * (queue->max_wr + 1);
-
-       if (q_size <= 512)
-               queue->page_size = 2;
-       else if (q_size <= 1024)
-               queue->page_size = 3;
-       else
-               queue->page_size = 0;
-
-       queue->is_small = (queue->page_size != 0);
-}
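
The rounding loop visits act_nr_sge = 4, 12, 28, 60, 124, 252; assuming natural packing (a 64-byte fixed WQE part before u.nud.sg_list and 16-byte SGEs), those values make the non-LLQP WQE size a power of two, from 128 up to 4096 bytes. A worked example under those assumptions:

/*
 * Worked example (derived from the code above):
 *   req_nr_sge = 3          -> rounds up to act_nr_sge = 4
 *   wqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[4]) = 128 bytes
 *   max_wr = 7              -> q_size = 128 * (7 + 1) = 1024
 *   q_size <= 1024          -> page_size = 3, is_small = 1
 */
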
-
-/* needs to be called with cq->spinlock held */
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
-{
-       struct list_head *list, *node;
-
-       /* TODO: support low latency QPs */
-       if (qp->ext_type == EQPT_LLQP)
-               return;
-
-       if (on_sq) {
-               list = &qp->send_cq->sqp_err_list;
-               node = &qp->sq_err_node;
-       } else {
-               list = &qp->recv_cq->rqp_err_list;
-               node = &qp->rq_err_node;
-       }
-
-       if (list_empty(node))
-               list_add_tail(node, list);
-}
-
-static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-
-       if (!list_empty(node))
-               list_del_init(node);
-
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-}
-
-static void reset_queue_map(struct ehca_queue_map *qmap)
-{
-       int i;
-
-       qmap->tail = qmap->entries - 1;
-       qmap->left_to_poll = 0;
-       qmap->next_wqe_idx = 0;
-       for (i = 0; i < qmap->entries; i++) {
-               qmap->map[i].reported = 1;
-               qmap->map[i].cqe_req = 0;
-       }
-}
-
-/*
- * Create an ib_qp struct that is either a QP or an SRQ, depending on
- * the value of the is_srq parameter. If init_attr and srq_init_attr share
- * fields, the field out of init_attr is used.
- */
-static struct ehca_qp *internal_create_qp(
-       struct ib_pd *pd,
-       struct ib_qp_init_attr *init_attr,
-       struct ib_srq_init_attr *srq_init_attr,
-       struct ib_udata *udata, int is_srq)
-{
-       struct ehca_qp *my_qp, *my_srq = NULL;
-       struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-       struct ib_ucontext *context = NULL;
-       u64 h_ret;
-       int is_llqp = 0, has_srq = 0, is_user = 0;
-       int qp_type, max_send_sge, max_recv_sge, ret;
-
-       /* h_call's out parameters */
-       struct ehca_alloc_qp_parms parms;
-       u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
-       unsigned long flags;
-
-       if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
-               ehca_err(pd->device, "Unable to create QP, max number of %i "
-                        "QPs reached.", shca->max_num_qps);
-               ehca_err(pd->device, "To increase the maximum number of QPs "
-                        "use the number_of_qps module parameter.\n");
-               return ERR_PTR(-ENOSPC);
-       }
-
-       if (init_attr->create_flags) {
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       memset(&parms, 0, sizeof(parms));
-       qp_type = init_attr->qp_type;
-
-       if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
-               init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
-               ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
-                        init_attr->sq_sig_type);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       /* save LLQP info */
-       if (qp_type & 0x80) {
-               is_llqp = 1;
-               parms.ext_type = EQPT_LLQP;
-               parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
-       }
-       qp_type &= 0x1F;
-       init_attr->qp_type &= 0x1F;
-
-       /* handle SRQ base QPs */
-       if (init_attr->srq) {
-               my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
-
-               if (qp_type == IB_QPT_UC) {
-                       ehca_err(pd->device, "UC with SRQ not supported");
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-
-               has_srq = 1;
-               parms.ext_type = EQPT_SRQBASE;
-               parms.srq_qpn = my_srq->real_qp_num;
-       }
-
-       if (is_llqp && has_srq) {
-               ehca_err(pd->device, "LLQPs can't have an SRQ");
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       /* handle SRQs */
-       if (is_srq) {
-               parms.ext_type = EQPT_SRQ;
-               parms.srq_limit = srq_init_attr->attr.srq_limit;
-               if (init_attr->cap.max_recv_sge > 3) {
-                       ehca_err(pd->device, "no more than three SGEs "
-                                "supported for SRQ  pd=%p  max_sge=%x",
-                                pd, init_attr->cap.max_recv_sge);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       }
-
-       /* check QP type */
-       if (qp_type != IB_QPT_UD &&
-           qp_type != IB_QPT_UC &&
-           qp_type != IB_QPT_RC &&
-           qp_type != IB_QPT_SMI &&
-           qp_type != IB_QPT_GSI) {
-               ehca_err(pd->device, "wrong QP Type=%x", qp_type);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (is_llqp) {
-               switch (qp_type) {
-               case IB_QPT_RC:
-                       if ((init_attr->cap.max_send_wr > 255) ||
-                           (init_attr->cap.max_recv_wr > 255)) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of max_sq_wr=%x "
-                                        "or max_rq_wr=%x for RC LLQP",
-                                        init_attr->cap.max_send_wr,
-                                        init_attr->cap.max_recv_wr);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       }
-                       break;
-               case IB_QPT_UD:
-                       if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
-                               ehca_err(pd->device, "UD LLQP not supported "
-                                        "by this adapter");
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-ENOSYS);
-                       }
-                       if (!(init_attr->cap.max_send_sge <= 5
-                           && init_attr->cap.max_send_sge >= 1
-                           && init_attr->cap.max_recv_sge <= 5
-                           && init_attr->cap.max_recv_sge >= 1)) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of max_send_sge=%x "
-                                        "or max_recv_sge=%x for UD LLQP",
-                                        init_attr->cap.max_send_sge,
-                                        init_attr->cap.max_recv_sge);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       } else if (init_attr->cap.max_send_wr > 255) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of "
-                                        "max_send_wr=%x for UD QP_TYPE=%x",
-                                        init_attr->cap.max_send_wr, qp_type);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       }
-                       break;
-               default:
-                       ehca_err(pd->device, "unsupported LL QP Type=%x",
-                                qp_type);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       } else {
-               int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
-                              || qp_type == IB_QPT_GSI) ? 250 : 252;
-
-               if (init_attr->cap.max_send_sge > max_sge
-                   || init_attr->cap.max_recv_sge > max_sge) {
-                       ehca_err(pd->device, "Invalid number of SGEs requested "
-                                "send_sge=%x recv_sge=%x max_sge=%x",
-                                init_attr->cap.max_send_sge,
-                                init_attr->cap.max_recv_sge, max_sge);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       }
-
-       my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
-       if (!my_qp) {
-               ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       if (pd->uobject && udata) {
-               is_user = 1;
-               context = pd->uobject->context;
-       }
-
-       atomic_set(&my_qp->nr_events, 0);
-       init_waitqueue_head(&my_qp->wait_completion);
-       spin_lock_init(&my_qp->spinlock_s);
-       spin_lock_init(&my_qp->spinlock_r);
-       my_qp->qp_type = qp_type;
-       my_qp->ext_type = parms.ext_type;
-       my_qp->state = IB_QPS_RESET;
-
-       if (init_attr->recv_cq)
-               my_qp->recv_cq =
-                       container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
-       if (init_attr->send_cq)
-               my_qp->send_cq =
-                       container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
-       idr_preload(GFP_KERNEL);
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-
-       ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
-       if (ret >= 0)
-               my_qp->token = ret;
-
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-       idr_preload_end();
-       if (ret < 0) {
-               if (ret == -ENOSPC) {
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Invalid number of qp");
-               } else {
-                       ret = -ENOMEM;
-                       ehca_err(pd->device, "Can't allocate new idr entry.");
-               }
-               goto create_qp_exit0;
-       }
-
-       if (has_srq)
-               parms.srq_token = my_qp->token;
-
-       parms.servicetype = ibqptype2servicetype(qp_type);
-       if (parms.servicetype < 0) {
-               ret = -EINVAL;
-               ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
-               goto create_qp_exit1;
-       }
-
-       /* Always signal by WQE so we can hide circ. WQEs */
-       parms.sigtype = HCALL_SIGT_BY_WQE;
-
-       /* UD_AV CIRCUMVENTION */
-       max_send_sge = init_attr->cap.max_send_sge;
-       max_recv_sge = init_attr->cap.max_recv_sge;
-       if (parms.servicetype == ST_UD && !is_llqp) {
-               max_send_sge += 2;
-               max_recv_sge += 2;
-       }
-
-       parms.token = my_qp->token;
-       parms.eq_handle = shca->eq.ipz_eq_handle;
-       parms.pd = my_pd->fw_pd;
-       if (my_qp->send_cq)
-               parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
-       if (my_qp->recv_cq)
-               parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
-
-       parms.squeue.max_wr = init_attr->cap.max_send_wr;
-       parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
-       parms.squeue.max_sge = max_send_sge;
-       parms.rqueue.max_sge = max_recv_sge;
-
-       /* RC QPs need one more SWQE for unsolicited ack circumvention */
-       if (qp_type == IB_QPT_RC)
-               parms.squeue.max_wr++;
-
-       if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
-               if (HAS_SQ(my_qp))
-                       ehca_determine_small_queue(
-                               &parms.squeue, max_send_sge, is_llqp);
-               if (HAS_RQ(my_qp))
-                       ehca_determine_small_queue(
-                               &parms.rqueue, max_recv_sge, is_llqp);
-               parms.qp_storage =
-                       (parms.squeue.is_small || parms.rqueue.is_small);
-       }
-
-       h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
-                        h_ret);
-               ret = ehca2ib_return_code(h_ret);
-               goto create_qp_exit1;
-       }
-
-       ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
-       my_qp->ipz_qp_handle = parms.qp_handle;
-       my_qp->galpas = parms.galpas;
-
-       swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
-       rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
-
-       switch (qp_type) {
-       case IB_QPT_RC:
-               if (is_llqp) {
-                       parms.squeue.act_nr_sges = 1;
-                       parms.rqueue.act_nr_sges = 1;
-               }
-               /* hide the extra WQE */
-               parms.squeue.act_nr_wqes--;
-               break;
-       case IB_QPT_UD:
-       case IB_QPT_GSI:
-       case IB_QPT_SMI:
-               /* UD circumvention */
-               if (is_llqp) {
-                       parms.squeue.act_nr_sges = 1;
-                       parms.rqueue.act_nr_sges = 1;
-               } else {
-                       parms.squeue.act_nr_sges -= 2;
-                       parms.rqueue.act_nr_sges -= 2;
-               }
-
-               if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
-                       parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
-                       parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
-                       parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
-                       parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
-                       ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
-               }
-
-               break;
-
-       default:
-               break;
-       }
-
-       /* initialize r/squeue and register queue pages */
-       if (HAS_SQ(my_qp)) {
-               ret = init_qp_queue(
-                       shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
-                       HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
-                       &parms.squeue, swqe_size);
-               if (ret) {
-                       ehca_err(pd->device, "Couldn't initialize squeue "
-                                "and pages ret=%i", ret);
-                       goto create_qp_exit2;
-               }
-
-               if (!is_user) {
-                       my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-                               my_qp->ipz_squeue.qe_size;
-                       my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
-                                                   sizeof(struct ehca_qmap_entry));
-                       if (!my_qp->sq_map.map) {
-                               ehca_err(pd->device,
-                                        "Couldn't allocate squeue map");
-                               ret = -ENOMEM;
-                               goto create_qp_exit3;
-                       }
-                       INIT_LIST_HEAD(&my_qp->sq_err_node);
-                       /* to avoid the generation of bogus flush CQEs */
-                       reset_queue_map(&my_qp->sq_map);
-               }
-       }
-
-       if (HAS_RQ(my_qp)) {
-               ret = init_qp_queue(
-                       shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
-                       H_SUCCESS, &parms.rqueue, rwqe_size);
-               if (ret) {
-                       ehca_err(pd->device, "Couldn't initialize rqueue "
-                                "and pages ret=%i", ret);
-                       goto create_qp_exit4;
-               }
-               if (!is_user) {
-                       my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-                               my_qp->ipz_rqueue.qe_size;
-                       my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
-                                                   sizeof(struct ehca_qmap_entry));
-                       if (!my_qp->rq_map.map) {
-                               ehca_err(pd->device,
-                                        "Couldn't allocate rqueue map");
-                               ret = -ENOMEM;
-                               goto create_qp_exit5;
-                       }
-                       INIT_LIST_HEAD(&my_qp->rq_err_node);
-                       /* to avoid the generation of bogus flush CQEs */
-                       reset_queue_map(&my_qp->rq_map);
-               }
-       } else if (init_attr->srq && !is_user) {
-               /* this is a base QP, use the queue map of the SRQ */
-               my_qp->rq_map = my_srq->rq_map;
-               INIT_LIST_HEAD(&my_qp->rq_err_node);
-
-               my_qp->ipz_rqueue = my_srq->ipz_rqueue;
-       }
-
-       if (is_srq) {
-               my_qp->ib_srq.pd = &my_pd->ib_pd;
-               my_qp->ib_srq.device = my_pd->ib_pd.device;
-
-               my_qp->ib_srq.srq_context = init_attr->qp_context;
-               my_qp->ib_srq.event_handler = init_attr->event_handler;
-       } else {
-               my_qp->ib_qp.qp_num = ib_qp_num;
-               my_qp->ib_qp.pd = &my_pd->ib_pd;
-               my_qp->ib_qp.device = my_pd->ib_pd.device;
-
-               my_qp->ib_qp.recv_cq = init_attr->recv_cq;
-               my_qp->ib_qp.send_cq = init_attr->send_cq;
-
-               my_qp->ib_qp.qp_type = qp_type;
-               my_qp->ib_qp.srq = init_attr->srq;
-
-               my_qp->ib_qp.qp_context = init_attr->qp_context;
-               my_qp->ib_qp.event_handler = init_attr->event_handler;
-       }
-
-       init_attr->cap.max_inline_data = 0; /* not supported yet */
-       init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
-       init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
-       init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
-       init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
-       my_qp->init_attr = *init_attr;
-
-       if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-               shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-                       &my_qp->ib_qp;
-               if (ehca_nr_ports < 0) {
-                       /* alloc array to cache subsequent modify qp parms
-                        * for autodetect mode
-                        */
-                       my_qp->mod_qp_parm =
-                               kzalloc(EHCA_MOD_QP_PARM_MAX *
-                                       sizeof(*my_qp->mod_qp_parm),
-                                       GFP_KERNEL);
-                       if (!my_qp->mod_qp_parm) {
-                               ehca_err(pd->device,
-                                        "Could not alloc mod_qp_parm");
-                               ret = -ENOMEM;
-                               goto create_qp_exit5;
-                       }
-               }
-       }
-
-       /* NOTE: define_apq0() not supported yet */
-       if (qp_type == IB_QPT_GSI) {
-               h_ret = ehca_define_sqp(shca, my_qp, init_attr);
-               if (h_ret != H_SUCCESS) {
-                       kfree(my_qp->mod_qp_parm);
-                       my_qp->mod_qp_parm = NULL;
-                       /* the QP pointer is no longer valid */
-                       shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-                               NULL;
-                       ret = ehca2ib_return_code(h_ret);
-                       goto create_qp_exit6;
-               }
-       }
-
-       if (my_qp->send_cq) {
-               ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
-               if (ret) {
-                       ehca_err(pd->device,
-                                "Couldn't assign qp to send_cq ret=%i", ret);
-                       goto create_qp_exit7;
-               }
-       }
-
-       /* copy queues, galpa data to user space */
-       if (context && udata) {
-               struct ehca_create_qp_resp resp;
-               memset(&resp, 0, sizeof(resp));
-
-               resp.qp_num = my_qp->real_qp_num;
-               resp.token = my_qp->token;
-               resp.qp_type = my_qp->qp_type;
-               resp.ext_type = my_qp->ext_type;
-               resp.qkey = my_qp->qkey;
-               resp.real_qp_num = my_qp->real_qp_num;
-
-               if (HAS_SQ(my_qp))
-                       queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
-               if (HAS_RQ(my_qp))
-                       queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
-               resp.fw_handle_ofs = (u32)
-                       (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
-
-               if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
-                       ehca_err(pd->device, "Copy to udata failed");
-                       ret = -EINVAL;
-                       goto create_qp_exit8;
-               }
-       }
-
-       return my_qp;
-
-create_qp_exit8:
-       ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
-
-create_qp_exit7:
-       kfree(my_qp->mod_qp_parm);
-
-create_qp_exit6:
-       if (HAS_RQ(my_qp) && !is_user)
-               vfree(my_qp->rq_map.map);
-
-create_qp_exit5:
-       if (HAS_RQ(my_qp))
-               ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-
-create_qp_exit4:
-       if (HAS_SQ(my_qp) && !is_user)
-               vfree(my_qp->sq_map.map);
-
-create_qp_exit3:
-       if (HAS_SQ(my_qp))
-               ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-
-create_qp_exit2:
-       hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-
-create_qp_exit1:
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-       idr_remove(&ehca_qp_idr, my_qp->token);
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-create_qp_exit0:
-       kmem_cache_free(qp_cache, my_qp);
-       atomic_dec(&shca->num_qps);
-       return ERR_PTR(ret);
-}
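-
-/*
- * The create_qp_exit[0-8] ladder above unwinds in strict reverse order of
- * construction: CQ assignment (exit8), the mod_qp_parm cache (exit7), the
- * receive queue map (exit6) and receive queue (exit5), the send queue map
- * (exit4) and send queue (exit3), the firmware QP itself (exit2), the idr
- * token (exit1) and finally the qp_cache object plus the num_qps counter
- * (exit0).  A label must therefore only be entered once the corresponding
- * resource actually exists.
- */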
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-                            struct ib_qp_init_attr *qp_init_attr,
-                            struct ib_udata *udata)
-{
-       struct ehca_qp *ret;
-
-       ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
-       return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
-}
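-
-/*
- * The cast in the error path above is safe because IS_ERR() pointers
- * encode a small negative errno in the pointer value itself, so no
- * ehca_qp is ever dereferenced.  On success only the embedded ib_qp is
- * handed to the IB core, which is why the rest of this file recovers the
- * ehca_qp with container_of().
- */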
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-                              struct ib_uobject *uobject);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-                              struct ib_srq_init_attr *srq_init_attr,
-                              struct ib_udata *udata)
-{
-       struct ib_qp_init_attr qp_init_attr;
-       struct ehca_qp *my_qp;
-       struct ib_srq *ret;
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-       struct hcp_modify_qp_control_block *mqpcb;
-       u64 hret, update_mask;
-
-       if (srq_init_attr->srq_type != IB_SRQT_BASIC)
-               return ERR_PTR(-ENOSYS);
-
-       /* For common attributes, internal_create_qp() takes its info
-        * out of qp_init_attr, so copy all common attrs there.
-        */
-       memset(&qp_init_attr, 0, sizeof(qp_init_attr));
-       qp_init_attr.event_handler = srq_init_attr->event_handler;
-       qp_init_attr.qp_context = srq_init_attr->srq_context;
-       qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-       qp_init_attr.qp_type = IB_QPT_RC;
-       qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
-       qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
-
-       my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
-       if (IS_ERR(my_qp))
-               return (struct ib_srq *)my_qp;
-
-       /* copy back return values */
-       srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
-       srq_init_attr->attr.max_sge = 3;
-
-       /* drive SRQ into RTR state */
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!mqpcb) {
-               ehca_err(pd->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-               ret = ERR_PTR(-ENOMEM);
-               goto create_srq1;
-       }
-
-       mqpcb->qp_state = EHCA_QPS_INIT;
-       mqpcb->prim_phys_port = 1;
-       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               update_mask,
-                               mqpcb, my_qp->galpas.kernel);
-       if (hret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not modify SRQ to INIT "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, hret);
-               goto create_srq2;
-       }
-
-       mqpcb->qp_enable = 1;
-       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               update_mask,
-                               mqpcb, my_qp->galpas.kernel);
-       if (hret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not enable SRQ "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, hret);
-               goto create_srq2;
-       }
-
-       mqpcb->qp_state  = EHCA_QPS_RTR;
-       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               update_mask,
-                               mqpcb, my_qp->galpas.kernel);
-       if (hret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not modify SRQ to RTR "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, hret);
-               goto create_srq2;
-       }
-
-       ehca_free_fw_ctrlblock(mqpcb);
-
-       return &my_qp->ib_srq;
-
-create_srq2:
-       ret = ERR_PTR(ehca2ib_return_code(hret));
-       ehca_free_fw_ctrlblock(mqpcb);
-
-create_srq1:
-       internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
-
-       return ret;
-}
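-
-/*
- * Illustrative caller-side sketch (not part of the original file): an SRQ
- * created through the standard verbs API ends up in ehca_create_srq()
- * above.  The queue depth below is hypothetical; note that this driver
- * reports max_sge = 3 regardless of what was requested.
- */
-#if 0 /* example only */
-static struct ib_srq *example_create_srq(struct ib_pd *pd)
-{
-       struct ib_srq_init_attr init = {
-               .attr = {
-                       .max_wr  = 256, /* hypothetical depth */
-                       .max_sge = 1,
-               },
-               .srq_type = IB_SRQT_BASIC, /* anything else yields -ENOSYS */
-       };
-
-       return ib_create_srq(pd, &init);
-}
-#endif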
-
-/*
- * prepare_sqe_rts is called by internal_modify_qp() on the sqe -> rts
- * transition; it sets the purge bit of the bad wqe and of all subsequent
- * wqes to avoid re-entering the sqe state, and returns the total number
- * of bad wqes in bad_wqe_cnt.
- */
-static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
-                          int *bad_wqe_cnt)
-{
-       u64 h_ret;
-       struct ipz_queue *squeue;
-       void *bad_send_wqe_p, *bad_send_wqe_v;
-       u64 q_ofs;
-       struct ehca_wqe *wqe;
-       int qp_num = my_qp->ib_qp.qp_num;
-
-       /* get send wqe pointer */
-       h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-                                          my_qp->ipz_qp_handle, &my_qp->pf,
-                                          &bad_send_wqe_p, NULL, 2);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
-                        " ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, qp_num, h_ret);
-               return ehca2ib_return_code(h_ret);
-       }
-       bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63)));
-       ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
-                qp_num, bad_send_wqe_p);
-       /* convert wqe pointer to vaddr */
-       bad_send_wqe_v = __va((u64)bad_send_wqe_p);
-       if (ehca_debug_level >= 2)
-               ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
-       squeue = &my_qp->ipz_squeue;
-       if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
-               ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
-                        " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
-               return -EFAULT;
-       }
-
-       /* loop sets wqe's purge bit */
-       wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-       *bad_wqe_cnt = 0;
-       while (wqe->optype != 0xff && wqe->wqef != 0xff) {
-               if (ehca_debug_level >= 2)
-                       ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
-               wqe->nr_of_data_seg = 0; /* suppress data access */
-               wqe->wqef = WQEF_PURGE; /* WQE to be purged */
-               q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
-               wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-               *bad_wqe_cnt = (*bad_wqe_cnt)+1;
-       }
-       /*
-        * the bad wqe will be reprocessed and ignored when poll_cq() is
-        * called, i.e. the nr of wqes with flush error status is one less
-        */
-       ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
-                qp_num, (*bad_wqe_cnt)-1);
-       wqe->wqef = 0;
-
-       return 0;
-}
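-
-/*
- * Note on loop termination: for kernel QPs, internal_modify_qp() writes
- * optype = wqef = 0xff into the next free WQE before calling this
- * function, so the purge loop above stops at that sentinel.  The final
- * "wqe->wqef = 0" clears the marking on the entry where the loop stopped,
- * ensuring the next WQE posted there is not itself purged.
- */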
-
-static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
-                         struct ehca_queue_map *qmap)
-{
-       void *wqe_v;
-       u64 q_ofs;
-       u32 wqe_idx;
-       unsigned int tail_idx;
-
-       /* convert real to abs address */
-       wqe_p = wqe_p & (~(1UL << 63));
-
-       wqe_v = __va(wqe_p);
-
-       if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
-               ehca_gen_err("Invalid offset for calculating left cqes "
-                               "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
-               return -EFAULT;
-       }
-
-       tail_idx = next_index(qmap->tail, qmap->entries);
-       wqe_idx = q_ofs / ipz_queue->qe_size;
-
-       /* check all processed wqes for whether a cqe was requested */
-       while (tail_idx != wqe_idx) {
-               if (qmap->map[tail_idx].cqe_req)
-                       qmap->left_to_poll++;
-               tail_idx = next_index(tail_idx, qmap->entries);
-       }
-       /* save index in queue, where we have to start flushing */
-       qmap->next_wqe_idx = wqe_idx;
-       return 0;
-}
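-
-/*
- * next_index() is the usual circular increment over the queue map (a
- * sketch, assuming a definition equivalent to the one in the driver
- * headers):
- *
- *     static inline unsigned int next_index(unsigned int cur,
- *                                           unsigned int limit)
- *     {
- *             return (cur + 1 == limit) ? 0 : cur + 1;
- *     }
- *
- * so the loop above walks from one past qmap->tail up to the WQE the
- * hardware reported, counting the entries that still owe a completion.
- */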
-
-static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
-{
-       u64 h_ret;
-       void *send_wqe_p, *recv_wqe_p;
-       int ret;
-       unsigned long flags;
-       int qp_num = my_qp->ib_qp.qp_num;
-
-       /* this hcall is not supported on SRQ base QPs */
-       if (my_qp->ext_type != EQPT_SRQBASE) {
-               /* get send and receive wqe pointer */
-               h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle, &my_qp->pf,
-                               &send_wqe_p, &recv_wqe_p, 4);
-               if (h_ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device, "disable_and_get_wqe() "
-                                "failed ehca_qp=%p qp_num=%x h_ret=%lli",
-                                my_qp, qp_num, h_ret);
-                       return ehca2ib_return_code(h_ret);
-               }
-
-               /*
-                * acquire lock to ensure that nobody is polling the cq which
-                * could mean that the qmap->tail pointer is in an
-                * inconsistent state.
-                */
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
-                               &my_qp->sq_map);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-               if (ret)
-                       return ret;
-
-
-               spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-               ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
-                               &my_qp->rq_map);
-               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-               if (ret)
-                       return ret;
-       } else {
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               my_qp->sq_map.left_to_poll = 0;
-               my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-                                                       my_qp->sq_map.entries);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-               spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-               my_qp->rq_map.left_to_poll = 0;
-               my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-                                                       my_qp->rq_map.entries);
-               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-       }
-
-       /* this ensures that flush cqes are generated only for pending wqes */
-       if ((my_qp->sq_map.left_to_poll == 0) &&
-                               (my_qp->rq_map.left_to_poll == 0)) {
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               ehca_add_to_err_list(my_qp, 1);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-               if (HAS_RQ(my_qp)) {
-                       spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-                       ehca_add_to_err_list(my_qp, 0);
-                       spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
-                                       flags);
-               }
-       }
-
-       return 0;
-}
-
-/*
- * internal_modify_qp with a circumvention to handle aqp0 properly.
- * smi_reset2init indicates whether this is an internal reset-to-init call
- * for the SMI. This flag must always be zero if called from
- * ehca_modify_qp()! This internal function was introduced to avoid
- * recursion within ehca_modify_qp()!
- */
-static int internal_modify_qp(struct ib_qp *ibqp,
-                             struct ib_qp_attr *attr,
-                             int attr_mask, int smi_reset2init)
-{
-       enum ib_qp_state qp_cur_state, qp_new_state;
-       int cnt, qp_attr_idx, ret = 0;
-       enum ib_qp_statetrans statetrans;
-       struct hcp_modify_qp_control_block *mqpcb;
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca =
-               container_of(ibqp->pd->device, struct ehca_shca, ib_device);
-       u64 update_mask;
-       u64 h_ret;
-       int bad_wqe_cnt = 0;
-       int is_user = 0;
-       int squeue_locked = 0;
-       unsigned long flags = 0;
-
-       /* do query_qp to obtain current attr values */
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!mqpcb) {
-               ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               mqpcb, my_qp->galpas.kernel);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(ibqp->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, ibqp->qp_num, h_ret);
-               ret = ehca2ib_return_code(h_ret);
-               goto modify_qp_exit1;
-       }
-       if (ibqp->uobject)
-               is_user = 1;
-
-       qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
-
-       if (qp_cur_state == -EINVAL) {  /* invalid qp state */
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        mqpcb->qp_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-       /*
-        * circumvention to set aqp0 initial state to init
-        * as expected by IB spec
-        */
-       if (smi_reset2init == 0 &&
-           ibqp->qp_type == IB_QPT_SMI &&
-           qp_cur_state == IB_QPS_RESET &&
-           (attr_mask & IB_QP_STATE) &&
-           attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
-               struct ib_qp_attr smiqp_attr = {
-                       .qp_state = IB_QPS_INIT,
-                       .port_num = my_qp->init_attr.port_num,
-                       .pkey_index = 0,
-                       .qkey = 0
-               };
-               int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
-                       IB_QP_PKEY_INDEX | IB_QP_QKEY;
-               int smirc = internal_modify_qp(
-                       ibqp, &smiqp_attr, smiqp_attr_mask, 1);
-               if (smirc) {
-                       ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
-                                "ehca_modify_qp() rc=%i", smirc);
-                       ret = H_PARAMETER;
-                       goto modify_qp_exit1;
-               }
-               qp_cur_state = IB_QPS_INIT;
-               ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
-       }
-       /* is the transmitted current state equal to the "real" current state? */
-       if ((attr_mask & IB_QP_CUR_STATE) &&
-           qp_cur_state != attr->cur_qp_state) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device,
-                        "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
-                        " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
-                        attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
-                "new qp_state=%x attribute_mask=%x",
-                my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
-
-       qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
-       if (!smi_reset2init &&
-           !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
-                               attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device,
-                        "Invalid qp transition new_state=%x cur_state=%x "
-                        "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
-                        qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
-               goto modify_qp_exit1;
-       }
-
-       mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
-       if (mqpcb->qp_state)
-               update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       else {
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "Invalid new qp state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        qp_new_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       /* retrieve state transition struct to get req and opt attrs */
-       statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
-       if (statetrans < 0) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
-                        "new_qp_state=%x State_xsition=%x ehca_qp=%p "
-                        "qp_num=%x", qp_cur_state, qp_new_state,
-                        statetrans, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
-
-       if (qp_attr_idx < 0) {
-               ret = qp_attr_idx;
-               ehca_err(ibqp->device,
-                        "Invalid QP type=%x ehca_qp=%p qp_num=%x",
-                        ibqp->qp_type, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       ehca_dbg(ibqp->device,
-                "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
-                my_qp, ibqp->qp_num, statetrans);
-
-       /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
-        * in non-LL UD QPs.
-        */
-       if ((my_qp->qp_type == IB_QPT_UD) &&
-           (my_qp->ext_type != EQPT_LLQP) &&
-           (statetrans == IB_QPST_INIT2RTR) &&
-           (shca->hw_level >= 0x22)) {
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-               mqpcb->send_grh_flag = 1;
-       }
-
-       /* sqe -> rts: set purge bit of bad wqe before actual trans */
-       if ((my_qp->qp_type == IB_QPT_UD ||
-            my_qp->qp_type == IB_QPT_GSI ||
-            my_qp->qp_type == IB_QPT_SMI) &&
-           statetrans == IB_QPST_SQE2RTS) {
-               /* mark next free wqe if kernel */
-               if (!ibqp->uobject) {
-                       struct ehca_wqe *wqe;
-                       /* lock send queue */
-                       spin_lock_irqsave(&my_qp->spinlock_s, flags);
-                       squeue_locked = 1;
-                       /* mark next free wqe */
-                       wqe = (struct ehca_wqe *)
-                               ipz_qeit_get(&my_qp->ipz_squeue);
-                       wqe->optype = wqe->wqef = 0xff;
-                       ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
-                                ibqp->qp_num, wqe);
-               }
-               ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
-               if (ret) {
-                       ehca_err(ibqp->device, "prepare_sqe_rts() failed "
-                                "ehca_qp=%p qp_num=%x ret=%i",
-                                my_qp, ibqp->qp_num, ret);
-                       goto modify_qp_exit2;
-               }
-       }
-
-       /*
-        * enable RDMA_Atomic_Control on reset->init for reliable connection
-        * QPs; this is necessary since gen2 does not provide that flag,
-        * but pHyp requires it
-        */
-       if (statetrans == IB_QPST_RESET2INIT &&
-           (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
-               mqpcb->rdma_atomic_ctrl = 3;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
-       }
-       /* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
-       if (statetrans == IB_QPST_INIT2RTR &&
-           (ibqp->qp_type == IB_QPT_UC) &&
-           !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
-               mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX) {
-               if (attr->pkey_index >= 16) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid pkey_index=%x. "
-                                "ehca_qp=%p qp_num=%x max_pkey_index=f",
-                                attr->pkey_index, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->prim_p_key_idx = attr->pkey_index;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
-       }
-       if (attr_mask & IB_QP_PORT) {
-               struct ehca_sport *sport;
-               struct ehca_qp *aqp1;
-               if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid port=%x. "
-                                "ehca_qp=%p qp_num=%x num_ports=%x",
-                                attr->port_num, my_qp, ibqp->qp_num,
-                                shca->num_ports);
-                       goto modify_qp_exit2;
-               }
-               sport = &shca->sport[attr->port_num - 1];
-               if (!sport->ibqp_sqp[IB_QPT_GSI]) {
-                       /* should not occur */
-                       ret = -EFAULT;
-                       ehca_err(ibqp->device, "AQP1 was not created for "
-                                "port=%x", attr->port_num);
-                       goto modify_qp_exit2;
-               }
-               aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
-                                   struct ehca_qp, ib_qp);
-               if (ibqp->qp_type != IB_QPT_GSI &&
-                   ibqp->qp_type != IB_QPT_SMI &&
-                   aqp1->mod_qp_parm) {
-                       /*
-                        * firmware will reject this modify_qp() because
-                        * port is not activated/initialized fully
-                        */
-                       ret = -EFAULT;
-                       ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
-                                 "either port is being activated (try again) "
-                                 "or cabling issue", attr->port_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->prim_phys_port = attr->port_num;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
-       }
-       if (attr_mask & IB_QP_QKEY) {
-               mqpcb->qkey = attr->qkey;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
-       }
-       if (attr_mask & IB_QP_AV) {
-               mqpcb->dlid = attr->ah_attr.dlid;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
-               mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
-               mqpcb->service_level = attr->ah_attr.sl;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
-
-               if (ehca_calc_ipd(shca, mqpcb->prim_phys_port,
-                                 attr->ah_attr.static_rate,
-                                 &mqpcb->max_static_rate)) {
-                       ret = -EINVAL;
-                       goto modify_qp_exit2;
-               }
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
-
-               /*
-                * Always supply the GRH flag, even if it's zero, to give the
-                * hypervisor a clear "yes" or "no" instead of a "perhaps"
-                */
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-
-               /*
-                * SOURCE_GID_IDX and DEST_GID may only be set if GRH is
-                * TRUE; otherwise pHyp will return H_ATTR_PARM!
-                */
-               if (attr->ah_attr.ah_flags == IB_AH_GRH) {
-                       mqpcb->send_grh_flag = 1;
-
-                       mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
-
-                       for (cnt = 0; cnt < 16; cnt++)
-                               mqpcb->dest_gid.byte[cnt] =
-                                       attr->ah_attr.grh.dgid.raw[cnt];
-
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
-                       mqpcb->flow_label = attr->ah_attr.grh.flow_label;
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
-                       mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
-                       mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
-               }
-       }
-
-       if (attr_mask & IB_QP_PATH_MTU) {
-               /* store ld(MTU) */
-               my_qp->mtu_shift = attr->path_mtu + 7;
-               mqpcb->path_mtu = attr->path_mtu;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
-       }
-       if (attr_mask & IB_QP_TIMEOUT) {
-               mqpcb->timeout = attr->timeout;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
-       }
-       if (attr_mask & IB_QP_RETRY_CNT) {
-               mqpcb->retry_count = attr->retry_cnt;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
-       }
-       if (attr_mask & IB_QP_RNR_RETRY) {
-               mqpcb->rnr_retry_count = attr->rnr_retry;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
-       }
-       if (attr_mask & IB_QP_RQ_PSN) {
-               mqpcb->receive_psn = attr->rq_psn;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
-       }
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-               mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
-                       attr->max_dest_rd_atomic : 2;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-       }
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
-               mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
-                       attr->max_rd_atomic : 2;
-               update_mask |=
-                       EHCA_BMASK_SET
-                       (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
-       }
-       if (attr_mask & IB_QP_ALT_PATH) {
-               if (attr->alt_port_num < 1
-                   || attr->alt_port_num > shca->num_ports) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid alt_port=%x. "
-                                "ehca_qp=%p qp_num=%x num_ports=%x",
-                                attr->alt_port_num, my_qp, ibqp->qp_num,
-                                shca->num_ports);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->alt_phys_port = attr->alt_port_num;
-
-               if (attr->alt_pkey_index >= 16) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
-                                "ehca_qp=%p qp_num=%x max_pkey_index=f",
-                                attr->alt_pkey_index, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->alt_p_key_idx = attr->alt_pkey_index;
-
-               mqpcb->timeout_al = attr->alt_timeout;
-               mqpcb->dlid_al = attr->alt_ah_attr.dlid;
-               mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
-               mqpcb->service_level_al = attr->alt_ah_attr.sl;
-
-               if (ehca_calc_ipd(shca, mqpcb->alt_phys_port,
-                                 attr->alt_ah_attr.static_rate,
-                                 &mqpcb->max_static_rate_al)) {
-                       ret = -EINVAL;
-                       goto modify_qp_exit2;
-               }
-
-               /* OpenIB doesn't support alternate retry counts - copy them */
-               mqpcb->retry_count_al = mqpcb->retry_count;
-               mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
-
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
-
-               /*
-                * Always supply the GRH flag, even if it's zero, to give the
-                * hypervisor a clear "yes" or "no" instead of a "perhaps"
-                */
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
-
-               /*
-                * SOURCE_GID_IDX and DEST_GID may only be set if GRH is
-                * TRUE; otherwise pHyp will return H_ATTR_PARM!
-                */
-               if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
-                       mqpcb->send_grh_flag_al = 1;
-
-                       for (cnt = 0; cnt < 16; cnt++)
-                               mqpcb->dest_gid_al.byte[cnt] =
-                                       attr->alt_ah_attr.grh.dgid.raw[cnt];
-                       mqpcb->source_gid_idx_al =
-                               attr->alt_ah_attr.grh.sgid_index;
-                       mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
-                       mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
-                       mqpcb->traffic_class_al =
-                               attr->alt_ah_attr.grh.traffic_class;
-
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
-                               EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
-               }
-       }
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER) {
-               mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
-       }
-
-       if (attr_mask & IB_QP_SQ_PSN) {
-               mqpcb->send_psn = attr->sq_psn;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
-       }
-
-       if (attr_mask & IB_QP_DEST_QPN) {
-               mqpcb->dest_qp_nr = attr->dest_qp_num;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
-       }
-
-       if (attr_mask & IB_QP_PATH_MIG_STATE) {
-               if (attr->path_mig_state != IB_MIG_REARM
-                   && attr->path_mig_state != IB_MIG_MIGRATED) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid mig_state=%x",
-                                attr->path_mig_state);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->path_migration_state = attr->path_mig_state + 1;
-               if (attr->path_mig_state == IB_MIG_REARM)
-                       my_qp->mig_armed = 1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
-       }
-
-       if (attr_mask & IB_QP_CAP) {
-               mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
-               mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
-               /* no support for max_send/recv_sge yet */
-       }
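-
-       /*
-        * Convention used throughout this function (assuming the usual
-        * EHCA_BMASK helpers from ehca_tools.h): each MQPCB_MASK_* constant
-        * names a bit field in the update mask, and EHCA_BMASK_SET(mask, 1)
-        * yields a u64 with exactly that field set.  The hypervisor call
-        * below only consumes the mqpcb fields flagged in update_mask.
-        */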
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
-
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                                my_qp->ipz_qp_handle,
-                                &my_qp->pf,
-                                update_mask,
-                                mqpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
-               goto modify_qp_exit2;
-       }
-
-       if ((my_qp->qp_type == IB_QPT_UD ||
-            my_qp->qp_type == IB_QPT_GSI ||
-            my_qp->qp_type == IB_QPT_SMI) &&
-           statetrans == IB_QPST_SQE2RTS) {
-               /* ring the doorbell to reprocess the wqes */
-               iosync(); /* serialize GAL register access */
-               hipz_update_sqa(my_qp, bad_wqe_cnt-1);
-               ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
-       }
-
-       if (statetrans == IB_QPST_RESET2INIT ||
-           statetrans == IB_QPST_INIT2INIT) {
-               mqpcb->qp_enable = 1;
-               mqpcb->qp_state = EHCA_QPS_INIT;
-               update_mask = 0;
-               update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-
-               h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                                        my_qp->ipz_qp_handle,
-                                        &my_qp->pf,
-                                        update_mask,
-                                        mqpcb,
-                                        my_qp->galpas.kernel);
-
-               if (h_ret != H_SUCCESS) {
-                       ret = ehca2ib_return_code(h_ret);
-                       ehca_err(ibqp->device, "ENABLE in context of "
-                                "RESET_2_INIT failed! Maybe you didn't get "
-                                "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
-                                h_ret, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-       }
-       if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
-           && !is_user) {
-               ret = check_for_left_cqes(my_qp, shca);
-               if (ret)
-                       goto modify_qp_exit2;
-       }
-
-       if (statetrans == IB_QPST_ANY2RESET) {
-               ipz_qeit_reset(&my_qp->ipz_rqueue);
-               ipz_qeit_reset(&my_qp->ipz_squeue);
-
-               if (qp_cur_state == IB_QPS_ERR && !is_user) {
-                       del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-                       if (HAS_RQ(my_qp))
-                               del_from_err_list(my_qp->recv_cq,
-                                                 &my_qp->rq_err_node);
-               }
-               if (!is_user)
-                       reset_queue_map(&my_qp->sq_map);
-
-               if (HAS_RQ(my_qp) && !is_user)
-                       reset_queue_map(&my_qp->rq_map);
-       }
-
-       if (attr_mask & IB_QP_QKEY)
-               my_qp->qkey = attr->qkey;
-
-modify_qp_exit2:
-       if (squeue_locked) { /* this means: sqe -> rts */
-               spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-               my_qp->sqerr_purgeflag = 1;
-       }
-
-modify_qp_exit1:
-       ehca_free_fw_ctrlblock(mqpcb);
-
-       return ret;
-}
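-
-/*
- * Illustrative caller-side sketch (not part of the original file): a
- * kernel consumer drives the QP state machine that internal_modify_qp()
- * validates via ib_modify_qp_is_ok().  All attribute values below are
- * hypothetical; the masks are the ones the IB spec requires per
- * transition for an RC QP.
- */
-#if 0 /* example only */
-static int example_rc_to_rts(struct ib_qp *qp, u8 port, u32 dest_qpn,
-                            u16 dlid)
-{
-       struct ib_qp_attr attr;
-       int ret;
-
-       memset(&attr, 0, sizeof(attr));
-       attr.qp_state = IB_QPS_INIT;
-       attr.pkey_index = 0;
-       attr.port_num = port;
-       attr.qp_access_flags = 0;
-       ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
-                          IB_QP_PORT | IB_QP_ACCESS_FLAGS);
-       if (ret)
-               return ret;
-
-       memset(&attr, 0, sizeof(attr));
-       attr.qp_state = IB_QPS_RTR;
-       attr.path_mtu = IB_MTU_2048;
-       attr.dest_qp_num = dest_qpn;
-       attr.rq_psn = 0;
-       attr.max_dest_rd_atomic = 1;
-       attr.min_rnr_timer = 12;
-       attr.ah_attr.dlid = dlid;
-       attr.ah_attr.port_num = port;
-       ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_AV |
-                          IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN |
-                          IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
-       if (ret)
-               return ret;
-
-       memset(&attr, 0, sizeof(attr));
-       attr.qp_state = IB_QPS_RTS;
-       attr.timeout = 14;
-       attr.retry_cnt = 7;
-       attr.rnr_retry = 7;
-       attr.sq_psn = 0;
-       attr.max_rd_atomic = 1;
-       return ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_TIMEOUT |
-                           IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
-                           IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC);
-}
-#endif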
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-                  struct ib_udata *udata)
-{
-       int ret = 0;
-
-       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-                                             ib_device);
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-
-       /* The if-block below caches the qp_attr to be modified for GSI and
-        * SMI qps during initialization by ib_mad. When the respective port
-        * is activated, i.e. we got a PORT_ACTIVE event, we replay the
-        * cached sequence of modify calls, see ehca_recover_sqp() below.
-        * Why this is required:
-        * 1) If only one port is connected, older code required it to be
-        *    port one and the user to pass the module option nr_ports=1,
-        *    which is very inconvenient for the end user.
-        * 2) Firmware accepts modify_qp() only once the respective port
-        *    has become active. Older code had a 30 sec wait loop in
-        *    create_qp()/define_aqp1(), which is not appropriate in
-        *    practice. This code removes that wait loop, see define_aqp1(),
-        *    and always reports all ports to ib_mad and its users. Only
-        *    activated ports are then usable.
-        */
-       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
-               int port = my_qp->init_attr.port_num;
-               struct ehca_sport *sport = &shca->sport[port - 1];
-               unsigned long flags;
-               spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-               /* cache qp_attr only during init */
-               if (my_qp->mod_qp_parm) {
-                       struct ehca_mod_qp_parm *p;
-                       if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
-                               ehca_err(&shca->ib_device,
-                                        "mod_qp_parm overflow state=%x port=%x"
-                                        " type=%x", attr->qp_state,
-                                        my_qp->init_attr.port_num,
-                                        ibqp->qp_type);
-                               spin_unlock_irqrestore(&sport->mod_sqp_lock,
-                                                      flags);
-                               return -EINVAL;
-                       }
-                       p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
-                       p->mask = attr_mask;
-                       p->attr = *attr;
-                       my_qp->mod_qp_parm_idx++;
-                       ehca_dbg(&shca->ib_device,
-                                "Saved qp_attr for state=%x port=%x type=%x",
-                                attr->qp_state, my_qp->init_attr.port_num,
-                                ibqp->qp_type);
-                       spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-                       goto out;
-               }
-               spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-       }
-
-       ret = internal_modify_qp(ibqp, attr, attr_mask, 0);
-
-out:
-       if ((ret == 0) && (attr_mask & IB_QP_STATE))
-               my_qp->state = attr->qp_state;
-
-       return ret;
-}
-
-void ehca_recover_sqp(struct ib_qp *sqp)
-{
-       struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
-       int port = my_sqp->init_attr.port_num;
-       struct ib_qp_attr attr;
-       struct ehca_mod_qp_parm *qp_parm;
-       int i, qp_parm_idx, ret;
-       unsigned long flags, wr_cnt;
-
-       if (!my_sqp->mod_qp_parm)
-               return;
-       ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
-
-       qp_parm = my_sqp->mod_qp_parm;
-       qp_parm_idx = my_sqp->mod_qp_parm_idx;
-       for (i = 0; i < qp_parm_idx; i++) {
-               attr = qp_parm[i].attr;
-               ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
-               if (ret) {
-                       ehca_err(sqp->device, "Could not modify SQP port=%x "
-                                "qp_num=%x ret=%x", port, sqp->qp_num, ret);
-                       goto free_qp_parm;
-               }
-               ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
-                        port, sqp->qp_num, attr.qp_state);
-       }
-
-       /* re-trigger posted recv wrs */
-       wr_cnt =  my_sqp->ipz_rqueue.current_q_offset /
-               my_sqp->ipz_rqueue.qe_size;
-       if (wr_cnt) {
-               spin_lock_irqsave(&my_sqp->spinlock_r, flags);
-               hipz_update_rqa(my_sqp, wr_cnt);
-               spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
-               ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
-                        port, sqp->qp_num, wr_cnt);
-       }
-
-free_qp_parm:
-       kfree(qp_parm);
-       /* this prevents subsequent calls to modify_qp() from caching qp_attr */
-       my_sqp->mod_qp_parm = NULL;
-}
-
-int ehca_query_qp(struct ib_qp *qp,
-                 struct ib_qp_attr *qp_attr,
-                 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       struct hcp_modify_qp_control_block *qpcb;
-       int cnt, ret = 0;
-       u64 h_ret;
-
-       if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
-               ehca_err(qp->device, "Invalid attribute mask "
-                        "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
-                        my_qp, qp->qp_num, qp_attr_mask);
-               return -EINVAL;
-       }
-
-       qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!qpcb) {
-               ehca_err(qp->device, "Out of memory for qpcb "
-                        "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(adapter_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               qpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(qp->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, qp->qp_num, h_ret);
-               goto query_qp_exit1;
-       }
-
-       qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
-       qp_attr->qp_state = qp_attr->cur_qp_state;
-
-       if (qp_attr->cur_qp_state == -EINVAL) {
-               ret = -EINVAL;
-               ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        qpcb->qp_state, my_qp, qp->qp_num);
-               goto query_qp_exit1;
-       }
-
-       if (qp_attr->qp_state == IB_QPS_SQD)
-               qp_attr->sq_draining = 1;
-
-       qp_attr->qkey = qpcb->qkey;
-       qp_attr->path_mtu = qpcb->path_mtu;
-       qp_attr->path_mig_state = qpcb->path_migration_state - 1;
-       qp_attr->rq_psn = qpcb->receive_psn;
-       qp_attr->sq_psn = qpcb->send_psn;
-       qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
-       qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
-       qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
-       /* UD_AV CIRCUMVENTION */
-       if (my_qp->qp_type == IB_QPT_UD) {
-               qp_attr->cap.max_send_sge =
-                       qpcb->actual_nr_sges_in_sq_wqe - 2;
-               qp_attr->cap.max_recv_sge =
-                       qpcb->actual_nr_sges_in_rq_wqe - 2;
-       } else {
-               qp_attr->cap.max_send_sge =
-                       qpcb->actual_nr_sges_in_sq_wqe;
-               qp_attr->cap.max_recv_sge =
-                       qpcb->actual_nr_sges_in_rq_wqe;
-       }
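-
-       /*
-        * The "-2" above presumably mirrors the two extra SGEs reserved per
-        * UD WQE at create time for the address vector (the "UD_AV
-        * CIRCUMVENTION" adjustment in internal_create_qp()), so the query
-        * reports the capability the consumer originally asked for.
-        */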
-
-       qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
-       qp_attr->dest_qp_num = qpcb->dest_qp_nr;
-
-       qp_attr->pkey_index = qpcb->prim_p_key_idx;
-       qp_attr->port_num = qpcb->prim_phys_port;
-       qp_attr->timeout = qpcb->timeout;
-       qp_attr->retry_cnt = qpcb->retry_count;
-       qp_attr->rnr_retry = qpcb->rnr_retry_count;
-
-       qp_attr->alt_pkey_index = qpcb->alt_p_key_idx;
-       qp_attr->alt_port_num = qpcb->alt_phys_port;
-       qp_attr->alt_timeout = qpcb->timeout_al;
-
-       qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
-       qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
-
-       /* primary av */
-       qp_attr->ah_attr.sl = qpcb->service_level;
-
-       if (qpcb->send_grh_flag) {
-               qp_attr->ah_attr.ah_flags = IB_AH_GRH;
-       }
-
-       qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
-       qp_attr->ah_attr.dlid = qpcb->dlid;
-       qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
-       qp_attr->ah_attr.port_num = qp_attr->port_num;
-
-       /* primary GRH */
-       qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
-       qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
-       qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
-       qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
-
-       for (cnt = 0; cnt < 16; cnt++)
-               qp_attr->ah_attr.grh.dgid.raw[cnt] =
-                       qpcb->dest_gid.byte[cnt];
-
-       /* alternate AV */
-       qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
-       if (qpcb->send_grh_flag_al) {
-               qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
-       }
-
-       qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
-       qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
-       qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
-
-       /* alternate GRH */
-       qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
-       qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
-       qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
-       qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
-
-       for (cnt = 0; cnt < 16; cnt++)
-               qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
-                       qpcb->dest_gid_al.byte[cnt];
-
-       /* return init attributes given in ehca_create_qp */
-       if (qp_init_attr)
-               *qp_init_attr = my_qp->init_attr;
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
-
-query_qp_exit1:
-       ehca_free_fw_ctrlblock(qpcb);
-
-       return ret;
-}
-
-int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
-{
-       struct ehca_qp *my_qp =
-               container_of(ibsrq, struct ehca_qp, ib_srq);
-       struct ehca_shca *shca =
-               container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
-       struct hcp_modify_qp_control_block *mqpcb;
-       u64 update_mask;
-       u64 h_ret;
-       int ret = 0;
-
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!mqpcb) {
-               ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-               return -ENOMEM;
-       }
-
-       update_mask = 0;
-       if (attr_mask & IB_SRQ_LIMIT) {
-               attr_mask &= ~IB_SRQ_LIMIT;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
-               mqpcb->curr_srq_limit = attr->srq_limit;
-               mqpcb->qp_aff_asyn_ev_log_reg =
-                       EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
-       }
-
-       /* by now, all bits in attr_mask should have been cleared */
-       if (attr_mask) {
-               ehca_err(ibsrq->device, "invalid attribute mask bits set  "
-                        "attr_mask=%x", attr_mask);
-               ret = -EINVAL;
-               goto modify_srq_exit0;
-       }
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
-                                NULL, update_mask, mqpcb,
-                                my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x",
-                        h_ret, my_qp, my_qp->real_qp_num);
-       }
-
-modify_srq_exit0:
-       ehca_free_fw_ctrlblock(mqpcb);
-
-       return ret;
-}
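-
-/*
- * Illustrative caller-side sketch (not part of the original file): arming
- * the SRQ limit through the standard verbs API lands in ehca_modify_srq()
- * above.  The threshold of 16 is hypothetical; per the IB spec the event
- * fires once and the limit then disarms.
- */
-#if 0 /* example only */
-static int example_arm_srq_limit(struct ib_srq *srq)
-{
-       struct ib_srq_attr attr = { .srq_limit = 16 };
-
-       /* triggers IB_EVENT_SRQ_LIMIT_REACHED when posted WRs run low */
-       return ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);
-}
-#endif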
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
-{
-       struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
-       struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       struct hcp_modify_qp_control_block *qpcb;
-       int ret = 0;
-       u64 h_ret;
-
-       qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!qpcb) {
-               ehca_err(srq->device, "Out of memory for qpcb "
-                        "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
-                               NULL, qpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(srq->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, h_ret);
-               goto query_srq_exit1;
-       }
-
-       srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
-       srq_attr->max_sge = 3;
-       srq_attr->srq_limit = qpcb->curr_srq_limit;
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-query_srq_exit1:
-       ehca_free_fw_ctrlblock(qpcb);
-
-       return ret;
-}
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-                              struct ib_uobject *uobject)
-{
-       struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
-       struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
-                                            ib_pd);
-       struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
-       u32 qp_num = my_qp->real_qp_num;
-       int ret;
-       u64 h_ret;
-       u8 port_num;
-       int is_user = 0;
-       enum ib_qp_type qp_type;
-       unsigned long flags;
-
-       if (uobject) {
-               is_user = 1;
-               if (my_qp->mm_count_galpa ||
-                   my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
-                       ehca_err(dev, "Resources still referenced in "
-                                "user space qp_num=%x", qp_num);
-                       return -EINVAL;
-               }
-       }
-
-       if (my_qp->send_cq) {
-               ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
-               if (ret) {
-                       ehca_err(dev, "Couldn't unassign qp from "
-                                "send_cq ret=%i qp_num=%x cq_num=%x", ret,
-                                qp_num, my_qp->send_cq->cq_number);
-                       return ret;
-               }
-       }
-
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-       idr_remove(&ehca_qp_idr, my_qp->token);
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-       /*
-        * SRQs will never get into an error list and do not have a recv_cq,
-        * so we need to skip them here.
-        */
-       if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
-               del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
-
-       if (HAS_SQ(my_qp) && !is_user)
-               del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-       /* now wait until all pending events have completed */
-       wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
-
-       h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
-               return ehca2ib_return_code(h_ret);
-       }
-
-       port_num = my_qp->init_attr.port_num;
-       qp_type  = my_qp->init_attr.qp_type;
-
-       if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-               spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-               kfree(my_qp->mod_qp_parm);
-               my_qp->mod_qp_parm = NULL;
-               shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
-               spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-       }
-
-       /* no support for IB_QPT_SMI yet */
-       if (qp_type == IB_QPT_GSI) {
-               struct ib_event event;
-               ehca_info(dev, "device %s: port %x is inactive.",
-                               shca->ib_device.name, port_num);
-               event.device = &shca->ib_device;
-               event.event = IB_EVENT_PORT_ERR;
-               event.element.port_num = port_num;
-               shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
-               ib_dispatch_event(&event);
-       }
-
-       if (HAS_RQ(my_qp)) {
-               ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-               if (!is_user)
-                       vfree(my_qp->rq_map.map);
-       }
-       if (HAS_SQ(my_qp)) {
-               ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-               if (!is_user)
-                       vfree(my_qp->sq_map.map);
-       }
-       kmem_cache_free(qp_cache, my_qp);
-       atomic_dec(&shca->num_qps);
-       return 0;
-}
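-
-/*
- * Teardown ordering note: the idr token and the error-list entries are
- * removed before the wait_event() above, so no new events can find this
- * QP while outstanding ones drain; only then are the firmware QP, the
- * queues and the qp_cache object released.
- */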
-
-int ehca_destroy_qp(struct ib_qp *qp)
-{
-       return internal_destroy_qp(qp->device,
-                                  container_of(qp, struct ehca_qp, ib_qp),
-                                  qp->uobject);
-}
-
-int ehca_destroy_srq(struct ib_srq *srq)
-{
-       return internal_destroy_qp(srq->device,
-                                  container_of(srq, struct ehca_qp, ib_srq),
-                                  srq->uobject);
-}
-
-int ehca_init_qp_cache(void)
-{
-       qp_cache = kmem_cache_create("ehca_cache_qp",
-                                    sizeof(struct ehca_qp), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!qp_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_qp_cache(void)
-{
-       kmem_cache_destroy(qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
deleted file mode 100644 (file)
index 10e2074..0000000
+++ /dev/null
@@ -1,954 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  post_send/recv, poll_cq, req_notify
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-/* in RC traffic, insert an empty RDMA READ every this many packets */
-#define ACK_CIRC_THRESHOLD 2000000
-
-static u64 replace_wr_id(u64 wr_id, u16 idx)
-{
-       u64 ret;
-
-       ret = wr_id & ~QMAP_IDX_MASK;
-       ret |= idx & QMAP_IDX_MASK;
-
-       return ret;
-}
-
-static u16 get_app_wr_id(u64 wr_id)
-{
-       return wr_id & QMAP_IDX_MASK;
-}
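
These two helpers stash a queue-map index in the low bits of the caller's opaque 64-bit wr_id so a completion can be matched back to its sq_map/rq_map slot. A minimal sketch of the round trip, with wr and cqe standing for a work request being posted and its completion entry, and QMAP_IDX_MASK as defined elsewhere in the driver:

	/* post path: remember the app's low bits, store the map index instead */
	u16 idx      = 42;                            /* slot in sq_map/rq_map */
	u16 app_bits = get_app_wr_id(wr->wr_id);      /* kept in qmap_entry    */
	u64 stored   = replace_wr_id(wr->wr_id, idx); /* written into the WQE  */

	/* completion path: recover the index, then rebuild the app's wr_id */
	u16 tail_idx = get_app_wr_id(cqe->work_request_id);      /* == idx */
	u64 restored = replace_wr_id(cqe->work_request_id, app_bits);
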
-
-static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
-                                 struct ehca_wqe *wqe_p,
-                                 struct ib_recv_wr *recv_wr,
-                                 u32 rq_map_idx)
-{
-       u8 cnt_ds;
-       if (unlikely((recv_wr->num_sge < 0) ||
-                    (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
-               ehca_gen_err("Invalid number of WQE SGE. "
-                        "num_sge=%x max_nr_of_sg=%x",
-                        recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
-               return -EINVAL; /* invalid SG list length */
-       }
-
-       /* clear wqe header until sglist */
-       memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-       wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
-       wqe_p->nr_of_data_seg = recv_wr->num_sge;
-
-       for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
-               wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
-                       recv_wr->sg_list[cnt_ds].addr;
-               wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
-                       recv_wr->sg_list[cnt_ds].lkey;
-               wqe_p->u.all_rcv.sg_list[cnt_ds].length =
-                       recv_wr->sg_list[cnt_ds].length;
-       }
-
-       if (ehca_debug_level >= 3) {
-               ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
-                            ipz_rqueue);
-               ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
-       }
-
-       return 0;
-}
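
The ib_recv_wr consumed above is the standard verbs structure; a minimal single-SGE request on the consumer side might look as follows (buf, mr and qp are hypothetical: a registered buffer, its memory region, and the queue pair):

	struct ib_sge sge = {
		.addr   = (u64)(unsigned long)buf,  /* hypothetical registered buffer */
		.length = 256,
		.lkey   = mr->lkey,                 /* hypothetical MR */
	};
	struct ib_recv_wr wr = {
		.wr_id   = 0x1234,                  /* opaque cookie, echoed in the WC */
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ib_recv_wr *bad_wr;

	/* ehca_post_recv() walks wr.next and writes one RWQE per entry */
	int ret = ehca_post_recv(qp, &wr, &bad_wr);
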
-
-#if defined(DEBUG_GSI_SEND_WR)
-
-/* need ib_mad struct */
-#include <rdma/ib_mad.h>
-
-static void trace_ud_wr(const struct ib_ud_wr *ud_wr)
-{
-       int idx = 0;
-       int j;
-       while (ud_wr) {
-               /* the MAD sits at the start of the first SGE for GSI sends;
-                * older kernels carried this pointer as wr.ud.mad_hdr */
-               struct ib_mad_hdr *mad_hdr = ud_wr->wr.num_sge ?
-                       __va(ud_wr->wr.sg_list->addr) : NULL;
-               struct ib_sge *sge = ud_wr->wr.sg_list;
-               ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x "
-                            "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id,
-                            ud_wr->wr.num_sge, ud_wr->wr.send_flags,
-                            ud_wr->wr.opcode);
-               if (mad_hdr) {
-                       ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x "
-                                    "mgmt_class=%x class_version=%x method=%x "
-                                    "status=%x class_specific=%x tid=%lx "
-                                    "attr_id=%x resv=%x attr_mod=%x",
-                                    idx, mad_hdr->base_version,
-                                    mad_hdr->mgmt_class,
-                                    mad_hdr->class_version, mad_hdr->method,
-                                    mad_hdr->status, mad_hdr->class_specific,
-                                    mad_hdr->tid, mad_hdr->attr_id,
-                                    mad_hdr->resv,
-                                    mad_hdr->attr_mod);
-               }
-               for (j = 0; j < ud_wr->wr.num_sge; j++) {
-                       u8 *data = __va(sge->addr);
-                       ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x "
-                                    "lkey=%x",
-                                    idx, j, data, sge->length, sge->lkey);
-                       /* assume length is n*16 */
-                       ehca_dmp(data, sge->length, "ud_wr#%x sge#%x",
-                                idx, j);
-                       sge++;
-               } /* eof for j */
-               idx++;
-               /* advance via container_of; the ud_wr() helper is shadowed
-                * by the identically named parameter */
-               ud_wr = container_of(ud_wr->wr.next, struct ib_ud_wr, wr);
-       } /* eof while ud_wr */
-}
-
-#endif /* DEBUG_GSI_SEND_WR */
-
-static inline int ehca_write_swqe(struct ehca_qp *qp,
-                                 struct ehca_wqe *wqe_p,
-                                 struct ib_send_wr *send_wr,
-                                 u32 sq_map_idx,
-                                 int hidden)
-{
-       u32 idx;
-       u64 dma_length;
-       struct ehca_av *my_av;
-       u32 remote_qkey;
-       struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
-
-       if (unlikely((send_wr->num_sge < 0) ||
-                    (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
-               ehca_gen_err("Invalid number of WQE SGE. "
-                        "num_sge=%x max_nr_of_sg=%x",
-                        send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
-               return -EINVAL; /* invalid SG list length */
-       }
-
-       /* clear wqe header until sglist */
-       memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-       wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
-
-       qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
-       qmap_entry->reported = 0;
-       qmap_entry->cqe_req = 0;
-
-       switch (send_wr->opcode) {
-       case IB_WR_SEND:
-       case IB_WR_SEND_WITH_IMM:
-               wqe_p->optype = WQE_OPTYPE_SEND;
-               break;
-       case IB_WR_RDMA_WRITE:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
-               break;
-       case IB_WR_RDMA_READ:
-               wqe_p->optype = WQE_OPTYPE_RDMAREAD;
-               break;
-       default:
-               ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
-               return -EINVAL; /* invalid opcode */
-       }
-
-       wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
-
-       wqe_p->wr_flag = 0;
-
-       if ((send_wr->send_flags & IB_SEND_SIGNALED ||
-           qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
-           && !hidden) {
-               wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
-               qmap_entry->cqe_req = 1;
-       }
-
-       if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
-           send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
-               /* this might not work as long as HW does not support it */
-               wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
-               wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
-       }
-
-       wqe_p->nr_of_data_seg = send_wr->num_sge;
-
-       switch (qp->qp_type) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               /* no break is intentional here */
-       case IB_QPT_UD:
-               /* IB 1.2 spec C10-15 compliance */
-               remote_qkey = ud_wr(send_wr)->remote_qkey;
-               if (remote_qkey & 0x80000000)
-                       remote_qkey = qp->qkey;
-
-               wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8;
-               wqe_p->local_ee_context_qkey = remote_qkey;
-               if (unlikely(!ud_wr(send_wr)->ah)) {
-                       ehca_gen_err("ud_wr(send_wr) is NULL. qp=%p", qp);
-                       return -EINVAL;
-               }
-               if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) {
-                       ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
-                       return -EINVAL;
-               }
-               my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah);
-               wqe_p->u.ud_av.ud_av = my_av->av;
-
-               /*
-                * omitted check of IB_SEND_INLINE
-                * since HW does not support it
-                */
-               for (idx = 0; idx < send_wr->num_sge; idx++) {
-                       wqe_p->u.ud_av.sg_list[idx].vaddr =
-                               send_wr->sg_list[idx].addr;
-                       wqe_p->u.ud_av.sg_list[idx].lkey =
-                               send_wr->sg_list[idx].lkey;
-                       wqe_p->u.ud_av.sg_list[idx].length =
-                               send_wr->sg_list[idx].length;
-               } /* eof for idx */
-               if (qp->qp_type == IB_QPT_SMI ||
-                   qp->qp_type == IB_QPT_GSI)
-                       wqe_p->u.ud_av.ud_av.pmtu = 1;
-               if (qp->qp_type == IB_QPT_GSI) {
-                       wqe_p->pkeyi = ud_wr(send_wr)->pkey_index;
-#ifdef DEBUG_GSI_SEND_WR
-                       trace_ud_wr(ud_wr(send_wr));
-#endif /* DEBUG_GSI_SEND_WR */
-               }
-               break;
-
-       case IB_QPT_UC:
-               if (send_wr->send_flags & IB_SEND_FENCE)
-                       wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
-               /* no break is intentional here */
-       case IB_QPT_RC:
-               /* TODO: atomic not implemented */
-               wqe_p->u.nud.remote_virtual_address =
-                       rdma_wr(send_wr)->remote_addr;
-               wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey;
-
-               /*
-                * omitted checking of IB_SEND_INLINE
-                * since HW does not support it
-                */
-               dma_length = 0;
-               for (idx = 0; idx < send_wr->num_sge; idx++) {
-                       wqe_p->u.nud.sg_list[idx].vaddr =
-                               send_wr->sg_list[idx].addr;
-                       wqe_p->u.nud.sg_list[idx].lkey =
-                               send_wr->sg_list[idx].lkey;
-                       wqe_p->u.nud.sg_list[idx].length =
-                               send_wr->sg_list[idx].length;
-                       dma_length += send_wr->sg_list[idx].length;
-               } /* eof idx */
-               wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
-
-               /* unsolicited ack circumvention */
-               if (send_wr->opcode == IB_WR_RDMA_READ) {
-                       /* on RDMA read, switch on and reset counters */
-                       qp->message_count = qp->packet_count = 0;
-                       qp->unsol_ack_circ = 1;
-               } else
-                       /* else estimate #packets */
-                       qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
-
-               break;
-
-       default:
-               ehca_gen_err("Invalid qptype=%x", qp->qp_type);
-               return -EINVAL;
-       }
-
-       if (ehca_debug_level >= 3) {
-               ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
-               ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
-       }
-       return 0;
-}
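
Since the ib_send_wr split, the opcode-specific fields used above live in wrapper structs, and rdma_wr()/ud_wr() recover the wrapper from the embedded wr via container_of. A minimal RDMA WRITE request in the form this function expects, with an ib_sge describing the local buffer and remote_buf/remote_rkey standing in for peer-supplied values:

	struct ib_rdma_wr rwr = {
		.wr = {
			.wr_id      = 0x42,
			.opcode     = IB_WR_RDMA_WRITE,
			.send_flags = IB_SEND_SIGNALED,
			.sg_list    = &sge,          /* local source buffer       */
			.num_sge    = 1,
		},
		.remote_addr = remote_buf,           /* hypothetical peer address */
		.rkey        = remote_rkey,          /* hypothetical peer rkey    */
	};
	/* posted as &rwr.wr; rdma_wr(send_wr) downcasts back to &rwr */
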
-
-/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
-static inline void map_ib_wc_status(u32 cqe_status,
-                                   enum ib_wc_status *wc_status)
-{
-       if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
-               switch (cqe_status & 0x3F) {
-               case 0x01:
-               case 0x21:
-                       *wc_status = IB_WC_LOC_LEN_ERR;
-                       break;
-               case 0x02:
-               case 0x22:
-                       *wc_status = IB_WC_LOC_QP_OP_ERR;
-                       break;
-               case 0x03:
-               case 0x23:
-                       *wc_status = IB_WC_LOC_EEC_OP_ERR;
-                       break;
-               case 0x04:
-               case 0x24:
-                       *wc_status = IB_WC_LOC_PROT_ERR;
-                       break;
-               case 0x05:
-               case 0x25:
-                       *wc_status = IB_WC_WR_FLUSH_ERR;
-                       break;
-               case 0x06:
-                       *wc_status = IB_WC_MW_BIND_ERR;
-                       break;
-               case 0x07: /* remote error - look into bits 20:24 */
-                       switch ((cqe_status
-                                & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
-                       case 0x0:
-                               /*
-                                * PSN Sequence Error!
-                                * couldn't find a matching status!
-                                */
-                               *wc_status = IB_WC_GENERAL_ERR;
-                               break;
-                       case 0x1:
-                               *wc_status = IB_WC_REM_INV_REQ_ERR;
-                               break;
-                       case 0x2:
-                               *wc_status = IB_WC_REM_ACCESS_ERR;
-                               break;
-                       case 0x3:
-                               *wc_status = IB_WC_REM_OP_ERR;
-                               break;
-                       case 0x4:
-                               *wc_status = IB_WC_REM_INV_RD_REQ_ERR;
-                               break;
-                       }
-                       break;
-               case 0x08:
-                       *wc_status = IB_WC_RETRY_EXC_ERR;
-                       break;
-               case 0x09:
-                       *wc_status = IB_WC_RNR_RETRY_EXC_ERR;
-                       break;
-               case 0x0A:
-               case 0x2D:
-                       *wc_status = IB_WC_REM_ABORT_ERR;
-                       break;
-               case 0x0B:
-               case 0x2E:
-                       *wc_status = IB_WC_INV_EECN_ERR;
-                       break;
-               case 0x0C:
-               case 0x2F:
-                       *wc_status = IB_WC_INV_EEC_STATE_ERR;
-                       break;
-               case 0x0D:
-                       *wc_status = IB_WC_BAD_RESP_ERR;
-                       break;
-               case 0x10:
-                       /* WQE purged */
-                       *wc_status = IB_WC_WR_FLUSH_ERR;
-                       break;
-               default:
-                       *wc_status = IB_WC_FATAL_ERR;
-
-               }
-       } else
-               *wc_status = IB_WC_SUCCESS;
-}
-
-static inline int post_one_send(struct ehca_qp *my_qp,
-                        struct ib_send_wr *cur_send_wr,
-                        int hidden)
-{
-       struct ehca_wqe *wqe_p;
-       int ret;
-       u32 sq_map_idx;
-       u64 start_offset = my_qp->ipz_squeue.current_q_offset;
-
-       /* get pointer next to free WQE */
-       wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
-       if (unlikely(!wqe_p)) {
-               /* too many posted work requests: queue overflow */
-               ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
-                        "qp_num=%x", my_qp->ib_qp.qp_num);
-               return -ENOMEM;
-       }
-
-       /*
-        * Get the index of the WQE in the send queue. The same index is used
-        * for writing into the sq_map.
-        */
-       sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;
-
-       /* write a SEND WQE into the QUEUE */
-       ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
-       /*
-        * if something failed,
-        * reset the free entry pointer to the start value
-        */
-       if (unlikely(ret)) {
-               my_qp->ipz_squeue.current_q_offset = start_offset;
-               ehca_err(my_qp->ib_qp.device, "Could not write WQE "
-                        "qp_num=%x", my_qp->ib_qp.qp_num);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-int ehca_post_send(struct ib_qp *qp,
-                  struct ib_send_wr *send_wr,
-                  struct ib_send_wr **bad_send_wr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-       int wqe_cnt = 0;
-       int ret = 0;
-       unsigned long flags;
-
-       /* Reject WR if QP is in RESET, INIT or RTR state */
-       if (unlikely(my_qp->state < IB_QPS_RTS)) {
-               ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-                        my_qp->state, qp->qp_num);
-               ret = -EINVAL;
-               goto out;
-       }
-
-       /* LOCK the QUEUE */
-       spin_lock_irqsave(&my_qp->spinlock_s, flags);
-
-       /* Send an empty extra RDMA read if:
-        *  1) there has been an RDMA read on this connection before
-        *  2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
-        *  3) we can be sure that any previous extra RDMA read has been
-        *     processed so we don't overflow the SQ
-        */
-       if (unlikely(my_qp->unsol_ack_circ &&
-                    my_qp->packet_count > ACK_CIRC_THRESHOLD &&
-                    my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
-               /* insert an empty RDMA READ to fix up the remote QP state */
-               struct ib_send_wr circ_wr;
-               memset(&circ_wr, 0, sizeof(circ_wr));
-               circ_wr.opcode = IB_WR_RDMA_READ;
-               post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
-               wqe_cnt++;
-               ehca_dbg(qp->device, "posted circ wr  qp_num=%x", qp->qp_num);
-               my_qp->message_count = my_qp->packet_count = 0;
-       }
-
-       /* loop processes list of send reqs */
-       while (send_wr) {
-               ret = post_one_send(my_qp, send_wr, 0);
-               if (unlikely(ret)) {
-                       goto post_send_exit0;
-               }
-               wqe_cnt++;
-               send_wr = send_wr->next;
-       }
-
-post_send_exit0:
-       iosync(); /* serialize GAL register access */
-       hipz_update_sqa(my_qp, wqe_cnt);
-       if (unlikely(ret || ehca_debug_level >= 2))
-               ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-                        my_qp, qp->qp_num, wqe_cnt, ret);
-       my_qp->message_count += wqe_cnt;
-       spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-
-out:
-       if (ret)
-               *bad_send_wr = send_wr;
-       return ret;
-}
-
-static int internal_post_recv(struct ehca_qp *my_qp,
-                             struct ib_device *dev,
-                             struct ib_recv_wr *recv_wr,
-                             struct ib_recv_wr **bad_recv_wr)
-{
-       struct ehca_wqe *wqe_p;
-       int wqe_cnt = 0;
-       int ret = 0;
-       u32 rq_map_idx;
-       unsigned long flags;
-       struct ehca_qmap_entry *qmap_entry;
-
-       if (unlikely(!HAS_RQ(my_qp))) {
-               ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
-                        my_qp, my_qp->real_qp_num, my_qp->ext_type);
-               ret = -ENODEV;
-               goto out;
-       }
-
-       /* LOCK the QUEUE */
-       spin_lock_irqsave(&my_qp->spinlock_r, flags);
-
-       /* loop processes list of recv reqs */
-       while (recv_wr) {
-               u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
-               /* get pointer next to free WQE */
-               wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
-               if (unlikely(!wqe_p)) {
-                       /* too many posted work requests: queue overflow */
-                       ret = -ENOMEM;
-                       ehca_err(dev, "Too many posted WQEs "
-                               "qp_num=%x", my_qp->real_qp_num);
-                       goto post_recv_exit0;
-               }
-               /*
-                * Get the index of the WQE in the recv queue. The same index
-                * is used for writing into the rq_map.
-                */
-               rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
-
-               /* write a RECV WQE into the QUEUE */
-               ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
-                               rq_map_idx);
-               /*
-                * if something failed,
-                * reset the free entry pointer to the start value
-                */
-               if (unlikely(ret)) {
-                       my_qp->ipz_rqueue.current_q_offset = start_offset;
-                       ret = -EINVAL;
-                       ehca_err(dev, "Could not write WQE "
-                               "qp_num=%x", my_qp->real_qp_num);
-                       goto post_recv_exit0;
-               }
-
-               qmap_entry = &my_qp->rq_map.map[rq_map_idx];
-               qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
-               qmap_entry->reported = 0;
-               qmap_entry->cqe_req = 1;
-
-               wqe_cnt++;
-               recv_wr = recv_wr->next;
-       } /* eof for recv_wr */
-
-post_recv_exit0:
-       iosync(); /* serialize GAL register access */
-       hipz_update_rqa(my_qp, wqe_cnt);
-       if (unlikely(ret || ehca_debug_level >= 2))
-           ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-                    my_qp, my_qp->real_qp_num, wqe_cnt, ret);
-       spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
-
-out:
-       if (ret)
-               *bad_recv_wr = recv_wr;
-
-       return ret;
-}
-
-int ehca_post_recv(struct ib_qp *qp,
-                  struct ib_recv_wr *recv_wr,
-                  struct ib_recv_wr **bad_recv_wr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-
-       /* Reject WR if QP is in RESET state */
-       if (unlikely(my_qp->state == IB_QPS_RESET)) {
-               ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-                        my_qp->state, qp->qp_num);
-               *bad_recv_wr = recv_wr;
-               return -EINVAL;
-       }
-
-       return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
-}
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-                      struct ib_recv_wr *recv_wr,
-                      struct ib_recv_wr **bad_recv_wr)
-{
-       return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
-                                 srq->device, recv_wr, bad_recv_wr);
-}
-
-/*
- * The ib_wc_opcode table converts a raw ehca CQE optype to the ib enum.
- * Since zero marks an invalid optype, each entry stores the actual ib
- * opcode incremented by one, so readers must decrement after the lookup.
- */
-static const u8 ib_wc_opcode[255] = {
-       [0x01] = IB_WC_RECV+1,
-       [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
-       [0x04] = IB_WC_BIND_MW+1,
-       [0x08] = IB_WC_FETCH_ADD+1,
-       [0x10] = IB_WC_COMP_SWAP+1,
-       [0x20] = IB_WC_RDMA_WRITE+1,
-       [0x40] = IB_WC_RDMA_READ+1,
-       [0x80] = IB_WC_SEND+1
-};
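
Unlisted slots of the table are zero-initialized, so looking up an undefined optype yields 0 and the decrement produces -1; a sketch of the decode performed in ehca_poll_cq_one() below, with cqe standing for the current completion entry:

	u8 raw = cqe->optype;              /* e.g. 0x40 for an RDMA READ */
	int op = ib_wc_opcode[raw] - 1;    /* 0x40 -> IB_WC_RDMA_READ    */
	if (op == -1)
		goto repoll;               /* entry was 0: unknown optype,
		                            * discard the CQE and repoll */
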
-
-/* internal function to poll one entry of cq */
-static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
-{
-       int ret = 0, qmap_tail_idx;
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       struct ehca_cqe *cqe;
-       struct ehca_qp *my_qp;
-       struct ehca_qmap_entry *qmap_entry;
-       struct ehca_queue_map *qmap;
-       int cqe_count = 0, is_error;
-
-repoll:
-       cqe = (struct ehca_cqe *)
-               ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
-       if (!cqe) {
-               ret = -EAGAIN;
-               if (ehca_debug_level >= 3)
-                       ehca_dbg(cq->device, "Completion queue is empty  "
-                                "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
-               goto poll_cq_one_exit0;
-       }
-
-       /* prevents loads being reordered across this point */
-       rmb();
-
-       cqe_count++;
-       if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
-               struct ehca_qp *qp;
-               int purgeflag;
-               unsigned long flags;
-
-               qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
-               if (!qp) {
-                       ehca_err(cq->device, "cq_num=%x qp_num=%x "
-                                "could not find qp -> ignore cqe",
-                                my_cq->cq_number, cqe->local_qp_number);
-                       ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
-                                my_cq->cq_number, cqe->local_qp_number);
-                       /* ignore this purged cqe */
-                       goto repoll;
-               }
-               spin_lock_irqsave(&qp->spinlock_s, flags);
-               purgeflag = qp->sqerr_purgeflag;
-               spin_unlock_irqrestore(&qp->spinlock_s, flags);
-
-               if (purgeflag) {
-                       ehca_dbg(cq->device,
-                                "Got CQE with purged bit qp_num=%x src_qp=%x",
-                                cqe->local_qp_number, cqe->remote_qp_number);
-                       if (ehca_debug_level >= 2)
-                               ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
-                                        cqe->local_qp_number,
-                                        cqe->remote_qp_number);
-                       /*
-                        * ignore this to avoid double cqes of bad wqe
-                        * that caused sqe and turn off purge flag
-                        */
-                       qp->sqerr_purgeflag = 0;
-                       goto repoll;
-               }
-       }
-
-       is_error = cqe->status & WC_STATUS_ERROR_BIT;
-
-       /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
-       if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
-               ehca_dbg(cq->device,
-                        "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
-                        is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
-               ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-                        my_cq, my_cq->cq_number);
-               ehca_dbg(cq->device,
-                        "ehca_cq=%p cq_num=%x -------------------------",
-                        my_cq, my_cq->cq_number);
-       }
-
-       read_lock(&ehca_qp_idr_lock);
-       my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
-       read_unlock(&ehca_qp_idr_lock);
-       if (!my_qp)
-               goto repoll;
-       wc->qp = &my_qp->ib_qp;
-
-       qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
-       if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
-               /* We got a send completion. */
-               qmap = &my_qp->sq_map;
-       else
-               /* We got a receive completion. */
-               qmap = &my_qp->rq_map;
-
-       /* advance the tail pointer */
-       qmap->tail = qmap_tail_idx;
-
-       if (is_error) {
-               /*
-                * set left_to_poll to 0 because in error state, we will not
-                * get any additional CQEs
-                */
-               my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-                                                       my_qp->sq_map.entries);
-               my_qp->sq_map.left_to_poll = 0;
-               ehca_add_to_err_list(my_qp, 1);
-
-               my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-                                                       my_qp->rq_map.entries);
-               my_qp->rq_map.left_to_poll = 0;
-               if (HAS_RQ(my_qp))
-                       ehca_add_to_err_list(my_qp, 0);
-       }
-
-       qmap_entry = &qmap->map[qmap_tail_idx];
-       if (qmap_entry->reported) {
-               ehca_warn(cq->device, "Double cqe on qp_num=%#x",
-                               my_qp->real_qp_num);
-               /* found a double cqe, discard it and read next one */
-               goto repoll;
-       }
-
-       wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
-       qmap_entry->reported = 1;
-
-       /* if left_to_poll is decremented to 0, add the QP to the error list */
-       if (qmap->left_to_poll > 0) {
-               qmap->left_to_poll--;
-               if ((my_qp->sq_map.left_to_poll == 0) &&
-                               (my_qp->rq_map.left_to_poll == 0)) {
-                       ehca_add_to_err_list(my_qp, 1);
-                       if (HAS_RQ(my_qp))
-                               ehca_add_to_err_list(my_qp, 0);
-               }
-       }
-
-       /* eval ib_wc_opcode */
-       wc->opcode = ib_wc_opcode[cqe->optype]-1;
-       if (unlikely(wc->opcode == -1)) {
-               ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
-                        "ehca_cq=%p cq_num=%x",
-                        cqe->optype, cqe->status, my_cq, my_cq->cq_number);
-               /* dump cqe for other infos */
-               ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-                        my_cq, my_cq->cq_number);
-                       /* also update the queue adder to throw away this entry */
-               goto repoll;
-       }
-
-       /* eval ib_wc_status */
-       if (unlikely(is_error)) {
-               /* complete with errors */
-               map_ib_wc_status(cqe->status, &wc->status);
-               wc->vendor_err = wc->status;
-       } else
-               wc->status = IB_WC_SUCCESS;
-
-       wc->byte_len = cqe->nr_bytes_transferred;
-       wc->pkey_index = cqe->pkey_index;
-       wc->slid = cqe->rlid;
-       wc->dlid_path_bits = cqe->dlid;
-       wc->src_qp = cqe->remote_qp_number;
-       /*
-        * HW has "Immed data present" and "GRH present" in bits 6 and 5.
-        * SW defines those in bits 1 and 0, so we can just shift and mask.
-        */
-       wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
-       wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
-       wc->sl = cqe->service_level;
-
-poll_cq_one_exit0:
-       if (cqe_count > 0)
-               hipz_update_feca(my_cq, cqe_count);
-
-       return ret;
-}
-
-static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
-                              struct ib_wc *wc, int num_entries,
-                              struct ipz_queue *ipz_queue, int on_sq)
-{
-       int nr = 0;
-       struct ehca_wqe *wqe;
-       u64 offset;
-       struct ehca_queue_map *qmap;
-       struct ehca_qmap_entry *qmap_entry;
-
-       if (on_sq)
-               qmap = &my_qp->sq_map;
-       else
-               qmap = &my_qp->rq_map;
-
-       qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-       while ((nr < num_entries) && (qmap_entry->reported == 0)) {
-               /* generate flush CQE */
-
-               memset(wc, 0, sizeof(*wc));
-
-               offset = qmap->next_wqe_idx * ipz_queue->qe_size;
-               wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
-               if (!wqe) {
-                       ehca_err(cq->device, "Invalid wqe offset=%#llx on "
-                                "qp_num=%#x", offset, my_qp->real_qp_num);
-                       return nr;
-               }
-
-               wc->wr_id = replace_wr_id(wqe->work_request_id,
-                                         qmap_entry->app_wr_id);
-
-               if (on_sq) {
-                       switch (wqe->optype) {
-                       case WQE_OPTYPE_SEND:
-                               wc->opcode = IB_WC_SEND;
-                               break;
-                       case WQE_OPTYPE_RDMAWRITE:
-                               wc->opcode = IB_WC_RDMA_WRITE;
-                               break;
-                       case WQE_OPTYPE_RDMAREAD:
-                               wc->opcode = IB_WC_RDMA_READ;
-                               break;
-                       default:
-                               ehca_err(cq->device, "Invalid optype=%x",
-                                               wqe->optype);
-                               return nr;
-                       }
-               } else
-                       wc->opcode = IB_WC_RECV;
-
-               if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
-                       wc->ex.imm_data = wqe->immediate_data;
-                       wc->wc_flags |= IB_WC_WITH_IMM;
-               }
-
-               wc->status = IB_WC_WR_FLUSH_ERR;
-
-               wc->qp = &my_qp->ib_qp;
-
-               /* mark as reported and advance next_wqe pointer */
-               qmap_entry->reported = 1;
-               qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
-                                               qmap->entries);
-               qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-               wc++; nr++;
-       }
-
-       return nr;
-
-}
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
-{
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int nr;
-       struct ehca_qp *err_qp;
-       struct ib_wc *current_wc = wc;
-       int ret = 0;
-       unsigned long flags;
-       int entries_left = num_entries;
-
-       if (num_entries < 1) {
-               ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
-                        "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
-               ret = -EINVAL;
-               goto poll_cq_exit0;
-       }
-
-       spin_lock_irqsave(&my_cq->spinlock, flags);
-
-       /* generate flush cqes for send queues */
-       list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
-               nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-                               &err_qp->ipz_squeue, 1);
-               entries_left -= nr;
-               current_wc += nr;
-
-               if (entries_left == 0)
-                       break;
-       }
-
-       /* generate flush cqes for receive queues */
-       list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
-               nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-                               &err_qp->ipz_rqueue, 0);
-               entries_left -= nr;
-               current_wc += nr;
-
-               if (entries_left == 0)
-                       break;
-       }
-
-       for (nr = 0; nr < entries_left; nr++) {
-               ret = ehca_poll_cq_one(cq, current_wc);
-               if (ret)
-                       break;
-               current_wc++;
-       } /* eof for nr */
-       entries_left -= nr;
-
-       spin_unlock_irqrestore(&my_cq->spinlock, flags);
-       if (ret == -EAGAIN || !ret)
-               ret = num_entries - entries_left;
-
-poll_cq_exit0:
-       return ret;
-}
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
-{
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int ret = 0;
-
-       switch (notify_flags & IB_CQ_SOLICITED_MASK) {
-       case IB_CQ_SOLICITED:
-               hipz_set_cqx_n0(my_cq, 1);
-               break;
-       case IB_CQ_NEXT_COMP:
-               hipz_set_cqx_n1(my_cq, 1);
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-               unsigned long spl_flags;
-               spin_lock_irqsave(&my_cq->spinlock, spl_flags);
-               ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
-               spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
-       }
-
-       return ret;
-}
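
With IB_CQ_REPORT_MISSED_EVENTS the return value is positive when a CQE arrived between draining and re-arming the CQ, so consumers loop until the arm succeeds on an empty queue. A minimal sketch of that standard pattern, with handle_wc() as a hypothetical per-completion callback:

	struct ib_wc wc;

	do {
		while (ib_poll_cq(cq, 1, &wc) > 0)
			handle_wc(&wc);     /* hypothetical consumer work */
		/* re-arm; a positive return means new CQEs raced in: poll again */
	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				  IB_CQ_REPORT_MISSED_EVENTS) > 0);
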
diff --git a/drivers/staging/rdma/ehca/ehca_sqp.c b/drivers/staging/rdma/ehca/ehca_sqp.c
deleted file mode 100644
index 376b031..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  SQP functions
- *
- *  Authors: Khadija Souissi <souissi@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <rdma/ib_mad.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define IB_MAD_STATUS_REDIRECT         cpu_to_be16(0x0002)
-#define IB_MAD_STATUS_UNSUP_VERSION    cpu_to_be16(0x0004)
-#define IB_MAD_STATUS_UNSUP_METHOD     cpu_to_be16(0x0008)
-
-#define IB_PMA_CLASS_PORT_INFO         cpu_to_be16(0x0001)
-
-/**
- * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When the special
- * queue pair is created successfully, the corresponding port becomes active.
- *
- * Defining special queue pair 0 (SMI QP) is not supported yet.
- *
- * @qp_init_attr: Queue pair init attributes with port and queue pair type
- */
-
-u64 ehca_define_sqp(struct ehca_shca *shca,
-                   struct ehca_qp *ehca_qp,
-                   struct ib_qp_init_attr *qp_init_attr)
-{
-       u32 pma_qp_nr, bma_qp_nr;
-       u64 ret;
-       u8 port = qp_init_attr->port_num;
-       int counter;
-
-       shca->sport[port - 1].port_state = IB_PORT_DOWN;
-
-       switch (qp_init_attr->qp_type) {
-       case IB_QPT_SMI:
-               /* function not supported yet */
-               break;
-       case IB_QPT_GSI:
-               ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
-                                        ehca_qp->ipz_qp_handle,
-                                        ehca_qp->galpas.kernel,
-                                        (u32) qp_init_attr->port_num,
-                                        &pma_qp_nr, &bma_qp_nr);
-
-               if (ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device,
-                                "Can't define AQP1 for port %x. h_ret=%lli",
-                                port, ret);
-                       return ret;
-               }
-               shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
-               ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
-                        port, pma_qp_nr);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "invalid qp_type=%x",
-                        qp_init_attr->qp_type);
-               return H_PARAMETER;
-       }
-
-       if (ehca_nr_ports < 0) /* autodetect mode */
-               return H_SUCCESS;
-
-       for (counter = 0;
-            shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
-                    counter < ehca_port_act_time;
-            counter++) {
-               ehca_dbg(&shca->ib_device, "... wait until port %x is active",
-                        port);
-               msleep_interruptible(1000);
-       }
-
-       if (counter == ehca_port_act_time) {
-               ehca_err(&shca->ib_device, "Port %x is not active.", port);
-               return H_HARDWARE;
-       }
-
-       return H_SUCCESS;
-}
-
-struct ib_perf {
-       struct ib_mad_hdr mad_hdr;
-       u8 reserved[40];
-       u8 data[192];
-} __attribute__ ((packed));
-
-/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
-struct tcslfl {
-       u32 tc:8;
-       u32 sl:4;
-       u32 fl:20;
-} __attribute__ ((packed));
-
-/* IP Version/TC/FL packed into 32 bits, as in GRH */
-struct vertcfl {
-       u32 ver:4;
-       u32 tc:8;
-       u32 fl:20;
-} __attribute__ ((packed));
-
-static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
-                            const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                            const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
-       const struct ib_perf *in_perf = (const struct ib_perf *)in_mad;
-       struct ib_perf *out_perf = (struct ib_perf *)out_mad;
-       struct ib_class_port_info *poi =
-               (struct ib_class_port_info *)out_perf->data;
-       struct tcslfl *tcslfl =
-               (struct tcslfl *)&poi->redirect_tcslfl;
-       struct ehca_shca *shca =
-               container_of(ibdev, struct ehca_shca, ib_device);
-       struct ehca_sport *sport = &shca->sport[port_num - 1];
-
-       ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
-
-       *out_mad = *in_mad;
-
-       if (in_perf->mad_hdr.class_version != 1) {
-               ehca_warn(ibdev, "Unsupported class_version=%x",
-                         in_perf->mad_hdr.class_version);
-               out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
-               goto perf_reply;
-       }
-
-       switch (in_perf->mad_hdr.method) {
-       case IB_MGMT_METHOD_GET:
-       case IB_MGMT_METHOD_SET:
-               /* set class port info for redirection */
-               out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
-               out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
-               memset(poi, 0, sizeof(*poi));
-               poi->base_version = 1;
-               poi->class_version = 1;
-               poi->resp_time_value = 18;
-
-               /* copy local routing information from WC where applicable */
-               tcslfl->sl         = in_wc->sl;
-               poi->redirect_lid  =
-                       sport->saved_attr.lid | in_wc->dlid_path_bits;
-               poi->redirect_qp   = sport->pma_qp_nr;
-               poi->redirect_qkey = IB_QP1_QKEY;
-
-               ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
-                               &poi->redirect_pkey);
-
-               /* if request was globally routed, copy route info */
-               if (in_grh) {
-                       const struct vertcfl *vertcfl =
-                               (const struct vertcfl *)&in_grh->version_tclass_flow;
-                       memcpy(poi->redirect_gid, in_grh->dgid.raw,
-                              sizeof(poi->redirect_gid));
-                       tcslfl->tc        = vertcfl->tc;
-                       tcslfl->fl        = vertcfl->fl;
-               } else
-                       /* else only fill in default GID */
-                       ehca_query_gid(ibdev, port_num, 0,
-                                      (union ib_gid *)&poi->redirect_gid);
-
-               ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
-                        sport->saved_attr.lid, sport->pma_qp_nr);
-               break;
-
-       case IB_MGMT_METHOD_GET_RESP:
-               return IB_MAD_RESULT_FAILURE;
-
-       default:
-               out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
-               break;
-       }
-
-perf_reply:
-       out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
-
-       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                    const struct ib_mad_hdr *in, size_t in_mad_size,
-                    struct ib_mad_hdr *out, size_t *out_mad_size,
-                    u16 *out_mad_pkey_index)
-{
-       int ret;
-       const struct ib_mad *in_mad = (const struct ib_mad *)in;
-       struct ib_mad *out_mad = (struct ib_mad *)out;
-
-       if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-                        *out_mad_size != sizeof(*out_mad)))
-               return IB_MAD_RESULT_FAILURE;
-
-       if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
-               return IB_MAD_RESULT_FAILURE;
-
-       /* accept only pma request */
-       if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
-               return IB_MAD_RESULT_SUCCESS;
-
-       ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
-       ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
-                               in_mad, out_mad);
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h
deleted file mode 100644
index d280b12..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  auxiliary functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef EHCA_TOOLS_H
-#define EHCA_TOOLS_H
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/idr.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/vmalloc.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/device.h>
-
-#include <linux/atomic.h>
-#include <asm/ibmebus.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/hvcall.h>
-
-extern int ehca_debug_level;
-
-#define ehca_dbg(ib_dev, format, arg...) \
-       do { \
-               if (unlikely(ehca_debug_level)) \
-                       dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
-                                  "PU%04x EHCA_DBG:%s " format "\n", \
-                                  raw_smp_processor_id(), __func__, \
-                                  ## arg); \
-       } while (0)
-
-#define ehca_info(ib_dev, format, arg...) \
-       dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
-                raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_warn(ib_dev, format, arg...) \
-       dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
-                raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_err(ib_dev, format, arg...) \
-       dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
-               raw_smp_processor_id(), __func__, ## arg)
-
-/* use this one only if no ib_dev available */
-#define ehca_gen_dbg(format, arg...) \
-       do { \
-               if (unlikely(ehca_debug_level)) \
-                       printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
-                              raw_smp_processor_id(), __func__, ## arg); \
-       } while (0)
-
-#define ehca_gen_warn(format, arg...) \
-       printk(KERN_WARNING "PU%04x EHCA_WARN:%s " format "\n", \
-              raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_gen_err(format, arg...) \
-       printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
-              raw_smp_processor_id(), __func__, ## arg)
-
-/**
- * ehca_dmp - printk a memory block, 16 bytes (two 8-byte words) per line.
- * Each line has the following layout:
- * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
- */
-#define ehca_dmp(adr, len, format, args...) \
-       do { \
-               unsigned int x; \
-               unsigned int l = (unsigned int)(len); \
-               unsigned char *deb = (unsigned char *)(adr); \
-               for (x = 0; x < l; x += 16) { \
-                       printk(KERN_INFO "EHCA_DMP:%s " format \
-                              " adr=%p ofs=%04x %016llx %016llx\n", \
-                              __func__, ##args, deb, x, \
-                              *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
-                       deb += 16; \
-               } \
-       } while (0)
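
The 64-byte CQE dumps in ehca_reqs.c are typical callers; a call like the one below expands to four lines of two 8-byte hex words each:

	ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
	/* -> EHCA_DMP:<func> ehca_cq=... cq_num=... adr=... ofs=0000 <w0> <w1>
	 *    ... three more lines at ofs=0010, 0020, 0030 */
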
-
-/* define a bitmask, little endian version */
-#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
-
-/* define a bitmask, the ibm way... */
-#define EHCA_BMASK_IBM(from, to) (((63 - (to)) << 16) + ((to) - (from) + 1))
-
-/* internal function, don't use */
-#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
-
-/* internal function, don't use */
-#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
-
-/**
- * EHCA_BMASK_SET - return value shifted and masked by mask
- * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
- * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
- * in variable
- */
-#define EHCA_BMASK_SET(mask, value) \
-       ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
-
-/**
- * EHCA_BMASK_GET - extract a parameter from value by mask
- */
-#define EHCA_BMASK_GET(mask, value) \
-       (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
-
-/* Converts ehca to ib return code */
-int ehca2ib_return_code(u64 ehca_rc);
-
-#endif /* EHCA_TOOLS_H */
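
A worked example of the EHCA_BMASK encoding: the field position is kept in the upper 16 bits of the mask word and the field length in the lower 16, so SET and GET reduce to one mask and one shift. The values below follow by hand from the macros above:

	#define MY_FIELD EHCA_BMASK(16, 8)       /* == 0x00100008: pos 16, len 8 */

	u64 reg = 0;
	reg |= EHCA_BMASK_SET(MY_FIELD, 0xAB);   /* reg == 0x0000000000AB0000 */
	u64 v = EHCA_BMASK_GET(MY_FIELD, reg);   /* v   == 0xAB               */
	/*
	 * EHCA_BMASK_MASK(MY_FIELD) == 0xFF: (64 - mask) & 0xffff reduces to
	 * 64 - length == 56, and ~0ULL >> 56 leaves just the low 8 bits set.
	 */
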
diff --git a/drivers/staging/rdma/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c
deleted file mode 100644
index 1a1d5d9..0000000
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  userspace support verbs
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-                                       struct ib_udata *udata)
-{
-       struct ehca_ucontext *my_context;
-
-       my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
-       if (!my_context) {
-               ehca_err(device, "Out of memory device=%p", device);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       return &my_context->ib_ucontext;
-}
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context)
-{
-       kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
-       return 0;
-}
-
-static void ehca_mm_open(struct vm_area_struct *vma)
-{
-       u32 *count = (u32 *)vma->vm_private_data;
-       if (!count) {
-               ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-               return;
-       }
-       (*count)++;
-       if (!(*count))
-               ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-       ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-                    vma->vm_start, vma->vm_end, *count);
-}
-
-static void ehca_mm_close(struct vm_area_struct *vma)
-{
-       u32 *count = (u32 *)vma->vm_private_data;
-       if (!count) {
-               ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-               return;
-       }
-       (*count)--;
-       ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-                    vma->vm_start, vma->vm_end, *count);
-}
-
-static const struct vm_operations_struct vm_ops = {
-       .open = ehca_mm_open,
-       .close = ehca_mm_close,
-};
-
-static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
-                       u32 *mm_count)
-{
-       int ret;
-       u64 vsize, physical;
-
-       vsize = vma->vm_end - vma->vm_start;
-       if (vsize < EHCA_PAGESIZE) {
-               ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
-               return -EINVAL;
-       }
-
-       physical = galpas->user.fw_handle;
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-       ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
-       /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
-       ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
-                          vma->vm_page_prot);
-       if (unlikely(ret)) {
-               ehca_gen_err("remap_4k_pfn() failed ret=%i", ret);
-               return -ENOMEM;
-       }
-
-       vma->vm_private_data = mm_count;
-       (*mm_count)++;
-       vma->vm_ops = &vm_ops;
-
-       return 0;
-}
-
-static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
-                          u32 *mm_count)
-{
-       int ret;
-       u64 start, ofs;
-       struct page *page;
-
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       start = vma->vm_start;
-       for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
-               u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
-               page = virt_to_page(virt_addr);
-               ret = vm_insert_page(vma, start, page);
-               if (unlikely(ret)) {
-                       ehca_gen_err("vm_insert_page() failed rc=%i", ret);
-                       return ret;
-               }
-               start += PAGE_SIZE;
-       }
-       vma->vm_private_data = mm_count;
-       (*mm_count)++;
-       vma->vm_ops = &vm_ops;
-
-       return 0;
-}
-
-static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
-                       u32 rsrc_type)
-{
-       int ret;
-
-       switch (rsrc_type) {
-       case 0: /* galpa fw handle */
-               ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
-               ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_fw() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       case 1: /* cq queue_addr */
-               ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
-               ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_queue() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
-                        rsrc_type, cq->cq_number);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
-                       u32 rsrc_type)
-{
-       int ret;
-
-       switch (rsrc_type) {
-       case 0: /* galpa fw handle */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
-               ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_fw() failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       case 1: /* qp rqueue_addr */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
-               ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
-                                     &qp->mm_count_rqueue);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       case 2: /* qp squeue_addr */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
-               ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
-                                     &qp->mm_count_squeue);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x",
-                        rsrc_type, qp->ib_qp.qp_num);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
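-       /*
-        * The mmap offset encodes the target object: bits 0-24 carry
-        * the idr handle, bits 25-26 the resource type and bit 27 the
-        * queue type (CQ or QP).
-        */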
-       u64 fileoffset = vma->vm_pgoff;
-       u32 idr_handle = fileoffset & 0x1FFFFFF;
-       u32 q_type = (fileoffset >> 27) & 0x1;    /* CQ, QP,...        */
-       u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
-       int ret;
-       struct ehca_cq *cq;
-       struct ehca_qp *qp;
-       struct ib_uobject *uobject;
-
-       switch (q_type) {
-       case  0: /* CQ */
-               read_lock(&ehca_cq_idr_lock);
-               cq = idr_find(&ehca_cq_idr, idr_handle);
-               read_unlock(&ehca_cq_idr_lock);
-
-               /* make sure this mmap really belongs to the authorized user */
-               if (!cq)
-                       return -EINVAL;
-
-               if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
-                       return -EINVAL;
-
-               ret = ehca_mmap_cq(vma, cq, rsrc_type);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_cq() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       case 1: /* QP */
-               read_lock(&ehca_qp_idr_lock);
-               qp = idr_find(&ehca_qp_idr, idr_handle);
-               read_unlock(&ehca_qp_idr_lock);
-
-               /* make sure this mmap really belongs to the authorized user */
-               if (!qp)
-                       return -EINVAL;
-
-               uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
-               if (!uobject || uobject->context != context)
-                       return -EINVAL;
-
-               ret = ehca_mmap_qp(vma, qp, rsrc_type);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_qp() failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_gen_err("bad queue type %x", q_type);
-               return -EINVAL;
-       }
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c
deleted file mode 100644 (file)
index 89517ff..0000000
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <asm/hvcall.h>
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hcp_phyp.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
-#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
-#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
-#define H_ALL_RES_QP_STORAGE            EHCA_BMASK_IBM(16, 17)
-#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
-#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
-#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
-#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
-#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
-#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
-#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
-
-#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
-#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
-#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
-
-#define H_ALL_RES_QP_UD_AV_LKEY         EHCA_BMASK_IBM(32, 63)
-#define H_ALL_RES_QP_SRQ_QP_TOKEN       EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_SRQ_QP_HANDLE      EHCA_BMASK_IBM(0, 64)
-#define H_ALL_RES_QP_SRQ_LIMIT          EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_SRQ_QPN            EHCA_BMASK_IBM(40, 63)
-
-#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
-#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
-
-#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
-
-#define H_MP_INIT_TYPE                  EHCA_BMASK_IBM(44, 47)
-#define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
-#define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
-
-#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
-#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
-#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
-
-static DEFINE_SPINLOCK(hcall_lock);
-
-static long ehca_plpar_hcall_norets(unsigned long opcode,
-                                   unsigned long arg1,
-                                   unsigned long arg2,
-                                   unsigned long arg3,
-                                   unsigned long arg4,
-                                   unsigned long arg5,
-                                   unsigned long arg6,
-                                   unsigned long arg7)
-{
-       long ret;
-       int i, sleep_msecs;
-       unsigned long flags = 0;
-
-       if (unlikely(ehca_debug_level >= 2))
-               ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
-                            opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
-
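-       /*
-        * Retry up to five times: if the hypervisor reports a
-        * long-busy condition, sleep for the suggested interval
-        * and issue the hCall again.
-        */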
-       for (i = 0; i < 5; i++) {
-               /* serialize hCalls to work around firmware issue */
-               if (ehca_lock_hcalls)
-                       spin_lock_irqsave(&hcall_lock, flags);
-
-               ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
-                                        arg5, arg6, arg7);
-
-               if (ehca_lock_hcalls)
-                       spin_unlock_irqrestore(&hcall_lock, flags);
-
-               if (H_IS_LONG_BUSY(ret)) {
-                       sleep_msecs = get_longbusy_msecs(ret);
-                       msleep_interruptible(sleep_msecs);
-                       continue;
-               }
-
-               if (ret < H_SUCCESS)
-                       ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
-                                    opcode, ret, arg1, arg2, arg3,
-                                    arg4, arg5, arg6, arg7);
-               else
-                       if (unlikely(ehca_debug_level >= 2))
-                               ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
-
-               return ret;
-       }
-
-       return H_BUSY;
-}
-
-static long ehca_plpar_hcall9(unsigned long opcode,
-                             unsigned long *outs, /* array of 9 outputs */
-                             unsigned long arg1,
-                             unsigned long arg2,
-                             unsigned long arg3,
-                             unsigned long arg4,
-                             unsigned long arg5,
-                             unsigned long arg6,
-                             unsigned long arg7,
-                             unsigned long arg8,
-                             unsigned long arg9)
-{
-       long ret;
-       int i, sleep_msecs;
-       unsigned long flags = 0;
-
-       if (unlikely(ehca_debug_level >= 2))
-               ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
-                            arg1, arg2, arg3, arg4, arg5,
-                            arg6, arg7, arg8, arg9);
-
-       for (i = 0; i < 5; i++) {
-               /* serialize hCalls to work around firmware issue */
-               if (ehca_lock_hcalls)
-                       spin_lock_irqsave(&hcall_lock, flags);
-
-               ret = plpar_hcall9(opcode, outs,
-                                  arg1, arg2, arg3, arg4, arg5,
-                                  arg6, arg7, arg8, arg9);
-
-               if (ehca_lock_hcalls)
-                       spin_unlock_irqrestore(&hcall_lock, flags);
-
-               if (H_IS_LONG_BUSY(ret)) {
-                       sleep_msecs = get_longbusy_msecs(ret);
-                       msleep_interruptible(sleep_msecs);
-                       continue;
-               }
-
-               if (ret < H_SUCCESS) {
-                       ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
-                                    opcode, arg1, arg2, arg3, arg4, arg5,
-                                    arg6, arg7, arg8, arg9);
-                       ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-                                    ret, outs[0], outs[1], outs[2], outs[3],
-                                    outs[4], outs[5], outs[6], outs[7],
-                                    outs[8]);
-               } else if (unlikely(ehca_debug_level >= 2))
-                       ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-                                    ret, outs[0], outs[1], outs[2], outs[3],
-                                    outs[4], outs[5], outs[6], outs[7],
-                                    outs[8]);
-               return ret;
-       }
-
-       return H_BUSY;
-}
-
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u32 neq_control,
-                            const u32 number_of_entries,
-                            struct ipz_eq_handle *eq_handle,
-                            u32 *act_nr_of_entries,
-                            u32 *act_pages,
-                            u32 *eq_ist)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-       u64 allocate_controls;
-
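-       /* IBM bit numbering: bit 0 is the MSB, so bit n is 1ULL << (63 - n) */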
-       /* resource type */
-       allocate_controls = 3ULL;
-
-       /* ISN is associated */
-       if (neq_control != 1)
-               allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
-       else /* notification event queue */
-               allocate_controls = (1ULL << 63) | allocate_controls;
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,  /* r4 */
-                               allocate_controls,      /* r5 */
-                               number_of_entries,      /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       eq_handle->handle = outs[0];
-       *act_nr_of_entries = (u32)outs[3];
-       *act_pages = (u32)outs[4];
-       *eq_ist = (u32)outs[5];
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resource - ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-                      struct ipz_eq_handle eq_handle,
-                      const u64 event_mask)
-{
-       return ehca_plpar_hcall_norets(H_RESET_EVENTS,
-                                      adapter_handle.handle, /* r4 */
-                                      eq_handle.handle,      /* r5 */
-                                      event_mask,            /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_cq *cq,
-                            struct ehca_alloc_cq_parms *param)
-{
-       int rc;
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,   /* r4  */
-                               2,                       /* r5  */
-                               param->eq_handle.handle, /* r6  */
-                               cq->token,               /* r7  */
-                               param->nr_cqe,           /* r8  */
-                               0, 0, 0, 0);
-       cq->ipz_cq_handle.handle = outs[0];
-       param->act_nr_of_entries = (u32)outs[3];
-       param->act_pages = (u32)outs[4];
-
-       if (ret == H_SUCCESS) {
-               rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
-               if (rc) {
-                       ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-                                    rc, outs[5]);
-
-                       ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                               adapter_handle.handle,     /* r4 */
-                                               cq->ipz_cq_handle.handle,  /* r5 */
-                                               0, 0, 0, 0, 0);
-                       ret = H_NO_MEM;
-               }
-       }
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_alloc_qp_parms *parms, int is_user)
-{
-       int rc;
-       u64 ret;
-       u64 allocate_controls, max_r10_reg, r11, r12;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       allocate_controls =
-               EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
-                                parms->squeue.page_size)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
-                                parms->rqueue.page_size)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
-                                !!(parms->ll_comp_flags & LLQP_RECV_COMP))
-               | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
-                                !!(parms->ll_comp_flags & LLQP_SEND_COMP))
-               | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
-                                parms->ud_av_l_key_ctl)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
-
-       max_r10_reg =
-               EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
-                              parms->squeue.max_wr + 1)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
-                                parms->rqueue.max_wr + 1)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
-                                parms->squeue.max_sge)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
-                                parms->rqueue.max_sge);
-
-       r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
-
-       if (parms->ext_type == EQPT_SRQ)
-               r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
-       else
-               r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,             /* r4  */
-                               allocate_controls,                 /* r5  */
-                               parms->send_cq_handle.handle,
-                               parms->recv_cq_handle.handle,
-                               parms->eq_handle.handle,
-                               ((u64)parms->token << 32) | parms->pd.value,
-                               max_r10_reg, r11, r12);
-
-       parms->qp_handle.handle = outs[0];
-       parms->real_qp_num = (u32)outs[1];
-       parms->squeue.act_nr_wqes =
-               (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
-       parms->rqueue.act_nr_wqes =
-               (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
-       parms->squeue.act_nr_sges =
-               (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
-       parms->rqueue.act_nr_sges =
-               (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
-       parms->squeue.queue_size =
-               (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
-       parms->rqueue.queue_size =
-               (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
-
-       if (ret == H_SUCCESS) {
-               rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
-               if (rc) {
-                       ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-                                    rc, outs[6]);
-
-                       ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                               adapter_handle.handle,     /* r4 */
-                                               parms->qp_handle.handle,  /* r5 */
-                                               0, 0, 0, 0, 0);
-                       ret = H_NO_MEM;
-               }
-       }
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-                     const u8 port_id,
-                     struct hipz_query_port *query_port_response_block)
-{
-       u64 ret;
-       u64 r_cb = __pa(query_port_response_block);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("response block not page aligned");
-               return H_PARAMETER;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
-                                     adapter_handle.handle, /* r4 */
-                                     port_id,               /* r5 */
-                                     r_cb,                  /* r6 */
-                                     0, 0, 0, 0);
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(query_port_response_block, 64, "response_block");
-
-       return ret;
-}
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-                      const u8 port_id, const u32 port_cap,
-                      const u8 init_type, const int modify_mask)
-{
-       u64 port_attributes = port_cap;
-
-       if (modify_mask & IB_PORT_SHUTDOWN)
-               port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
-       if (modify_mask & IB_PORT_INIT_TYPE)
-               port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
-       if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
-               port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
-
-       return ehca_plpar_hcall_norets(H_MODIFY_PORT,
-                                      adapter_handle.handle, /* r4 */
-                                      port_id,               /* r5 */
-                                      port_attributes,       /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-                    struct hipz_query_hca *query_hca_rblock)
-{
-       u64 r_cb = __pa(query_hca_rblock);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("response_block=%p not page aligned",
-                            query_hca_rblock);
-               return H_PARAMETER;
-       }
-
-       return ehca_plpar_hcall_norets(H_QUERY_HCA,
-                                      adapter_handle.handle, /* r4 */
-                                      r_cb,                  /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-                         const u8 pagesize,
-                         const u8 queue_type,
-                         const u64 resource_handle,
-                         const u64 logical_address_of_page,
-                         u64 count)
-{
-       return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
-                                      adapter_handle.handle,      /* r4  */
-                                      (u64)queue_type | ((u64)pagesize) << 8,
-                                      /* r5  */
-                                      resource_handle,            /* r6  */
-                                      logical_address_of_page,    /* r7  */
-                                      count,                      /* r8  */
-                                      0, 0);
-}
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_eq_handle eq_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count)
-{
-       if (count != 1) {
-               ehca_gen_err("Ppage counter=%llx", count);
-               return H_PARAMETER;
-       }
-       return hipz_h_register_rpage(adapter_handle,
-                                    pagesize,
-                                    queue_type,
-                                    eq_handle.handle,
-                                    logical_address_of_page, count);
-}
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
-                          u32 ist)
-{
-       u64 ret;
-       ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
-                                     adapter_handle.handle, /* r4 */
-                                     ist,                   /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret != H_SUCCESS && ret != H_BUSY)
-               ehca_gen_err("Could not query interrupt state.");
-
-       return ret;
-}
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_cq_handle cq_handle,
-                            struct ehca_pfcq *pfcq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa gal)
-{
-       if (count != 1) {
-               ehca_gen_err("Page counter=%llx", count);
-               return H_PARAMETER;
-       }
-
-       return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-                                    cq_handle.handle, logical_address_of_page,
-                                    count);
-}
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_qp_handle qp_handle,
-                            struct ehca_pfqp *pfqp,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa galpa)
-{
-       if (count > 1) {
-               ehca_gen_err("Page counter=%llx", count);
-               return H_PARAMETER;
-       }
-
-       return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-                                    qp_handle.handle, logical_address_of_page,
-                                    count);
-}
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-                              const struct ipz_qp_handle qp_handle,
-                              struct ehca_pfqp *pfqp,
-                              void **log_addr_next_sq_wqe2processed,
-                              void **log_addr_next_rq_wqe2processed,
-                              int dis_and_get_function_code)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-                               adapter_handle.handle,     /* r4 */
-                               dis_and_get_function_code, /* r5 */
-                               qp_handle.handle,          /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       if (log_addr_next_sq_wqe2processed)
-               *log_addr_next_sq_wqe2processed = (void *)outs[0];
-       if (log_addr_next_rq_wqe2processed)
-               *log_addr_next_rq_wqe2processed = (void *)outs[1];
-
-       return ret;
-}
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-                    const struct ipz_qp_handle qp_handle,
-                    struct ehca_pfqp *pfqp,
-                    const u64 update_mask,
-                    struct hcp_modify_qp_control_block *mqpcb,
-                    struct h_galpa gal)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-       ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
-                               adapter_handle.handle, /* r4 */
-                               qp_handle.handle,      /* r5 */
-                               update_mask,           /* r6 */
-                               __pa(mqpcb),           /* r7 */
-                               0, 0, 0, 0, 0);
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Insufficient resources ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-                   const struct ipz_qp_handle qp_handle,
-                   struct ehca_pfqp *pfqp,
-                   struct hcp_modify_qp_control_block *qqpcb,
-                   struct h_galpa gal)
-{
-       return ehca_plpar_hcall_norets(H_QUERY_QP,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      __pa(qqpcb),           /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_qp *qp)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = hcp_galpas_dtor(&qp->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct qp->galpas");
-               return H_RESOURCE;
-       }
-       ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-                               adapter_handle.handle,     /* r4 */
-                               /* function code */
-                               1,                         /* r5 */
-                               qp->ipz_qp_handle.handle,  /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       if (ret == H_HARDWARE)
-               ehca_gen_err("HCA not operational. ret=%lli", ret);
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     qp->ipz_qp_handle.handle,  /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("Resource still in use. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port)
-{
-       return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      port,                  /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port, u32 *pma_qp_nr,
-                      u32 *bma_qp_nr)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
-                               adapter_handle.handle, /* r4 */
-                               qp_handle.handle,      /* r5 */
-                               port,                  /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       *pma_qp_nr = (u32)outs[0];
-       *bma_qp_nr = (u32)outs[1];
-
-       if (ret == H_ALIAS_EXIST)
-               ehca_gen_err("AQP1 already exists. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id)
-{
-       u64 ret;
-
-       ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
-                                     adapter_handle.handle,  /* r4 */
-                                     qp_handle.handle,       /* r5 */
-                                     mcg_dlid,               /* r6 */
-                                     interface_id,           /* r7 */
-                                     subnet_prefix,          /* r8 */
-                                     0, 0);
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id)
-{
-       return ehca_plpar_hcall_norets(H_DETACH_MCQP,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      mcg_dlid,              /* r6 */
-                                      interface_id,          /* r7 */
-                                      subnet_prefix,         /* r8 */
-                                      0, 0);
-}
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_cq *cq,
-                     u8 force_flag)
-{
-       u64 ret;
-
-       ret = hcp_galpas_dtor(&cq->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct cp->galpas");
-               return H_RESOURCE;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     cq->ipz_cq_handle.handle,  /* r5 */
-                                     force_flag != 0 ? 1L : 0L, /* r6 */
-                                     0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_eq *eq)
-{
-       u64 ret;
-
-       ret = hcp_galpas_dtor(&eq->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct eq->galpas");
-               return H_RESOURCE;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     eq->ipz_eq_handle.handle,  /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("Resource in use. ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u64 vaddr,
-                            const u64 length,
-                            const u32 access_ctrl,
-                            const struct ipz_pd pd,
-                            struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,            /* r4 */
-                               5,                                /* r5 */
-                               vaddr,                            /* r6 */
-                               length,                           /* r7 */
-                               (((u64)access_ctrl) << 32ULL),    /* r8 */
-                               pd.value,                         /* r9 */
-                               0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count)
-{
-       u64 ret;
-
-       if (unlikely(ehca_debug_level >= 3)) {
-               if (count > 1) {
-                       u64 *kpage;
-                       int i;
-                       kpage = __va(logical_address_of_page);
-                       for (i = 0; i < count; i++)
-                               ehca_gen_dbg("kpage[%d]=%p",
-                                            i, (void *)kpage[i]);
-               } else
-                       ehca_gen_dbg("kpage=%p",
-                                    (void *)logical_address_of_page);
-       }
-
-       if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
-               ehca_gen_err("logical_address_of_page not on a 4k boundary "
-                            "adapter_handle=%llx mr=%p mr_handle=%llx "
-                            "pagesize=%x queue_type=%x "
-                            "logical_address_of_page=%llx count=%llx",
-                            adapter_handle.handle, mr,
-                            mr->ipz_mr_handle.handle, pagesize, queue_type,
-                            logical_address_of_page, count);
-               ret = H_PARAMETER;
-       } else
-               ret = hipz_h_register_rpage(adapter_handle, pagesize,
-                                           queue_type,
-                                           mr->ipz_mr_handle.handle,
-                                           logical_address_of_page, count);
-       return ret;
-}
-
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mr *mr,
-                   struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
-                               adapter_handle.handle,     /* r4 */
-                               mr->ipz_mr_handle.handle,  /* r5 */
-                               0, 0, 0, 0, 0, 0, 0);
-       outparms->len = outs[0];
-       outparms->vaddr = outs[1];
-       outparms->acl  = outs[4] >> 32;
-       outparms->lkey = (u32)(outs[5] >> 32);
-       outparms->rkey = (u32)(outs[5] & (0xffffffff));
-
-       return ret;
-}
-
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mr *mr)
-{
-       return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                      adapter_handle.handle,    /* r4 */
-                                      mr->ipz_mr_handle.handle, /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-                         const struct ehca_mr *mr,
-                         const u64 vaddr_in,
-                         const u64 length,
-                         const u32 access_ctrl,
-                         const struct ipz_pd pd,
-                         const u64 mr_addr_cb,
-                         struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
-                               adapter_handle.handle,    /* r4 */
-                               mr->ipz_mr_handle.handle, /* r5 */
-                               vaddr_in,                 /* r6 */
-                               length,                   /* r7 */
-                               /* r8 */
-                               ((((u64)access_ctrl) << 32ULL) | pd.value),
-                               mr_addr_cb,               /* r9 */
-                               0, 0, 0);
-       outparms->vaddr = outs[1];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-                       const struct ehca_mr *mr,
-                       const struct ehca_mr *orig_mr,
-                       const u64 vaddr_in,
-                       const u32 access_ctrl,
-                       const struct ipz_pd pd,
-                       struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
-                               adapter_handle.handle,            /* r4 */
-                               orig_mr->ipz_mr_handle.handle,    /* r5 */
-                               vaddr_in,                         /* r6 */
-                               (((u64)access_ctrl) << 32ULL),    /* r7 */
-                               pd.value,                         /* r8 */
-                               0, 0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mw *mw,
-                            const struct ipz_pd pd,
-                            struct ehca_mw_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,      /* r4 */
-                               6,                          /* r5 */
-                               pd.value,                   /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mw *mw,
-                   struct ehca_mw_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
-                               adapter_handle.handle,    /* r4 */
-                               mw->ipz_mw_handle.handle, /* r5 */
-                               0, 0, 0, 0, 0, 0, 0);
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mw *mw)
-{
-       return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                      adapter_handle.handle,    /* r4 */
-                                      mw->ipz_mw_handle.handle, /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-                     const u64 resource_handle,
-                     void *rblock,
-                     unsigned long *byte_count)
-{
-       u64 r_cb = __pa(rblock);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("rblock not page aligned.");
-               return H_PARAMETER;
-       }
-
-       return ehca_plpar_hcall_norets(H_ERROR_DATA,
-                                      adapter_handle.handle,
-                                      resource_handle,
-                                      r_cb,
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_eoi(int irq)
-{
-       unsigned long xirr;
-
-       iosync();
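-       /* XIRR value for the XICS controller: CPPR (0xff) in the top byte, IRQ below */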
-       xirr = (0xffULL << 24) | irq;
-
-       return plpar_hcall_norets(H_EOI, xirr);
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h
deleted file mode 100644 (file)
index a46e514..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_IF_H__
-#define __HCP_IF_H__
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "hipz_hw.h"
-
-/*
- * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initializes
- * them and creates the empty EQPT (ring).
- */
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u32 neq_control,
-                            const u32 number_of_entries,
-                            struct ipz_eq_handle *eq_handle,
-                            u32 *act_nr_of_entries,
-                            u32 *act_pages,
-                            u32 *eq_ist);
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-                      struct ipz_eq_handle eq_handle,
-                      const u64 event_mask);
-/*
- * hipz_h_alloc_resource_cq allocates CQ resources in HW and FW, initializes
- * them and creates the empty CQPT (ring).
- */
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_cq *cq,
-                            struct ehca_alloc_cq_parms *param);
-
-
-/*
- * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
- * initializes them and creates the empty QPPTs (2 rings).
- */
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_alloc_qp_parms *parms, int is_user);
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-                     const u8 port_id,
-                     struct hipz_query_port *query_port_response_block);
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-                      const u8 port_id, const u32 port_cap,
-                      const u8 init_type, const int modify_mask);
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-                    struct hipz_query_hca *query_hca_rblock);
-
-/*
- * hipz_h_register_rpage is the internal helper behind all
- * H_REGISTER_RPAGE hCalls.
- */
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-                         const u8 pagesize,
-                         const u8 queue_type,
-                         const u64 resource_handle,
-                         const u64 logical_address_of_page,
-                         u64 count);
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_eq_handle eq_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count);
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
-                          u32 ist);
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_cq_handle cq_handle,
-                            struct ehca_pfcq *pfcq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa gal);
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_qp_handle qp_handle,
-                            struct ehca_pfqp *pfqp,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa galpa);
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-                              const struct ipz_qp_handle qp_handle,
-                              struct ehca_pfqp *pfqp,
-                              void **log_addr_next_sq_wqe2processed,
-                              void **log_addr_next_rq_wqe2processed,
-                              int dis_and_get_function_code);
-enum hcall_sigt {
-       HCALL_SIGT_NO_CQE = 0,
-       HCALL_SIGT_BY_WQE = 1,
-       HCALL_SIGT_EVERY = 2
-};
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-                    const struct ipz_qp_handle qp_handle,
-                    struct ehca_pfqp *pfqp,
-                    const u64 update_mask,
-                    struct hcp_modify_qp_control_block *mqpcb,
-                    struct h_galpa gal);
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-                   const struct ipz_qp_handle qp_handle,
-                   struct ehca_pfqp *pfqp,
-                   struct hcp_modify_qp_control_block *qqpcb,
-                   struct h_galpa gal);
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_qp *qp);
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port);
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port, u32 *pma_qp_nr,
-                      u32 *bma_qp_nr);
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_cq *cq,
-                     u8 force_flag);
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_eq *eq);
-
-/*
- * hipz_h_alloc_resource_mr allocates MR resources in HW and FW and
- * initializes them.
- */
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u64 vaddr,
-                            const u64 length,
-                            const u32 access_ctrl,
-                            const struct ipz_pd pd,
-                            struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count);
-
-/* hipz_h_query_mr queries MR in HW and FW */
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mr *mr,
-                   struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mr frees MR resources in HW and FW */
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mr *mr);
-
-/* hipz_h_reregister_pmr reregisters MR in HW and FW */
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-                         const struct ehca_mr *mr,
-                         const u64 vaddr_in,
-                         const u64 length,
-                         const u32 access_ctrl,
-                         const struct ipz_pd pd,
-                         const u64 mr_addr_cb,
-                         struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_smr registers a shared MR in HW and FW */
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-                       const struct ehca_mr *mr,
-                       const struct ehca_mr *orig_mr,
-                       const u64 vaddr_in,
-                       const u32 access_ctrl,
-                       const struct ipz_pd pd,
-                       struct ehca_mr_hipzout_parms *outparms);
-
-/*
- * hipz_h_alloc_resource_mw allocates MW resources in HW and FW and
- * initializes them.
- */
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mw *mw,
-                            const struct ipz_pd pd,
-                            struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_query_mw queries MW in HW and FW */
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mw *mw,
-                   struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mw frees MW resources in HW and FW */
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mw *mw);
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-                     const u64 ressource_handle,
-                     void *rblock,
-                     unsigned long *byte_count);
-u64 hipz_h_eoi(int irq);
-
-#endif /* __HCP_IF_H__ */
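Taken together, these prototypes are the driver's entire firmware surface: each hipz_h_* call wraps one pSeries hypervisor call and returns the raw hcall status. A minimal caller sketch, assuming H_SUCCESS from <asm/hvcall.h>; example_alloc_mr and the -EINVAL fallback are hypothetical, and a real caller would translate the status via the driver's ehca2ib_return_code():

/* Hypothetical caller: allocate an MR in HW/FW and check the hcall
 * status. Handle and outparms types come from the headers above. */
static int example_alloc_mr(struct ipz_adapter_handle ah,
			    struct ehca_mr *mr, u64 vaddr, u64 length,
			    struct ipz_pd pd)
{
	struct ehca_mr_hipzout_parms outparms;
	u64 hret;

	hret = hipz_h_alloc_resource_mr(ah, mr, vaddr, length,
					HIPZ_ACCESSCTRL_L_WRITE, pd,
					&outparms);
	if (hret != H_SUCCESS)
		return -EINVAL;	/* simplified; see ehca2ib_return_code() */
	return 0;
}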
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c
deleted file mode 100644 (file)
index 077376f..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *   load store abstraction for ehca register access with tracing
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-u64 hcall_map_page(u64 physaddr)
-{
-       return (u64)ioremap(physaddr, EHCA_PAGESIZE);
-}
-
-int hcall_unmap_page(u64 mapaddr)
-{
-       iounmap((volatile void __iomem *) mapaddr);
-       return 0;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-                   u64 paddr_kernel, u64 paddr_user)
-{
-       if (!is_user) {
-               galpas->kernel.fw_handle = hcall_map_page(paddr_kernel);
-               if (!galpas->kernel.fw_handle)
-                       return -ENOMEM;
-       } else
-               galpas->kernel.fw_handle = 0;
-
-       galpas->user.fw_handle = paddr_user;
-
-       return 0;
-}
-
-int hcp_galpas_dtor(struct h_galpas *galpas)
-{
-       if (galpas->kernel.fw_handle) {
-               int ret = hcall_unmap_page(galpas->kernel.fw_handle);
-               if (ret)
-                       return ret;
-       }
-
-       galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
-
-       return 0;
-}
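The two routines above are strictly paired: the ctor ioremap()s the kernel galpa only when the consumer is in-kernel, and the dtor is safe to call either way because it keys off the stored fw_handle. A sketch of the intended lifecycle; example_setup and its paddr arguments are illustrative, as real addresses come from the resource-allocation hcalls:

/* Illustrative lifecycle: construct kernel/user galpas at setup,
 * tear them down symmetrically on the teardown path. */
static int example_setup(struct h_galpas *galpas,
			 u64 paddr_kernel, u64 paddr_user)
{
	int ret = hcp_galpas_ctor(galpas, 0 /* !is_user */,
				  paddr_kernel, paddr_user);
	if (ret)
		return ret;

	/* ... access registers via galpas->kernel ... */

	return hcp_galpas_dtor(galpas);
}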
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h
deleted file mode 100644 (file)
index d1b0299..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware calls
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_PHYP_H__
-#define __HCP_PHYP_H__
-
-
-/*
- * eHCA page (mapped into memory)
- * resource to access eHCA register pages in CPU address space
- */
-struct h_galpa {
-       u64 fw_handle;
-       /* for pSeries this is a 64bit memory address where
-          I/O memory is mapped into CPU address space (kv) */
-};
-
-/*
- * resource to access eHCA address space registers, all types
- */
-struct h_galpas {
-       u32 pid;                /* PID of userspace galpa checking */
-       struct h_galpa user;    /* user space accessible resource,
-                                  set to 0 if unused */
-       struct h_galpa kernel;  /* kernel space accessible resource,
-                                  set to 0 if unused */
-};
-
-static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
-{
-       u64 addr = galpa.fw_handle + offset;
-       return *(volatile u64 __force *)addr;
-}
-
-static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
-{
-       u64 addr = galpa.fw_handle + offset;
-       *(volatile u64 __force *)addr = value;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-                   u64 paddr_kernel, u64 paddr_user);
-
-int hcp_galpas_dtor(struct h_galpas *galpas);
-
-u64 hcall_map_page(u64 physaddr);
-
-int hcall_unmap_page(u64 mapaddr);
-
-#endif
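hipz_galpa_load()/hipz_galpa_store() are deliberately raw: a single volatile 64-bit access through the firmware handle, with no implicit barriers, so any ordering against queue memory is the caller's responsibility. A minimal read-modify-write sketch; the 0x40 offset and bit 0 are hypothetical:

/* Hypothetical read-modify-write of a register at byte offset 0x40;
 * callers add their own barriers (e.g. wmb()) where ordering matters. */
static inline void example_rmw(struct h_galpa gal)
{
	u64 v = hipz_galpa_load(gal, 0x40);

	hipz_galpa_store(gal, 0x40, v | 0x1ULL);
}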
diff --git a/drivers/staging/rdma/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h
deleted file mode 100644 (file)
index 9dac93d..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_H__
-#define __HIPZ_FNS_H__
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-#include "hipz_fns_core.h"
-
-#define hipz_galpa_store_eq(gal, offset, value) \
-       hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_eq(gal, offset) \
-       hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qped(gal, offset, value) \
-       hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_qped(gal, offset) \
-       hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
-
-#define hipz_galpa_store_mrmw(gal, offset, value) \
-       hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_mrmw(gal, offset) \
-       hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
-
-#endif
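Each wrapper above only bakes the per-object memory-map offset into the generic accessor. Purely as an illustration (struct hipz_eqtemm is defined in hipz_hw.h, deleted below, and places eqx_it at byte 0x60):

/* Illustrative only: the two stores below are equivalent, a 64-bit
 * write at byte offset 0x60 of the EQ register page. */
static inline void example_expansion(struct h_galpa gal, u64 v)
{
	hipz_galpa_store_eq(gal, eqx_it, v);
	hipz_galpa_store(gal, EQTEMM_OFFSET(eqx_it), v);
}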
diff --git a/drivers/staging/rdma/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h
deleted file mode 100644 (file)
index 868735f..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_CORE_H__
-#define __HIPZ_FNS_CORE_H__
-
-#include "hcp_phyp.h"
-#include "hipz_hw.h"
-
-#define hipz_galpa_store_cq(gal, offset, value) \
-       hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_cq(gal, offset) \
-       hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qp(gal, offset, value) \
-       hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
-#define hipz_galpa_load_qp(gal, offset) \
-       hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
-
-static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-       /*  ringing doorbell :-) */
-       hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
-                           EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
-}
-
-static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-       /*  ringing doorbell :-) */
-       hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
-                           EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
-}
-
-static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
-{
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
-                           EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
-}
-
-static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
-{
-       u64 cqx_n0_reg;
-
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
-                           EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
-                                          value));
-       cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
-}
-
-static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
-{
-       u64 cqx_n1_reg;
-
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
-                           EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
-       cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
-}
-
-#endif /* __HIPZ_FNS_CORE_H__ */
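The doorbell helpers above are the hot path of posting work: software writes entries into queue memory, then bumps the matching adder register. A hypothetical post-send path; the wmb() placement reflects the usual device-doorbell ordering rule, not code quoted from this driver:

/* Hypothetical post path: make the new WQEs globally visible, then
 * ring the SQ doorbell so hardware starts fetching them. */
static void example_post_send(struct ehca_qp *qp, u16 n_wqes)
{
	/* ... n_wqes entries already copied into the send ipz_queue ... */
	wmb();			/* WQE stores before doorbell store */
	hipz_update_sqa(qp, n_wqes);
}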
diff --git a/drivers/staging/rdma/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h
deleted file mode 100644 (file)
index bf996c7..0000000
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  eHCA register definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_HW_H__
-#define __HIPZ_HW_H__
-
-#include "ehca_tools.h"
-
-#define EHCA_MAX_MTU 4
-
-/* QP Table Entry Memory Map */
-struct hipz_qptemm {
-       u64 qpx_hcr;
-       u64 qpx_c;
-       u64 qpx_herr;
-       u64 qpx_aer;
-/* 0x20*/
-       u64 qpx_sqa;
-       u64 qpx_sqc;
-       u64 qpx_rqa;
-       u64 qpx_rqc;
-/* 0x40*/
-       u64 qpx_st;
-       u64 qpx_pmstate;
-       u64 qpx_pmfa;
-       u64 qpx_pkey;
-/* 0x60*/
-       u64 qpx_pkeya;
-       u64 qpx_pkeyb;
-       u64 qpx_pkeyc;
-       u64 qpx_pkeyd;
-/* 0x80*/
-       u64 qpx_qkey;
-       u64 qpx_dqp;
-       u64 qpx_dlidp;
-       u64 qpx_portp;
-/* 0xa0*/
-       u64 qpx_slidp;
-       u64 qpx_slidpp;
-       u64 qpx_dlida;
-       u64 qpx_porta;
-/* 0xc0*/
-       u64 qpx_slida;
-       u64 qpx_slidpa;
-       u64 qpx_slvl;
-       u64 qpx_ipd;
-/* 0xe0*/
-       u64 qpx_mtu;
-       u64 qpx_lato;
-       u64 qpx_rlimit;
-       u64 qpx_rnrlimit;
-/* 0x100*/
-       u64 qpx_t;
-       u64 qpx_sqhp;
-       u64 qpx_sqptp;
-       u64 qpx_nspsn;
-/* 0x120*/
-       u64 qpx_nspsnhwm;
-       u64 reserved1;
-       u64 qpx_sdsi;
-       u64 qpx_sdsbc;
-/* 0x140*/
-       u64 qpx_sqwsize;
-       u64 qpx_sqwts;
-       u64 qpx_lsn;
-       u64 qpx_nssn;
-/* 0x160 */
-       u64 qpx_mor;
-       u64 qpx_cor;
-       u64 qpx_sqsize;
-       u64 qpx_erc;
-/* 0x180*/
-       u64 qpx_rnrrc;
-       u64 qpx_ernrwt;
-       u64 qpx_rnrresp;
-       u64 qpx_lmsna;
-/* 0x1a0 */
-       u64 qpx_sqhpc;
-       u64 qpx_sqcptp;
-       u64 qpx_sigt;
-       u64 qpx_wqecnt;
-/* 0x1c0*/
-       u64 qpx_rqhp;
-       u64 qpx_rqptp;
-       u64 qpx_rqsize;
-       u64 qpx_nrr;
-/* 0x1e0*/
-       u64 qpx_rdmac;
-       u64 qpx_nrpsn;
-       u64 qpx_lapsn;
-       u64 qpx_lcr;
-/* 0x200*/
-       u64 qpx_rwc;
-       u64 qpx_rwva;
-       u64 qpx_rdsi;
-       u64 qpx_rdsbc;
-/* 0x220*/
-       u64 qpx_rqwsize;
-       u64 qpx_crmsn;
-       u64 qpx_rdd;
-       u64 qpx_larpsn;
-/* 0x240*/
-       u64 qpx_pd;
-       u64 qpx_scqn;
-       u64 qpx_rcqn;
-       u64 qpx_aeqn;
-/* 0x260*/
-       u64 qpx_aaelog;
-       u64 qpx_ram;
-       u64 qpx_rdmaqe0;
-       u64 qpx_rdmaqe1;
-/* 0x280*/
-       u64 qpx_rdmaqe2;
-       u64 qpx_rdmaqe3;
-       u64 qpx_nrpsnhwm;
-/* 0x298*/
-       u64 reserved[(0x400 - 0x298) / 8];
-/* 0x400 extended data */
-       u64 reserved_ext[(0x500 - 0x400) / 8];
-/* 0x500 */
-       u64 reserved2[(0x1000 - 0x500) / 8];
-/* 0x1000      */
-};
-
-#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
-
-#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
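Because every slot is a u64, the /* 0xNN */ markers in the struct can be cross-checked against QPTEMM_OFFSET() at compile time; a sanity-check sketch (example_check_layout is illustrative only and assumes BUILD_BUG_ON from <linux/bug.h>):

/* Illustrative compile-time check that the layout markers hold. */
static inline void example_check_layout(void)
{
	BUILD_BUG_ON(QPTEMM_OFFSET(qpx_sqa) != 0x20);
	BUILD_BUG_ON(QPTEMM_OFFSET(qpx_t)   != 0x100);
}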
-
-/* MRMWPT Entry Memory Map */
-struct hipz_mrmwmm {
-       /* 0x00 */
-       u64 mrx_hcr;
-
-       u64 mrx_c;
-       u64 mrx_herr;
-       u64 mrx_aer;
-       /* 0x20 */
-       u64 mrx_pp;
-       u64 reserved1;
-       u64 reserved2;
-       u64 reserved3;
-       /* 0x40 */
-       u64 reserved4[(0x200 - 0x40) / 8];
-       /* 0x200 */
-       u64 mrx_ctl[64];
-
-};
-
-#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
-
-struct hipz_qpedmm {
-       /* 0x00 */
-       u64 reserved0[(0x400) / 8];
-       /* 0x400 */
-       u64 qpedx_phh;
-       u64 qpedx_ppsgp;
-       /* 0x410 */
-       u64 qpedx_ppsgu;
-       u64 qpedx_ppdgp;
-       /* 0x420 */
-       u64 qpedx_ppdgu;
-       u64 qpedx_aph;
-       /* 0x430 */
-       u64 qpedx_apsgp;
-       u64 qpedx_apsgu;
-       /* 0x440 */
-       u64 qpedx_apdgp;
-       u64 qpedx_apdgu;
-       /* 0x450 */
-       u64 qpedx_apav;
-       u64 qpedx_apsav;
-       /* 0x460  */
-       u64 qpedx_hcr;
-       u64 reserved1[4];
-       /* 0x488 */
-       u64 qpedx_rrl0;
-       /* 0x490 */
-       u64 qpedx_rrrkey0;
-       u64 qpedx_rrva0;
-       /* 0x4a0 */
-       u64 reserved2;
-       u64 qpedx_rrl1;
-       /* 0x4b0 */
-       u64 qpedx_rrrkey1;
-       u64 qpedx_rrva1;
-       /* 0x4c0 */
-       u64 reserved3;
-       u64 qpedx_rrl2;
-       /* 0x4d0 */
-       u64 qpedx_rrrkey2;
-       u64 qpedx_rrva2;
-       /* 0x4e0 */
-       u64 reserved4;
-       u64 qpedx_rrl3;
-       /* 0x4f0 */
-       u64 qpedx_rrrkey3;
-       u64 qpedx_rrva3;
-};
-
-#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
-
-/* CQ Table Entry Memory Map */
-struct hipz_cqtemm {
-       u64 cqx_hcr;
-       u64 cqx_c;
-       u64 cqx_herr;
-       u64 cqx_aer;
-/* 0x20  */
-       u64 cqx_ptp;
-       u64 cqx_tp;
-       u64 cqx_fec;
-       u64 cqx_feca;
-/* 0x40  */
-       u64 cqx_ep;
-       u64 cqx_eq;
-/* 0x50  */
-       u64 reserved1;
-       u64 cqx_n0;
-/* 0x60  */
-       u64 cqx_n1;
-       u64 reserved2[(0x1000 - 0x60) / 8];
-/* 0x1000 */
-};
-
-#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32, 63)
-#define CQX_FECADDER              EHCA_BMASK_IBM(32, 63)
-#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-
-#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
-
-/* EQ Table Entry Memory Map */
-struct hipz_eqtemm {
-       u64 eqx_hcr;
-       u64 eqx_c;
-
-       u64 eqx_herr;
-       u64 eqx_aer;
-/* 0x20 */
-       u64 eqx_ptp;
-       u64 eqx_tp;
-       u64 eqx_ssba;
-       u64 eqx_psba;
-
-/* 0x40 */
-       u64 eqx_cec;
-       u64 eqx_meql;
-       u64 eqx_xisbi;
-       u64 eqx_xisc;
-/* 0x60 */
-       u64 eqx_it;
-
-};
-
-#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
-
-/* access control defines for MR/MW */
-#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
-#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
-#define HIPZ_ACCESSCTRL_R_READ   0x00200000
-#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
-#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
-
-/* query hca response block */
-struct hipz_query_hca {
-       u32 cur_reliable_dg;
-       u32 cur_qp;
-       u32 cur_cq;
-       u32 cur_eq;
-       u32 cur_mr;
-       u32 cur_mw;
-       u32 cur_ee_context;
-       u32 cur_mcast_grp;
-       u32 cur_qp_attached_mcast_grp;
-       u32 reserved1;
-       u32 cur_ipv6_qp;
-       u32 cur_eth_qp;
-       u32 cur_hp_mr;
-       u32 reserved2[3];
-       u32 max_rd_domain;
-       u32 max_qp;
-       u32 max_cq;
-       u32 max_eq;
-       u32 max_mr;
-       u32 max_hp_mr;
-       u32 max_mw;
-       u32 max_mrwpte;
-       u32 max_special_mrwpte;
-       u32 max_rd_ee_context;
-       u32 max_mcast_grp;
-       u32 max_total_mcast_qp_attach;
-       u32 max_mcast_qp_attach;
-       u32 max_raw_ipv6_qp;
-       u32 max_raw_ethy_qp;
-       u32 internal_clock_frequency;
-       u32 max_pd;
-       u32 max_ah;
-       u32 max_cqe;
-       u32 max_wqes_wq;
-       u32 max_partitions;
-       u32 max_rr_ee_context;
-       u32 max_rr_qp;
-       u32 max_rr_hca;
-       u32 max_act_wqs_ee_context;
-       u32 max_act_wqs_qp;
-       u32 max_sge;
-       u32 max_sge_rd;
-       u32 memory_page_size_supported;
-       u64 max_mr_size;
-       u32 local_ca_ack_delay;
-       u32 num_ports;
-       u32 vendor_id;
-       u32 vendor_part_id;
-       u32 hw_ver;
-       u64 node_guid;
-       u64 hca_cap_indicators;
-       u32 data_counter_register_size;
-       u32 max_shared_rq;
-       u32 max_isns_eq;
-       u32 max_neq;
-} __attribute__ ((packed));
-
-#define HCA_CAP_AH_PORT_NR_CHECK      EHCA_BMASK_IBM( 0,  0)
-#define HCA_CAP_ATOMIC                EHCA_BMASK_IBM( 1,  1)
-#define HCA_CAP_AUTO_PATH_MIG         EHCA_BMASK_IBM( 2,  2)
-#define HCA_CAP_BAD_P_KEY_CTR         EHCA_BMASK_IBM( 3,  3)
-#define HCA_CAP_SQD_RTS_PORT_CHANGE   EHCA_BMASK_IBM( 4,  4)
-#define HCA_CAP_CUR_QP_STATE_MOD      EHCA_BMASK_IBM( 5,  5)
-#define HCA_CAP_INIT_TYPE             EHCA_BMASK_IBM( 6,  6)
-#define HCA_CAP_PORT_ACTIVE_EVENT     EHCA_BMASK_IBM( 7,  7)
-#define HCA_CAP_Q_KEY_VIOL_CTR        EHCA_BMASK_IBM( 8,  8)
-#define HCA_CAP_WQE_RESIZE            EHCA_BMASK_IBM( 9,  9)
-#define HCA_CAP_RAW_PACKET_MCAST      EHCA_BMASK_IBM(10, 10)
-#define HCA_CAP_SHUTDOWN_PORT         EHCA_BMASK_IBM(11, 11)
-#define HCA_CAP_RC_LL_QP              EHCA_BMASK_IBM(12, 12)
-#define HCA_CAP_SRQ                   EHCA_BMASK_IBM(13, 13)
-#define HCA_CAP_UD_LL_QP              EHCA_BMASK_IBM(16, 16)
-#define HCA_CAP_RESIZE_MR             EHCA_BMASK_IBM(17, 17)
-#define HCA_CAP_MINI_QP               EHCA_BMASK_IBM(18, 18)
-#define HCA_CAP_H_ALLOC_RES_SYNC      EHCA_BMASK_IBM(19, 19)
-
-/* query port response block */
-struct hipz_query_port {
-       u32 state;
-       u32 bad_pkey_cntr;
-       u32 lmc;
-       u32 lid;
-       u32 subnet_timeout;
-       u32 qkey_viol_cntr;
-       u32 sm_sl;
-       u32 sm_lid;
-       u32 capability_mask;
-       u32 init_type_reply;
-       u32 pkey_tbl_len;
-       u32 gid_tbl_len;
-       u64 gid_prefix;
-       u32 port_nr;
-       u16 pkey_entries[16];
-       u8  reserved1[32];
-       u32 trent_size;
-       u32 trbuf_size;
-       u64 max_msg_sz;
-       u32 max_mtu;
-       u32 vl_cap;
-       u32 phys_pstate;
-       u32 phys_state;
-       u32 phys_speed;
-       u32 phys_width;
-       u8  reserved2[1884];
-       u64 guid_entries[255];
-} __attribute__ ((packed));
-
-#endif
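The query blocks above are decoded with the EHCA_BMASK_* helpers from ehca_tools.h, which use IBM (bit 0 = most significant bit) numbering. A sketch of testing one capability bit, assuming the EHCA_BMASK_GET() helper from that header:

/* Sketch: does the adapter advertise shared receive queues? */
static bool example_has_srq(const struct hipz_query_hca *rblock)
{
	return EHCA_BMASK_GET(HCA_CAP_SRQ, rblock->hca_cap_indicators) != 0;
}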
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.c b/drivers/staging/rdma/ehca/ipz_pt_fn.c
deleted file mode 100644 (file)
index 7ffc748..0000000
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ipz_pt_fn.h"
-#include "ehca_classes.h"
-
-#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
-
-struct kmem_cache *small_qp_cache;
-
-void *ipz_qpageit_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       queue->current_q_offset += queue->pagesize;
-       if (queue->current_q_offset > queue->queue_length) {
-               queue->current_q_offset -= queue->pagesize;
-               ret = NULL;
-       }
-       if (((u64)ret) % queue->pagesize) {
-               ehca_gen_err("ERROR!! not at PAGE-Boundary");
-               return NULL;
-       }
-       return ret;
-}
-
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u64 last_entry_in_q = queue->queue_length - queue->qe_size;
-
-       queue->current_q_offset += queue->qe_size;
-       if (queue->current_q_offset > last_entry_in_q) {
-               queue->current_q_offset = 0;
-               queue->toggle_state = (~queue->toggle_state) & 1;
-       }
-
-       return ret;
-}
-
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
-{
-       int i;
-       for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
-               u64 page = __pa(queue->queue_pages[i]);
-               if (addr >= page && addr < page + queue->pagesize) {
-                       *q_offset = addr - page + i * queue->pagesize;
-                       return 0;
-               }
-       }
-       return -EINVAL;
-}
-
-#if PAGE_SHIFT < EHCA_PAGESHIFT
-#error Kernel pages must be at least as large as eHCA pages (4K)!
-#endif
-
-/*
- * allocate pages for queue:
- * outer loop allocates whole kernel pages (page aligned) and
- * inner loop divides a kernel page into smaller hca queue pages
- */
-static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
-{
-       int k, f = 0;
-       u8 *kpage;
-
-       while (f < nr_of_pages) {
-               kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
-               if (!kpage)
-                       goto out;
-
-               for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
-                       queue->queue_pages[f] = (struct ipz_page *)kpage;
-                       kpage += EHCA_PAGESIZE;
-                       f++;
-               }
-       }
-       return 1;
-
-out:
-       for (f = 0; f < nr_of_pages && queue->queue_pages[f];
-            f += PAGES_PER_KPAGE)
-               free_page((unsigned long)(queue->queue_pages)[f]);
-       return 0;
-}
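As a worked example: on a POWER kernel with 64 KiB pages, PAGES_PER_KPAGE is 65536 >> 12 = 16, so each get_zeroed_page() call above seeds sixteen 4 KiB queue-page slots, and the unwind loop steps by PAGES_PER_KPAGE because only every sixteenth slot is the head of a kernel page that free_page() may release.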
-
-static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-       int order = ilog2(queue->pagesize) - 9;
-       struct ipz_small_queue_page *page;
-       unsigned long bit;
-
-       mutex_lock(&pd->lock);
-
-       if (!list_empty(&pd->free[order]))
-               page = list_entry(pd->free[order].next,
-                                 struct ipz_small_queue_page, list);
-       else {
-               page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
-               if (!page)
-                       goto out;
-
-               page->page = get_zeroed_page(GFP_KERNEL);
-               if (!page->page) {
-                       kmem_cache_free(small_qp_cache, page);
-                       goto out;
-               }
-
-               list_add(&page->list, &pd->free[order]);
-       }
-
-       bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
-       __set_bit(bit, page->bitmap);
-       page->fill++;
-
-       if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
-               list_move(&page->list, &pd->full[order]);
-
-       mutex_unlock(&pd->lock);
-
-       queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
-       queue->small_page = page;
-       queue->offset = bit << (order + 9);
-       return 1;
-
-out:
-       ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
-       mutex_unlock(&pd->lock);
-       return 0;
-}
-
-static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-       int order = ilog2(queue->pagesize) - 9;
-       struct ipz_small_queue_page *page = queue->small_page;
-       unsigned long bit;
-       int free_page = 0;
-
-       bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
-               >> (order + 9);
-
-       mutex_lock(&pd->lock);
-
-       __clear_bit(bit, page->bitmap);
-       page->fill--;
-
-       if (page->fill == 0) {
-               list_del(&page->list);
-               free_page = 1;
-       }
-
-       if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
-               /* the page was full until we freed the chunk */
-               list_move_tail(&page->list, &pd->free[order]);
-
-       mutex_unlock(&pd->lock);
-
-       if (free_page) {
-               free_page(page->page);
-               kmem_cache_free(small_qp_cache, page);
-       }
-}
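Worked numbers for the small-queue path: a 512-byte queue page gives order = ilog2(512) - 9 = 0, so one kernel page carries IPZ_SPAGE_PER_KPAGE chunks, and bit << (order + 9) converts a bitmap index straight back into the byte offset of its chunk (bit * 512 in this case).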
-
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-                  const u32 nr_of_pages, const u32 pagesize,
-                  const u32 qe_size, const u32 nr_of_sg,
-                  int is_small)
-{
-       if (pagesize > PAGE_SIZE) {
-               ehca_gen_err("FATAL ERROR: pagesize=%x "
-                            "is greater than kernel page size", pagesize);
-               return 0;
-       }
-
-       /* init queue fields */
-       queue->queue_length = nr_of_pages * pagesize;
-       queue->pagesize = pagesize;
-       queue->qe_size = qe_size;
-       queue->act_nr_of_sg = nr_of_sg;
-       queue->current_q_offset = 0;
-       queue->toggle_state = 1;
-       queue->small_page = NULL;
-
-       /* allocate queue page pointers */
-       queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *),
-                                    GFP_KERNEL | __GFP_NOWARN);
-       if (!queue->queue_pages) {
-               queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *));
-               if (!queue->queue_pages) {
-                       ehca_gen_err("Couldn't allocate queue page list");
-                       return 0;
-               }
-       }
-
-       /* allocate actual queue pages */
-       if (is_small) {
-               if (!alloc_small_queue_page(queue, pd))
-                       goto ipz_queue_ctor_exit0;
-       } else
-               if (!alloc_queue_pages(queue, nr_of_pages))
-                       goto ipz_queue_ctor_exit0;
-
-       return 1;
-
-ipz_queue_ctor_exit0:
-       ehca_gen_err("Couldn't alloc pages queue=%p "
-                "nr_of_pages=%x",  queue, nr_of_pages);
-       kvfree(queue->queue_pages);
-
-       return 0;
-}
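The page-pointer allocation above is the standard kzalloc-with-__GFP_NOWARN, fall-back-to-vzalloc idiom; it works because kvfree() inspects the address and dispatches to kfree() or vfree() as appropriate. A generic form of the same pattern (alloc_ptr_array is illustrative):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/* Prefer physically contiguous memory, degrade gracefully under
 * fragmentation, and free with a single kvfree() either way. */
static void *alloc_ptr_array(size_t n)
{
	void *p = kzalloc(n * sizeof(void *), GFP_KERNEL | __GFP_NOWARN);

	if (!p)
		p = vzalloc(n * sizeof(void *));
	return p;	/* release with kvfree(p) */
}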
-
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
-{
-       int i, nr_pages;
-
-       if (!queue || !queue->queue_pages) {
-               ehca_gen_dbg("queue or queue_pages is NULL");
-               return 0;
-       }
-
-       if (queue->small_page)
-               free_small_queue_page(queue, pd);
-       else {
-               nr_pages = queue->queue_length / queue->pagesize;
-               for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
-                       free_page((unsigned long)queue->queue_pages[i]);
-       }
-
-       kvfree(queue->queue_pages);
-
-       return 1;
-}
-
-int ehca_init_small_qp_cache(void)
-{
-       small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
-                                          sizeof(struct ipz_small_queue_page),
-                                          0, SLAB_HWCACHE_ALIGN, NULL);
-       if (!small_qp_cache)
-               return -ENOMEM;
-
-       return 0;
-}
-
-void ehca_cleanup_small_qp_cache(void)
-{
-       kmem_cache_destroy(small_qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h
deleted file mode 100644 (file)
index a801274..0000000
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __IPZ_PT_FN_H__
-#define __IPZ_PT_FN_H__
-
-#define EHCA_PAGESHIFT   12
-#define EHCA_PAGESIZE   4096UL
-#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
-#define EHCA_PT_ENTRIES 512UL
-
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-
-struct ehca_pd;
-struct ipz_small_queue_page;
-
-extern struct kmem_cache *small_qp_cache;
-
-/* struct generic ehca page */
-struct ipz_page {
-       u8 entries[EHCA_PAGESIZE];
-};
-
-#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
-
-struct ipz_small_queue_page {
-       unsigned long page;
-       unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
-       int fill;
-       void *mapped_addr;
-       u32 mmap_count;
-       struct list_head list;
-};
-
-/* struct generic queue in linux kernel virtual memory (kv) */
-struct ipz_queue {
-       u64 current_q_offset;   /* current queue entry */
-
-       struct ipz_page **queue_pages;  /* array of pages belonging to queue */
-       u32 qe_size;            /* queue entry size */
-       u32 act_nr_of_sg;
-       u32 queue_length;       /* queue length allocated in bytes */
-       u32 pagesize;
-       u32 toggle_state;       /* toggle flag - per page */
-       u32 offset; /* save offset within page for small_qp */
-       struct ipz_small_queue_page *small_page;
-};
-
-/*
- * return current Queue Entry for a certain q_offset
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
-{
-       struct ipz_page *current_page;
-       if (q_offset >= queue->queue_length)
-               return NULL;
-       current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
-       return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
-}
-
-/*
- * return current Queue Entry
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_get(struct ipz_queue *queue)
-{
-       return ipz_qeit_calc(queue, queue->current_q_offset);
-}
-
-/*
- * return current Queue Page, increment Queue Page iterator from
- * page to page in struct ipz_queue; the last increment will return NULL
- * and NOT wrap
- * returns address (kv) of Queue Page
- * warning don't use in parallel with ipz_QE_get_inc()
- */
-void *ipz_qpageit_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       queue->current_q_offset += queue->qe_size;
-       if (queue->current_q_offset >= queue->queue_length) {
-               queue->current_q_offset = 0;
-               /* toggle the valid flag */
-               queue->toggle_state = (~queue->toggle_state) & 1;
-       }
-
-       return ret;
-}
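For example, a 4096-byte queue of 64-byte entries wraps after 64 increments; every wrap flips toggle_state, so ipz_qeit_is_valid() below can tell entries written in the current lap (flag bit equal to the toggle) from stale ones left over from the previous lap, without ever clearing the queue memory.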
-
-/*
- * return a bool indicating whether current Queue Entry is valid
- */
-static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
-{
-       struct ehca_cqe *cqe = ipz_qeit_get(queue);
-       return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
-}
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
-{
-       return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
-}
-
-/*
- * returns and resets Queue Entry iterator
- * returns address (kv) of first Queue Entry
- */
-static inline void *ipz_qeit_reset(struct ipz_queue *queue)
-{
-       queue->current_q_offset = 0;
-       return ipz_qeit_get(queue);
-}
-
-/*
- * return the q_offset corresponding to an absolute address
- */
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset);
-
-/*
- * return the next queue offset. don't modify the queue.
- */
-static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset)
-{
-       offset += queue->qe_size;
-       if (offset >= queue->queue_length) offset = 0;
-       return offset;
-}
-
-/* struct generic page table */
-struct ipz_pt {
-       u64 entries[EHCA_PT_ENTRIES];
-};
-
-/* struct page table for a queue, only to be used in pf */
-struct ipz_qpt {
-       /* queue page tables (kv), use u64 because we know the element length */
-       u64 *qpts;
-       u32 n_qpts;
-       u32 n_ptes;       /*  number of page table entries */
-       u64 *current_pte_addr;
-};
-
-/*
- * constructor for an ipz_queue_t, placement new for ipz_queue_t,
- * new for all dependent data structures
- * all QP Tables are the same
- * flow:
- *    allocate+pin queue
- * see ipz_qpt_ctor()
- * returns true if ok, false if out of memory
- */
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-                  const u32 nr_of_pages, const u32 pagesize,
-                  const u32 qe_size, const u32 nr_of_sg,
-                  int is_small);
-
-/*
- * destructor for an ipz_queue_t
- *  -# free queue
- *  see ipz_queue_ctor()
- *  returns true if ok, false if queue was a NULL pointer or the free failed
- */
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
-
-/*
- * constructor for an ipz_qpt_t,
- * placement new for struct ipz_queue, new for all dependent data structures
- * all QP Tables are the same,
- * flow:
- * -# allocate+pin queue
- * -# initialise ptcb
- * -# allocate+pin PTs
- * -# link PTs to a ring, according to HCA Arch, set bit 62 if needed
- * -# the ring must have room for exactly nr_of_PTEs
- * see ipz_qpt_ctor()
- */
-void ipz_qpt_ctor(struct ipz_qpt *qpt,
-                 const u32 nr_of_qes,
-                 const u32 pagesize,
-                 const u32 qe_size,
-                 const u8 lowbyte, const u8 toggle,
-                 u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- * fix EQ page problems
- */
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Event Queue Entry, increment Queue Entry iterator
- * by one step in struct ipz_queue if valid, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- */
-static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u32 qe = *(u8 *)ret;
-       if ((qe >> 7) != (queue->toggle_state & 1))
-               return NULL;
-       ipz_qeit_eq_get_inc(queue); /* this is a good one */
-       return ret;
-}
-
-static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u32 qe = *(u8 *)ret;
-       if ((qe >> 7) != (queue->toggle_state & 1))
-               return NULL;
-       return ret;
-}
-
-/* returns address (GX) of first queue entry */
-static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
-{
-       return be64_to_cpu(qpt->qpts[0]);
-}
-
-/* returns address (kv) of first page of queue page table */
-static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
-{
-       return qpt->qpts;
-}
-
-#endif                         /* __IPZ_PT_FN_H__ */
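Putting the iterator primitives together, a consumer typically drains a completion queue by looping on the validity-checked increment. A hypothetical drain loop (example_drain_cq and the ib_wc translation are illustrative):

/* Hypothetical CQ consumer: pop every currently-valid CQE. */
static void example_drain_cq(struct ipz_queue *cq_queue)
{
	struct ehca_cqe *cqe;	/* from ehca_qes.h */

	while ((cqe = ipz_qeit_get_inc_valid(cq_queue)) != NULL) {
		/* ... translate *cqe into an ib_wc and report it ... */
	}
}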
index 568f185a022dffa5bf24a1af010261a4b4f7b5c0..a3f8b884fdd625fef680f5342655bc1832978eca 100644 (file)
@@ -167,10 +167,7 @@ static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
        rval = init_mregion(&mr->mr, pd, count);
        if (rval)
                goto bail;
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
+
        rval = hfi1_alloc_lkey(&mr->mr, 0);
        if (rval)
                goto bail_mregion;
@@ -187,52 +184,6 @@ bail:
        goto done;
 }
 
-/**
- * hfi1_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *buffer_list,
-                              int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct hfi1_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, pd);
-       if (IS_ERR(mr)) {
-               ret = (struct ib_mr *)mr;
-               goto bail;
-       }
-
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.access_flags = acc;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == HFI1_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
 /**
  * hfi1_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
index ef0feaa684a48b7b285ce490ad9791d98c052ddd..09b8d412ee9085667da5fd3bccf35d46724dc49e 100644 (file)
@@ -2052,7 +2052,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
        ibdev->poll_cq = hfi1_poll_cq;
        ibdev->req_notify_cq = hfi1_req_notify_cq;
        ibdev->get_dma_mr = hfi1_get_dma_mr;
-       ibdev->reg_phys_mr = hfi1_reg_phys_mr;
        ibdev->reg_user_mr = hfi1_reg_user_mr;
        ibdev->dereg_mr = hfi1_dereg_mr;
        ibdev->alloc_mr = hfi1_alloc_mr;
index 72106e5362b9862d448be822220f2ee7b0aa30f4..286e468b0479791c49c8b5dfeeb8b991d0ff2785 100644 (file)
@@ -1024,10 +1024,6 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 
 struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
 
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *buffer_list,
-                              int num_phys_buf, int acc, u64 *iova_start);
-
 struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                               u64 virt_addr, int mr_access_flags,
                               struct ib_udata *udata);
diff --git a/drivers/staging/rdma/ipath/Kconfig b/drivers/staging/rdma/ipath/Kconfig
deleted file mode 100644 (file)
index 041ce06..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-config INFINIBAND_IPATH
-       tristate "QLogic HTX HCA support"
-       depends on 64BIT && NET && HT_IRQ
-       ---help---
-       This is a driver for the deprecated QLogic Hyper-Transport
-       IB host channel adapter (model QHT7140),
-       including InfiniBand verbs support.  This driver allows these
-       devices to be used with both kernel upper level protocols such
-       as IP-over-InfiniBand as well as with userspace applications
-       (in conjunction with InfiniBand userspace access).
-       For QLogic PCIe QLE based cards, use the QIB driver instead.
-
-       If you have this hardware you will need to boot with PAT disabled
-       on your x86-64 systems; use the nopat kernel parameter.
-
-       Note that this driver will soon be removed entirely from the kernel.
diff --git a/drivers/staging/rdma/ipath/Makefile b/drivers/staging/rdma/ipath/Makefile
deleted file mode 100644 (file)
index 4496f28..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-       -DIPATH_KERN_TYPE=0
-
-obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
-
-ib_ipath-y := \
-       ipath_cq.o \
-       ipath_diag.o \
-       ipath_dma.o \
-       ipath_driver.o \
-       ipath_eeprom.o \
-       ipath_file_ops.o \
-       ipath_fs.o \
-       ipath_init_chip.o \
-       ipath_intr.o \
-       ipath_keys.o \
-       ipath_mad.o \
-       ipath_mmap.o \
-       ipath_mr.o \
-       ipath_qp.o \
-       ipath_rc.o \
-       ipath_ruc.o \
-       ipath_sdma.o \
-       ipath_srq.o \
-       ipath_stats.o \
-       ipath_sysfs.o \
-       ipath_uc.o \
-       ipath_ud.o \
-       ipath_user_pages.o \
-       ipath_user_sdma.o \
-       ipath_verbs_mcast.o \
-       ipath_verbs.o
-
-ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
-
-ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/staging/rdma/ipath/TODO b/drivers/staging/rdma/ipath/TODO
deleted file mode 100644 (file)
index cb00158..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-The ipath driver has been moved to staging in preparation for its removal in a
-few releases. The driver will be deleted during the 4.6 merge window.
-
-Contact Dennis Dalessandro <dennis.dalessandro@intel.com> and
-Cc: linux-rdma@vger.kernel.org
diff --git a/drivers/staging/rdma/ipath/ipath_common.h b/drivers/staging/rdma/ipath/ipath_common.h
deleted file mode 100644 (file)
index 28cfe97..0000000
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_COMMON_H
-#define _IPATH_COMMON_H
-
-/*
- * This file contains defines, structures, etc. that are used
- * to communicate between kernel and user code.
- */
-
-
-/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
-#define IPATH_SRC_OUI_1 0x00
-#define IPATH_SRC_OUI_2 0x11
-#define IPATH_SRC_OUI_3 0x75
-
-/* version of protocol header (known to chip also). In the long run,
- * we should be able to generate and accept a range of version numbers;
- * for now we only accept one, and it's compiled in.
- */
-#define IPS_PROTO_VERSION 2
-
-/*
- * These are compile time constants that you may want to enable or disable
- * if you are trying to debug problems with code or performance.
- * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
- * fastpath code
- * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
- * traced in fastpath code
- * _IPATH_TRACING define as 0 if you want to remove all tracing in a
- * compilation unit
- * _IPATH_DEBUGGING define as 0 if you want to remove debug prints
- */
-
-/*
- * The value in the BTH QP field that InfiniPath uses to differentiate
- * an infinipath protocol IB packet vs standard IB transport
- */
-#define IPATH_KD_QP 0x656b79
-
-/*
- * valid states passed to ipath_set_linkstate() user call
- */
-#define IPATH_IB_LINKDOWN              0
-#define IPATH_IB_LINKARM               1
-#define IPATH_IB_LINKACTIVE            2
-#define IPATH_IB_LINKDOWN_ONLY         3
-#define IPATH_IB_LINKDOWN_SLEEP                4
-#define IPATH_IB_LINKDOWN_DISABLE      5
-#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
-#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
-#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */
-#define IPATH_IB_LINK_HRTBT    9 /* enable heartbeat, normal, non-loopback */
-
-/*
- * These 3 values (SDR and DDR may be ORed for auto-speed
- * negotiation) are used for the 3rd argument to ipath_f_set_ib_cfg
- * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs.  They
- * are also the possible values for ipath_link_speed_enabled and active.
- * The values were chosen to match values used within the IB spec.
- */
-#define IPATH_IB_SDR 1
-#define IPATH_IB_DDR 2
-
-/*
- * stats maintained by the driver.  For now, at least, this is global
- * to all minor devices.
- */
-struct infinipath_stats {
-       /* number of interrupts taken */
-       __u64 sps_ints;
-       /* number of interrupts for errors */
-       __u64 sps_errints;
-       /* number of errors from chip (not incl. packet errors or CRC) */
-       __u64 sps_errs;
-       /* number of packet errors from chip other than CRC */
-       __u64 sps_pkterrs;
-       /* number of packets with CRC errors (ICRC and VCRC) */
-       __u64 sps_crcerrs;
-       /* number of hardware errors reported (parity, etc.) */
-       __u64 sps_hwerrs;
-       /* number of times IB link changed state unexpectedly */
-       __u64 sps_iblink;
-       __u64 sps_unused; /* was fastrcvint, no longer implemented */
-       /* number of kernel (port0) packets received */
-       __u64 sps_port0pkts;
-       /* number of "ethernet" packets sent by driver */
-       __u64 sps_ether_spkts;
-       /* number of "ethernet" packets received by driver */
-       __u64 sps_ether_rpkts;
-       /* number of SMA packets sent by driver. Obsolete. */
-       __u64 sps_sma_spkts;
-       /* number of SMA packets received by driver. Obsolete. */
-       __u64 sps_sma_rpkts;
-       /* number of times all ports rcvhdrq was full and packet dropped */
-       __u64 sps_hdrqfull;
-       /* number of times all ports egrtid was full and packet dropped */
-       __u64 sps_etidfull;
-       /*
-        * number of times we tried to send from driver, but no pio buffers
-        * avail
-        */
-       __u64 sps_nopiobufs;
-       /* number of ports currently open */
-       __u64 sps_ports;
-       /* list of pkeys (other than default) accepted (0 means not set) */
-       __u16 sps_pkeys[4];
-       __u16 sps_unused16[4]; /* available; maintaining compatible layout */
-       /* number of user ports per chip (not IB ports) */
-       __u32 sps_nports;
-       /* not our interrupt, or already handled */
-       __u32 sps_nullintr;
-       /* max number of packets handled per receive call */
-       __u32 sps_maxpkts_call;
-       /* avg number of packets handled per receive call */
-       __u32 sps_avgpkts_call;
-       /* total number of pages locked */
-       __u64 sps_pagelocks;
-       /* total number of pages unlocked */
-       __u64 sps_pageunlocks;
-       /*
-        * Number of packets dropped in kernel other than errors (ether
-        * packets if ipath not configured, etc.)
-        */
-       __u64 sps_krdrops;
-       __u64 sps_txeparity; /* PIO buffer parity error, recovered */
-       /* pad for future growth */
-       __u64 __sps_pad[45];
-};
-
-/*
- * These are the status bits readable (in ascii form, 64bit value)
- * from the "status" sysfs file.
- */
-#define IPATH_STATUS_INITTED       0x1 /* basic initialization done */
-#define IPATH_STATUS_DISABLED      0x2 /* hardware disabled */
-/* Device has been disabled via admin request */
-#define IPATH_STATUS_ADMIN_DISABLED    0x4
-/* Chip has been found and initted */
-#define IPATH_STATUS_CHIP_PRESENT 0x20
-/* IB link is at ACTIVE, usable for data traffic */
-#define IPATH_STATUS_IB_READY     0x40
-/* link is configured, LID, MTU, etc. have been set */
-#define IPATH_STATUS_IB_CONF      0x80
-/* no link established, probably no cable */
-#define IPATH_STATUS_IB_NOCABLE  0x100
-/* A Fatal hardware error has occurred. */
-#define IPATH_STATUS_HWERROR     0x200
-
-/*
- * The list of usermode accessible registers.  Also see Reg_* later in file.
- */
-typedef enum _ipath_ureg {
-       /* (RO)  DMA RcvHdr to be used next. */
-       ur_rcvhdrtail = 0,
-       /* (RW)  RcvHdr entry to be processed next by host. */
-       ur_rcvhdrhead = 1,
-       /* (RO)  Index of next Eager index to use. */
-       ur_rcvegrindextail = 2,
-       /* (RW)  Eager TID to be processed next */
-       ur_rcvegrindexhead = 3,
-       /* For internal use only; max register number. */
-       _IPATH_UregMax
-} ipath_ureg;
-
-/* bit values for spi_runtime_flags */
-#define IPATH_RUNTIME_HT       0x1
-#define IPATH_RUNTIME_PCIE     0x2
-#define IPATH_RUNTIME_FORCE_WC_ORDER   0x4
-#define IPATH_RUNTIME_RCVHDR_COPY      0x8
-#define IPATH_RUNTIME_MASTER   0x10
-#define IPATH_RUNTIME_NODMA_RTAIL 0x80
-#define IPATH_RUNTIME_SDMA           0x200
-#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
-#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
-
-/*
- * This structure is returned by ipath_userinit() immediately after
- * open to get implementation-specific info, and info specific to this
- * instance.
- *
- * This struct must have explicit pad fields where type sizes
- * may result in different alignments between 32 and 64 bit
- * programs, since the 64-bit kernel requires the user code
- * to have matching offsets
- */
-struct ipath_base_info {
-       /* version of hardware, for feature checking. */
-       __u32 spi_hw_version;
-       /* version of software, for feature checking. */
-       __u32 spi_sw_version;
-       /* InfiniPath port assigned, goes into sent packets */
-       __u16 spi_port;
-       __u16 spi_subport;
-       /*
-        * IB MTU, packets IB data must be less than this.
-        * The MTU is in bytes, and will be a multiple of 4 bytes.
-        */
-       __u32 spi_mtu;
-       /*
-        * Size of a PIO buffer.  Any given packet's total size must be less
-        * than this (in words).  Included is the starting control word, so
-        * if 513 is returned, then total pkt size is 512 words or less.
-        */
-       __u32 spi_piosize;
-       /* size of the TID cache in infinipath, in entries */
-       __u32 spi_tidcnt;
-       /* size of the TID Eager list in infinipath, in entries */
-       __u32 spi_tidegrcnt;
-       /* size of a single receive header queue entry in words. */
-       __u32 spi_rcvhdrent_size;
-       /*
-        * Count of receive header queue entries allocated.
-        * This may be less than the spu_rcvhdrcnt passed in.
-        */
-       __u32 spi_rcvhdr_cnt;
-
-       /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
-       __u32 spi_runtime_flags;
-
-       /* address where receive buffer queue is mapped into */
-       __u64 spi_rcvhdr_base;
-
-       /* user program. */
-
-       /* base address of eager TID receive buffers. */
-       __u64 spi_rcv_egrbufs;
-
-       /* Allocated by initialization code, not by protocol. */
-
-       /*
-        * Size of each TID buffer in host memory, starting at
-        * spi_rcv_egrbufs.  The buffers are virtually contiguous.
-        */
-       __u32 spi_rcv_egrbufsize;
-       /*
-        * The special QP (queue pair) value that identifies an infinipath
-        * protocol packet from standard IB packets.  More, probably much
-        * more, to be added.
-        */
-       __u32 spi_qpair;
-
-       /*
-        * User register base for init code, not to be used directly by
-        * protocol or applications.
-        */
-       __u64 __spi_uregbase;
-       /*
-        * Maximum buffer size in bytes that can be used in a single TID
-        * entry (assuming the buffer is aligned to this boundary).  This is
-        * the minimum of what the hardware and software support.  Guaranteed
-        * to be a power of 2.
-        */
-       __u32 spi_tid_maxsize;
-       /*
-        * alignment of each pio send buffer (byte count
-        * to add to spi_piobufbase to get to second buffer)
-        */
-       __u32 spi_pioalign;
-       /*
-        * The index of the first pio buffer available to this process;
-        * needed to do lookup in spi_pioavailaddr; not added to
-        * spi_piobufbase.
-        */
-       __u32 spi_pioindex;
-        /* number of buffers mapped for this process */
-       __u32 spi_piocnt;
-
-       /*
-        * Base address of write-only pio buffers for this process.
-        * Each buffer has spi_piosize words, and is aligned on spi_pioalign
-        * boundaries.  spi_piocnt buffers are mapped from this address.
-        */
-       __u64 spi_piobufbase;
-
-       /*
-        * Base address of readonly memory copy of the pioavail registers.
-        * There are 2 bits for each buffer.
-        */
-       __u64 spi_pioavailaddr;
-
-       /*
-        * Address where driver updates a copy of the interface and driver
-        * status (IPATH_STATUS_*) as a 64 bit value.  It's followed by a
-        * string indicating hardware error, if there was one.
-        */
-       __u64 spi_status;
-
-       /* number of chip ports available to user processes */
-       __u32 spi_nports;
-       /* unit number of chip we are using */
-       __u32 spi_unit;
-       /* num bufs in each contiguous set */
-       __u32 spi_rcv_egrperchunk;
-       /* size in bytes of each contiguous set */
-       __u32 spi_rcv_egrchunksize;
-       /* total size of mmap to cover full rcvegrbuffers */
-       __u32 spi_rcv_egrbuftotlen;
-       __u32 spi_filler_for_align;
-       /* address of readonly memory copy of the rcvhdrq tail register. */
-       __u64 spi_rcvhdr_tailaddr;
-
-       /* shared memory pages for subports if port is shared */
-       __u64 spi_subport_uregbase;
-       __u64 spi_subport_rcvegrbuf;
-       __u64 spi_subport_rcvhdr_base;
-
-       /* shared memory page for hardware port if it is shared */
-       __u64 spi_port_uregbase;
-       __u64 spi_port_rcvegrbuf;
-       __u64 spi_port_rcvhdr_base;
-       __u64 spi_port_rcvhdr_tailaddr;
-
-} __attribute__ ((aligned(8)));
-
-
-/*
- * This version number is given to the driver by the user code during
- * initialization in the spu_userversion field of ipath_user_info, so
- * the driver can check for compatibility with user code.
- *
- * The major version changes when data structures
- * change in an incompatible way.  The driver must be the same or higher
- * for initialization to succeed.  In some cases, a higher version
- * driver will not interoperate with older software, and initialization
- * will return an error.
- */
-#define IPATH_USER_SWMAJOR 1
-
-/*
- * Minor version differences are always compatible
- * within a major version; however, if the user software is newer
- * than the driver software, some new features and/or structure fields
- * may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference.
- */
-#define IPATH_USER_SWMINOR 6
-
-#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
-
-#define IPATH_KERN_TYPE 0
-
-/*
- * Similarly, this is the kernel version going back to the user.  It's
- * slightly different, in that we want to tell if the driver was built as
- * part of a QLogic release, or from the driver from openfabrics.org,
- * kernel.org, or a standard distribution, for support reasons.
- * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
- *
- * It's returned by the driver to the user code during initialization in the
- * spi_sw_version field of ipath_base_info, so the user code can in turn
- * check for compatibility with the kernel.
- */
-#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
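Purely as an illustrative sketch (the helper name and return convention are invented here, not part of the driver), the user-side check these comments describe might look like this:

/* Sketch of the user-side compatibility check; not driver code. */
static int ipath_version_compatible(__u32 spi_sw_version)
{
	__u32 kmajor = (spi_sw_version >> 16) & 0x7fff; /* bit 31 is IPATH_KERN_TYPE */
	__u32 kminor = spi_sw_version & 0xffff;

	/* majors must match exactly: data structures changed otherwise */
	if (kmajor != IPATH_USER_SWMAJOR)
		return 0;
	/* an older kernel minor only means newer features are absent; the
	 * user code must cope with (or abort on) kminor < IPATH_USER_SWMINOR */
	(void) kminor;
	return 1;
}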
-
-/*
- * This structure is passed to ipath_userinit() to tell the driver where
- * user code buffers are, their sizes, etc.  The offsets and sizes of the
- * fields must remain unchanged, for binary compatibility.  It can be
- * extended if spu_userversion is changed, so user code can tell, if needed.
- */
-struct ipath_user_info {
-       /*
-        * version of user software, to detect compatibility issues.
-        * Should be set to IPATH_USER_SWVERSION.
-        */
-       __u32 spu_userversion;
-
-       /* desired number of receive header queue entries */
-       __u32 spu_rcvhdrcnt;
-
-       /* size of struct base_info to write to */
-       __u32 spu_base_info_size;
-
-       /*
-        * number of words in KD protocol header
-        * This tells InfiniPath how many words to copy to rcvhdrq.  If 0,
-        * kernel uses a default.  Once set, attempts to set any other value
-        * are an error (EAGAIN) until driver is reloaded.
-        */
-       __u32 spu_rcvhdrsize;
-
-       /*
-        * If two or more processes wish to share a port, each process
-        * must set the spu_subport_cnt and spu_subport_id to the same
-        * values.  The only restriction on the spu_subport_id is that
-        * it be unique for a given node.
-        */
-       __u16 spu_subport_cnt;
-       __u16 spu_subport_id;
-
-       __u32 spu_unused; /* kept for compatible layout */
-
-       /*
-        * address of struct base_info to write to
-        */
-       __u64 spu_base_info;
-
-} __attribute__ ((aligned(8)));
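A hedged usage sketch of filling this structure in before initialization; the concrete values (and the variable names) are illustrative, not mandated by this header:

/* illustrative only: request a port with no subport sharing */
struct ipath_base_info base;
struct ipath_user_info ui = {
	.spu_userversion = IPATH_USER_SWVERSION,
	.spu_rcvhdrcnt = 64,			/* illustrative request */
	.spu_base_info_size = sizeof(base),
	.spu_rcvhdrsize = 0,			/* 0: kernel picks the default */
	.spu_subport_cnt = 1,			/* no port sharing here */
	.spu_subport_id = 0,
	.spu_base_info = (__u64) (unsigned long) &base,
};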
-
-/* User commands. */
-
-#define IPATH_CMD_MIN          16
-
-#define __IPATH_CMD_USER_INIT  16      /* old userspace setup (for old user code) */
-#define IPATH_CMD_PORT_INFO    17      /* find out what resources we got */
-#define IPATH_CMD_RECV_CTRL    18      /* control receipt of packets */
-#define IPATH_CMD_TID_UPDATE   19      /* update expected TID entries */
-#define IPATH_CMD_TID_FREE     20      /* free expected TID entries */
-#define IPATH_CMD_SET_PART_KEY 21      /* add partition key */
-#define __IPATH_CMD_SLAVE_INFO 22      /* return info on slave processes (for old user code) */
-#define IPATH_CMD_ASSIGN_PORT  23      /* allocate HCA and port */
-#define IPATH_CMD_USER_INIT    24      /* set up userspace */
-#define IPATH_CMD_UNUSED_1     25
-#define IPATH_CMD_UNUSED_2     26
-#define IPATH_CMD_PIOAVAILUPD  27      /* force an update of PIOAvail reg */
-#define IPATH_CMD_POLL_TYPE    28      /* set the kind of polling we want */
-#define IPATH_CMD_ARMLAUNCH_CTRL       29 /* armlaunch detection control */
-/* 30 is unused */
-#define IPATH_CMD_SDMA_INFLIGHT 31     /* sdma inflight counter request */
-#define IPATH_CMD_SDMA_COMPLETE 32     /* sdma completion counter request */
-
-/*
- * Poll types
- */
-#define IPATH_POLL_TYPE_URGENT  0x01
-#define IPATH_POLL_TYPE_OVERFLOW 0x02
-
-struct ipath_port_info {
-       __u32 num_active;       /* number of active units */
-       __u32 unit;             /* unit (chip) assigned to caller */
-       __u16 port;             /* port on unit assigned to caller */
-       __u16 subport;          /* subport on unit assigned to caller */
-       __u16 num_ports;        /* number of ports available on unit */
-       __u16 num_subports;     /* number of subports opened on port */
-};
-
-struct ipath_tid_info {
-       __u32 tidcnt;
-       /* make structure same size in 32 and 64 bit */
-       __u32 tid__unused;
-       /* virtual address of first page in transfer */
-       __u64 tidvaddr;
-       /* pointer (same size 32/64 bit) to __u16 tid array */
-       __u64 tidlist;
-
-       /*
-        * pointer (same size 32/64 bit) to bitmap of TIDs used
-        * for this call; checked for being large enough at open
-        */
-       __u64 tidmap;
-};
-
-struct ipath_cmd {
-       __u32 type;                     /* command type */
-       union {
-               struct ipath_tid_info tid_info;
-               struct ipath_user_info user_info;
-
-               /*
-                * address in userspace where we should put the sdma
-                * inflight counter
-                */
-               __u64 sdma_inflight;
-               /*
-                * address in userspace where we should put the sdma
-                * completion counter
-                */
-               __u64 sdma_complete;
-               /* address in userspace of struct ipath_port_info to
-                  write result to */
-               __u64 port_info;
-               /* enable/disable receipt of packets */
-               __u32 recv_ctrl;
-               /* enable/disable armlaunch errors (non-zero to enable) */
-               __u32 armlaunch_ctrl;
-               /* partition key to set */
-               __u16 part_key;
-               /* user address of __u32 bitmask of active slaves */
-               __u64 slave_mask_addr;
-               /* type of polling we want */
-               __u16 poll_type;
-       } cmd;
-};
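A sketch of issuing one of these commands, assuming (as the ipath user interface did) that a struct ipath_cmd is delivered via write() on the open device fd; the helper name and error convention are invented here:

#include <unistd.h>

static int ipath_do_cmd(int fd, struct ipath_cmd *cmd)
{
	/* the driver consumes exactly sizeof(*cmd) bytes per request */
	return write(fd, cmd, sizeof(*cmd)) == (ssize_t) sizeof(*cmd) ? 0 : -1;
}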
-
-struct ipath_iovec {
-       /* Pointer to data, but same size 32 and 64 bit */
-       __u64 iov_base;
-
-       /*
-        * Length of data; don't need 64 bits, but want
-        * ipath_sendpkt to remain same size as before 32 bit changes, so...
-        */
-       __u64 iov_len;
-};
-
-/*
- * Describes a single packet for send.  Each packet can have one or more
- * buffers, but the total length (exclusive of IB headers) must be less
- * than the MTU, and if using the PIO method, entire packet length,
- * including IB headers, must be less than the ipath_piosize value (words).
- * Use of this necessitates including sys/uio.h
- */
-struct __ipath_sendpkt {
-       __u32 sps_flags;        /* flags for packet (TBD) */
-       __u32 sps_cnt;          /* number of entries to use in sps_iov */
-       /* array of iov's describing packet. TEMPORARY */
-       struct ipath_iovec sps_iov[4];
-};
-
-/*
- * Diagnostics can send a packet by "writing" one of the following
- * two structs to the diag data special file.
- * The first is the legacy version, kept for backward compatibility.
- */
-struct ipath_diag_pkt {
-       __u32 unit;
-       __u64 data;
-       __u32 len;
-};
-
-/* The second diag_pkt struct is the expanded version that allows
- * more control over the packet, specifically, by allowing a custom
- * pbc (+ static rate) qword, so that special modes and deliberate
- * changes to CRCs can be used. The elements were also re-ordered
- * for better alignment and to avoid padding issues.
- */
-struct ipath_diag_xpkt {
-       __u64 data;
-       __u64 pbc_wd;
-       __u32 unit;
-       __u32 len;
-};
-
-/*
- * Data layout in I2C flash (for GUID, etc.)
- * All fields are little-endian binary unless otherwise stated
- */
-#define IPATH_FLASH_VERSION 2
-struct ipath_flash {
-       /* flash layout version (IPATH_FLASH_VERSION) */
-       __u8 if_fversion;
-       /* checksum protecting if_length bytes */
-       __u8 if_csum;
-       /*
-        * valid length (in use, protected by if_csum), including
-        * if_fversion and if_csum themselves
-        */
-       __u8 if_length;
-       /* the GUID, in network order */
-       __u8 if_guid[8];
-       /* number of GUIDs to use, starting from if_guid */
-       __u8 if_numguid;
-       /* the (last 10 characters of) board serial number, in ASCII */
-       char if_serial[12];
-       /* board mfg date (YYYYMMDD ASCII) */
-       char if_mfgdate[8];
-       /* last board rework/test date (YYYYMMDD ASCII) */
-       char if_testdate[8];
-       /* logging of error counts, TBD */
-       __u8 if_errcntp[4];
-       /* powered on hours, updated at driver unload */
-       __u8 if_powerhour[2];
-       /* ASCII free-form comment field */
-       char if_comment[32];
-       /* Backwards compatible prefix for longer QLogic Serial Numbers */
-       char if_sprefix[4];
-       /* 82 bytes used, min flash size is 128 bytes */
-       __u8 if_future[46];
-};
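The checksum algorithm is not spelled out in this header; purely as an assumption (the real computation lives in the EEPROM support code), a common flash scheme is a two's-complement byte sum over the first if_length bytes, skipping if_csum itself, chosen so the covered region sums to zero:

/* Assumed checksum scheme, shown for illustration only. */
static __u8 ipath_flash_csum_sketch(const struct ipath_flash *ifp)
{
	const __u8 *p = (const __u8 *) ifp;
	__u8 sum = 0;
	int i;

	for (i = 0; i < ifp->if_length; i++)
		if (i != 1)		/* offset 1 is if_csum itself */
			sum += p[i];
	return (__u8) (0 - sum);	/* makes the covered bytes sum to 0 */
}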
-
-/*
- * These are the counters implemented in the chip, and are listed in order.
- * The InterCaps naming is taken straight from the chip spec.
- */
-struct infinipath_counters {
-       __u64 LBIntCnt;
-       __u64 LBFlowStallCnt;
-       __u64 TxSDmaDescCnt;    /* was Reserved1 */
-       __u64 TxUnsupVLErrCnt;
-       __u64 TxDataPktCnt;
-       __u64 TxFlowPktCnt;
-       __u64 TxDwordCnt;
-       __u64 TxLenErrCnt;
-       __u64 TxMaxMinLenErrCnt;
-       __u64 TxUnderrunCnt;
-       __u64 TxFlowStallCnt;
-       __u64 TxDroppedPktCnt;
-       __u64 RxDroppedPktCnt;
-       __u64 RxDataPktCnt;
-       __u64 RxFlowPktCnt;
-       __u64 RxDwordCnt;
-       __u64 RxLenErrCnt;
-       __u64 RxMaxMinLenErrCnt;
-       __u64 RxICRCErrCnt;
-       __u64 RxVCRCErrCnt;
-       __u64 RxFlowCtrlErrCnt;
-       __u64 RxBadFormatCnt;
-       __u64 RxLinkProblemCnt;
-       __u64 RxEBPCnt;
-       __u64 RxLPCRCErrCnt;
-       __u64 RxBufOvflCnt;
-       __u64 RxTIDFullErrCnt;
-       __u64 RxTIDValidErrCnt;
-       __u64 RxPKeyMismatchCnt;
-       __u64 RxP0HdrEgrOvflCnt;
-       __u64 RxP1HdrEgrOvflCnt;
-       __u64 RxP2HdrEgrOvflCnt;
-       __u64 RxP3HdrEgrOvflCnt;
-       __u64 RxP4HdrEgrOvflCnt;
-       __u64 RxP5HdrEgrOvflCnt;
-       __u64 RxP6HdrEgrOvflCnt;
-       __u64 RxP7HdrEgrOvflCnt;
-       __u64 RxP8HdrEgrOvflCnt;
-       __u64 RxP9HdrEgrOvflCnt;        /* was Reserved6 */
-       __u64 RxP10HdrEgrOvflCnt;       /* was Reserved7 */
-       __u64 RxP11HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP12HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP13HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP14HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP15HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP16HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 IBStatusChangeCnt;
-       __u64 IBLinkErrRecoveryCnt;
-       __u64 IBLinkDownedCnt;
-       __u64 IBSymbolErrCnt;
-       /* The following are new for IBA7220 */
-       __u64 RxVL15DroppedPktCnt;
-       __u64 RxOtherLocalPhyErrCnt;
-       __u64 PcieRetryBufDiagQwordCnt;
-       __u64 ExcessBufferOvflCnt;
-       __u64 LocalLinkIntegrityErrCnt;
-       __u64 RxVlErrCnt;
-       __u64 RxDlidFltrCnt;
-};
-
-/*
- * The next set of defines are for packet headers, and chip register
- * and memory bits that are visible to and/or used by user-mode software
- * The other bits that are used only by the driver or diags are in
- * ipath_registers.h
- */
-
-/* RcvHdrFlags bits */
-#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
-#define INFINIPATH_RHF_LENGTH_SHIFT 0
-#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
-#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
-#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
-#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
-#define INFINIPATH_RHF_SEQ_MASK 0xF
-#define INFINIPATH_RHF_SEQ_SHIFT 0
-#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
-#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
-#define INFINIPATH_RHF_H_ICRCERR   0x80000000
-#define INFINIPATH_RHF_H_VCRCERR   0x40000000
-#define INFINIPATH_RHF_H_PARITYERR 0x20000000
-#define INFINIPATH_RHF_H_LENERR    0x10000000
-#define INFINIPATH_RHF_H_MTUERR    0x08000000
-#define INFINIPATH_RHF_H_IHDRERR   0x04000000
-#define INFINIPATH_RHF_H_TIDERR    0x02000000
-#define INFINIPATH_RHF_H_MKERR     0x01000000
-#define INFINIPATH_RHF_H_IBERR     0x00800000
-#define INFINIPATH_RHF_H_ERR_MASK  0xFF800000
-#define INFINIPATH_RHF_L_USE_EGR   0x80000000
-#define INFINIPATH_RHF_L_SWA       0x00008000
-#define INFINIPATH_RHF_L_SWB       0x00004000
-
-/* infinipath header fields */
-#define INFINIPATH_I_VERS_MASK 0xF
-#define INFINIPATH_I_VERS_SHIFT 28
-#define INFINIPATH_I_PORT_MASK 0xF
-#define INFINIPATH_I_PORT_SHIFT 24
-#define INFINIPATH_I_TID_MASK 0x7FF
-#define INFINIPATH_I_TID_SHIFT 13
-#define INFINIPATH_I_OFFSET_MASK 0x1FFF
-#define INFINIPATH_I_OFFSET_SHIFT 0
-
-/* K_PktFlags bits */
-#define INFINIPATH_KPF_INTR 0x1
-#define INFINIPATH_KPF_SUBPORT_MASK 0x3
-#define INFINIPATH_KPF_SUBPORT_SHIFT 1
-
-#define INFINIPATH_MAX_SUBPORT 4
-
-/* SendPIO per-buffer control */
-#define INFINIPATH_SP_TEST    0x40
-#define INFINIPATH_SP_TESTEBP 0x20
-#define INFINIPATH_SP_TRIGGER_SHIFT  15
-
-/* SendPIOAvail bits */
-#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
-#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
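Taken together with the two-bits-per-buffer note on spi_pioavailaddr earlier, the busy state of PIO buffer n can be extracted as sketched below; the 32-buffers-per-qword packing is an assumption beyond what this header states:

static inline int ipath_piobuf_busy(const __le64 *pioavail, unsigned n)
{
	__u64 w = __le64_to_cpu(pioavail[n / 32]);	/* 32 buffers per qword */
	unsigned sh = 2 * (n % 32);			/* 2 bits per buffer */

	return (int) ((w >> (sh + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT)) & 1);
}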
-
-/* infinipath header format */
-struct ipath_header {
-       /*
-        * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
-        * 14 bits before ECO change ~28 Dec 03.  After that, Vers 4,
-        * Port 4, TID 11, offset 13.
-        */
-       __le32 ver_port_tid_offset;
-       __le16 chksum;
-       __le16 pkt_flags;
-};
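A sketch of composing ver_port_tid_offset from the INFINIPATH_I_* shifts and masks defined earlier (the post-Dec-03 layout: Vers 4, Port 4, TID 11, Offset 13); the helper name is illustrative:

static inline __le32 ipath_make_ver_port_tid_offset(__u32 vers, __u32 port,
						    __u32 tid, __u32 off)
{
	__u32 w;

	w  = (vers & INFINIPATH_I_VERS_MASK) << INFINIPATH_I_VERS_SHIFT;
	w |= (port & INFINIPATH_I_PORT_MASK) << INFINIPATH_I_PORT_SHIFT;
	w |= (tid  & INFINIPATH_I_TID_MASK)  << INFINIPATH_I_TID_SHIFT;
	w |= (off  & INFINIPATH_I_OFFSET_MASK) << INFINIPATH_I_OFFSET_SHIFT;
	return __cpu_to_le32(w);
}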
-
-/* infinipath user message header format.
- * This structure contains the first 4 fields common to all protocols
- * that employ infinipath.
- */
-struct ipath_message_header {
-       __be16 lrh[4];
-       __be32 bth[3];
-       /* fields below this point are in host byte order */
-       struct ipath_header iph;
-       __u8 sub_opcode;
-};
-
-/* infinipath ethernet header format */
-struct ether_header {
-       __be16 lrh[4];
-       __be32 bth[3];
-       struct ipath_header iph;
-       __u8 sub_opcode;
-       __u8 cmd;
-       __be16 lid;
-       __u16 mac[3];
-       __u8 frag_num;
-       __u8 seq_num;
-       __le32 len;
-       /* MUST be of word size due to PIO write requirements */
-       __le32 csum;
-       __le16 csum_offset;
-       __le16 flags;
-       __u16 first_2_bytes;
-       __u8 unused[2];         /* currently unused */
-};
-
-
-/* IB - LRH header consts */
-#define IPATH_LRH_GRH 0x0003   /* 1. word of IB LRH - next header: GRH */
-#define IPATH_LRH_BTH 0x0002   /* 1. word of IB LRH - next header: BTH */
-
-/* misc. */
-#define SIZE_OF_CRC 1
-
-#define IPATH_DEFAULT_P_KEY 0xFFFF
-#define IPATH_PERMISSIVE_LID 0xFFFF
-#define IPATH_AETH_CREDIT_SHIFT 24
-#define IPATH_AETH_CREDIT_MASK 0x1F
-#define IPATH_AETH_CREDIT_INVAL 0x1F
-#define IPATH_PSN_MASK 0xFFFFFF
-#define IPATH_MSN_MASK 0xFFFFFF
-#define IPATH_QPN_MASK 0xFFFFFF
-#define IPATH_MULTICAST_LID_BASE 0xC000
-#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
-#define IPATH_MULTICAST_QPN 0xFFFFFF
-
-/* Receive Header Queue: receive type (from infinipath) */
-#define RCVHQ_RCV_TYPE_EXPECTED  0
-#define RCVHQ_RCV_TYPE_EAGER     1
-#define RCVHQ_RCV_TYPE_NON_KD    2
-#define RCVHQ_RCV_TYPE_ERROR     3
-
-
-/* sub OpCodes - ith4x  */
-#define IPATH_ITH4X_OPCODE_ENCAP 0x81
-#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
-
-#define IPATH_HEADER_QUEUE_WORDS 9
-
-/* functions for extracting fields from rcvhdrq entries for the driver.
- */
-static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
-{
-       return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
-}
-
-static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
-{
-       return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
-           & INFINIPATH_RHF_RCVTYPE_MASK;
-}
-
-static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
-{
-       return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
-               & INFINIPATH_RHF_LENGTH_MASK) << 2;
-}
-
-static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
-{
-       return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
-           & INFINIPATH_RHF_EGRINDEX_MASK;
-}
-
-static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
-{
-       return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
-               & INFINIPATH_RHF_SEQ_MASK;
-}
-
-static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
-{
-       return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
-               & INFINIPATH_RHF_HDRQ_OFFSET_MASK;
-}
-
-static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
-{
-       return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
-}
-
-static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
-{
-       return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
-           & INFINIPATH_I_VERS_MASK;
-}
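A usage sketch tying the helpers above together for one rcvhdrq entry; the function itself is illustrative, not part of the header:

static inline int ipath_rhf_usable(const __le32 *rbuf)
{
	/* any error bit set means the packet cannot be trusted */
	if (ipath_hdrget_err_flags(rbuf))
		return 0;
	/*
	 * Eager packets carry their payload in the eager buffer whose
	 * index ipath_hdrget_index() returns; expected (TID) packets
	 * were DMA'd directly to the pinned user pages.
	 */
	return ipath_hdrget_rcv_type(rbuf) == RCVHQ_RCV_TYPE_EAGER ||
	       ipath_hdrget_rcv_type(rbuf) == RCVHQ_RCV_TYPE_EXPECTED;
}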
-
-#endif                         /* _IPATH_COMMON_H */
diff --git a/drivers/staging/rdma/ipath/ipath_cq.c b/drivers/staging/rdma/ipath/ipath_cq.c
deleted file mode 100644 (file)
index e9dd911..0000000
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @solicited: true if @entry is a solicited entry
- *
- * This may be called with qp->s_lock held.
- */
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
-{
-       struct ipath_cq_wc *wc;
-       unsigned long flags;
-       u32 head;
-       u32 next;
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       /*
-        * Note that the head pointer might be writable by user processes.
-        * Take care to verify it is a sane value.
-        */
-       wc = cq->queue;
-       head = wc->head;
-       if (head >= (unsigned) cq->ibcq.cqe) {
-               head = cq->ibcq.cqe;
-               next = 0;
-       } else
-               next = head + 1;
-       if (unlikely(next == wc->tail)) {
-               spin_unlock_irqrestore(&cq->lock, flags);
-               if (cq->ibcq.event_handler) {
-                       struct ib_event ev;
-
-                       ev.device = cq->ibcq.device;
-                       ev.element.cq = &cq->ibcq;
-                       ev.event = IB_EVENT_CQ_ERR;
-                       cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-               }
-               return;
-       }
-       if (cq->ip) {
-               wc->uqueue[head].wr_id = entry->wr_id;
-               wc->uqueue[head].status = entry->status;
-               wc->uqueue[head].opcode = entry->opcode;
-               wc->uqueue[head].vendor_err = entry->vendor_err;
-               wc->uqueue[head].byte_len = entry->byte_len;
-               wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
-               wc->uqueue[head].qp_num = entry->qp->qp_num;
-               wc->uqueue[head].src_qp = entry->src_qp;
-               wc->uqueue[head].wc_flags = entry->wc_flags;
-               wc->uqueue[head].pkey_index = entry->pkey_index;
-               wc->uqueue[head].slid = entry->slid;
-               wc->uqueue[head].sl = entry->sl;
-               wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-               wc->uqueue[head].port_num = entry->port_num;
-               /* Make sure entry is written before the head index. */
-               smp_wmb();
-       } else
-               wc->kqueue[head] = *entry;
-       wc->head = next;
-
-       if (cq->notify == IB_CQ_NEXT_COMP ||
-           (cq->notify == IB_CQ_SOLICITED && solicited)) {
-               cq->notify = IB_CQ_NONE;
-               cq->triggered++;
-               /*
-                * This will cause send_complete() to be called in
-                * another thread.
-                */
-               tasklet_hi_schedule(&cq->comptask);
-       }
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       if (entry->status != IB_WC_SUCCESS)
-               to_idev(cq->ibcq.device)->n_wqe_errs++;
-}
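Restating the ring discipline above as an editorial sketch: with ibcq.cqe == N the queue array holds N + 1 slots and one slot is always left empty, so fullness is detected before the tail entry would be overwritten:

/* editorial sketch of the head-advance step in ipath_cq_enter() */
static inline u32 cq_next_slot(u32 head, u32 cqe)
{
	return head >= cqe ? 0 : head + 1;	/* wrap after slot cqe */
}
/* the queue is full when cq_next_slot(head, cqe) == tail */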
-
-/**
- * ipath_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       struct ipath_cq_wc *wc;
-       unsigned long flags;
-       int npolled;
-       u32 tail;
-
-       /* The kernel can only poll a kernel completion queue */
-       if (cq->ip) {
-               npolled = -EINVAL;
-               goto bail;
-       }
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       wc = cq->queue;
-       tail = wc->tail;
-       if (tail > (u32) cq->ibcq.cqe)
-               tail = (u32) cq->ibcq.cqe;
-       for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-               if (tail == wc->head)
-                       break;
-               /* The kernel doesn't need a RMB since it has the lock. */
-               *entry = wc->kqueue[tail];
-               if (tail >= cq->ibcq.cqe)
-                       tail = 0;
-               else
-                       tail++;
-       }
-       wc->tail = tail;
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-       return npolled;
-}
-
-static void send_complete(unsigned long data)
-{
-       struct ipath_cq *cq = (struct ipath_cq *)data;
-
-       /*
-        * The completion handler will most likely rearm the notification
-        * and poll for all pending entries.  If a new completion entry
-        * is added while we are in this routine, tasklet_hi_schedule()
-        * won't call us again until we return so we check triggered to
-        * see if we need to call the handler again.
-        */
-       for (;;) {
-               u8 triggered = cq->triggered;
-
-               cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-
-               if (cq->triggered == triggered)
-                       return;
-       }
-}
-
-/**
- * ipath_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: the process context that may mmap the CQ
- * @udata: user data for libibverbs.so (determines user vs. kernel CQ)
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-                             const struct ib_cq_init_attr *attr,
-                             struct ib_ucontext *context,
-                             struct ib_udata *udata)
-{
-       int entries = attr->cqe;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cq *cq;
-       struct ipath_cq_wc *wc;
-       struct ib_cq *ret;
-       u32 sz;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       if (entries < 1 || entries > ib_ipath_max_cqes) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       /* Allocate the completion queue structure. */
-       cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-       if (!cq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto done;
-       }
-
-       /*
-        * Allocate the completion queue entries and head/tail pointers.
-        * This is allocated separately so that it can be resized and
-        * also mapped into user space.
-        * We need to use vmalloc() in order to support mmap and large
-        * numbers of entries.
-        */
-       sz = sizeof(*wc);
-       if (udata && udata->outlen >= sizeof(__u64))
-               sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-       else
-               sz += sizeof(struct ib_wc) * (entries + 1);
-       wc = vmalloc_user(sz);
-       if (!wc) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_cq;
-       }
-
-       /*
-        * Return the address of the WC as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               int err;
-
-               cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
-               if (!cq->ip) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_wc;
-               }
-
-               err = ib_copy_to_udata(udata, &cq->ip->offset,
-                                      sizeof(cq->ip->offset));
-               if (err) {
-                       ret = ERR_PTR(err);
-                       goto bail_ip;
-               }
-       } else
-               cq->ip = NULL;
-
-       spin_lock(&dev->n_cqs_lock);
-       if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
-               spin_unlock(&dev->n_cqs_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_cqs_allocated++;
-       spin_unlock(&dev->n_cqs_lock);
-
-       if (cq->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       /*
-        * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-        * The number of entries must be >= the number requested,
-        * otherwise an error is returned.
-        */
-       cq->ibcq.cqe = entries;
-       cq->notify = IB_CQ_NONE;
-       cq->triggered = 0;
-       spin_lock_init(&cq->lock);
-       tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-       wc->head = 0;
-       wc->tail = 0;
-       cq->queue = wc;
-
-       ret = &cq->ibcq;
-
-       goto done;
-
-bail_ip:
-       kfree(cq->ip);
-bail_wc:
-       vfree(wc);
-bail_cq:
-       kfree(cq);
-done:
-       return ret;
-}
-
-/**
- * ipath_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int ipath_destroy_cq(struct ib_cq *ibcq)
-{
-       struct ipath_ibdev *dev = to_idev(ibcq->device);
-       struct ipath_cq *cq = to_icq(ibcq);
-
-       tasklet_kill(&cq->comptask);
-       spin_lock(&dev->n_cqs_lock);
-       dev->n_cqs_allocated--;
-       spin_unlock(&dev->n_cqs_lock);
-       if (cq->ip)
-               kref_put(&cq->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(cq->queue);
-       kfree(cq);
-
-       return 0;
-}
-
-/**
- * ipath_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       unsigned long flags;
-       int ret = 0;
-
-       spin_lock_irqsave(&cq->lock, flags);
-       /*
-        * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-        * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-        */
-       if (cq->notify != IB_CQ_NEXT_COMP)
-               cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-       if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-           cq->queue->head != cq->queue->tail)
-               ret = 1;
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       return ret;
-}
-
-/**
- * ipath_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- * @cqe: the requested new number of entries
- * @udata: user data for libibverbs.so (used to return the mmap offset)
- *
- * Returns 0 for success.
- */
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       struct ipath_cq_wc *old_wc;
-       struct ipath_cq_wc *wc;
-       u32 head, tail, n;
-       int ret;
-       u32 sz;
-
-       if (cqe < 1 || cqe > ib_ipath_max_cqes) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * Need to use vmalloc() if we want to support large #s of entries.
-        */
-       sz = sizeof(*wc);
-       if (udata && udata->outlen >= sizeof(__u64))
-               sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-       else
-               sz += sizeof(struct ib_wc) * (cqe + 1);
-       wc = vmalloc_user(sz);
-       if (!wc) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       /* Check that we can write the offset to mmap. */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               __u64 offset = 0;
-
-               ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-               if (ret)
-                       goto bail_free;
-       }
-
-       spin_lock_irq(&cq->lock);
-       /*
-        * Make sure head and tail are sane since they
-        * might be user writable.
-        */
-       old_wc = cq->queue;
-       head = old_wc->head;
-       if (head > (u32) cq->ibcq.cqe)
-               head = (u32) cq->ibcq.cqe;
-       tail = old_wc->tail;
-       if (tail > (u32) cq->ibcq.cqe)
-               tail = (u32) cq->ibcq.cqe;
-       if (head < tail)
-               n = cq->ibcq.cqe + 1 + head - tail;
-       else
-               n = head - tail;
-       if (unlikely((u32)cqe < n)) {
-               ret = -EINVAL;
-               goto bail_unlock;
-       }
-       for (n = 0; tail != head; n++) {
-               if (cq->ip)
-                       wc->uqueue[n] = old_wc->uqueue[tail];
-               else
-                       wc->kqueue[n] = old_wc->kqueue[tail];
-               if (tail == (u32) cq->ibcq.cqe)
-                       tail = 0;
-               else
-                       tail++;
-       }
-       cq->ibcq.cqe = cqe;
-       wc->head = n;
-       wc->tail = 0;
-       cq->queue = wc;
-       spin_unlock_irq(&cq->lock);
-
-       vfree(old_wc);
-
-       if (cq->ip) {
-               struct ipath_ibdev *dev = to_idev(ibcq->device);
-               struct ipath_mmap_info *ip = cq->ip;
-
-               ipath_update_mmap_info(dev, ip, sz, wc);
-
-               /*
-                * Return the offset to mmap.
-                * See ipath_mmap() for details.
-                */
-               if (udata && udata->outlen >= sizeof(__u64)) {
-                       ret = ib_copy_to_udata(udata, &ip->offset,
-                                              sizeof(ip->offset));
-                       if (ret)
-                               goto bail;
-               }
-
-               spin_lock_irq(&dev->pending_lock);
-               if (list_empty(&ip->pending_mmaps))
-                       list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = 0;
-       goto bail;
-
-bail_unlock:
-       spin_unlock_irq(&cq->lock);
-bail_free:
-       vfree(wc);
-bail:
-       return ret;
-}
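A worked instance of the wraparound occupancy count above (editorial, not from the source):

/* ibcq.cqe == 8  ->  9 slots in the ring
 * head == 2, tail == 6  ->  head < tail, so
 * n = cqe + 1 + head - tail = 8 + 1 + 2 - 6 = 5   (slots 6,7,8,0,1)
 * a resize request asking for cqe < 5 is refused with -EINVAL */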
diff --git a/drivers/staging/rdma/ipath/ipath_debug.h b/drivers/staging/rdma/ipath/ipath_debug.h
deleted file mode 100644 (file)
index 65926cd..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_DEBUG_H
-#define _IPATH_DEBUG_H
-
-#ifndef _IPATH_DEBUGGING       /* debugging enabled or not */
-#define _IPATH_DEBUGGING 1
-#endif
-
-#if _IPATH_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or disable
- * dynamically.  This can be set at modprobe time also:
- *      modprobe infinipath.ko infinipath_debug=7
- */
-
-#define __IPATH_INFO        0x1        /* generic low verbosity stuff */
-#define __IPATH_DBG         0x2        /* generic debug */
-#define __IPATH_TRSAMPLE    0x8        /* generate trace buffer sample entries */
-/* leave some low verbosity spots open */
-#define __IPATH_VERBDBG     0x40       /* very verbose debug */
-#define __IPATH_PKTDBG      0x80       /* print packet data */
-/* print process startup (init)/exit messages */
-#define __IPATH_PROCDBG     0x100
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG       0x200
-#define __IPATH_ERRPKTDBG   0x400
-#define __IPATH_USER_SEND   0x1000     /* use user mode send */
-#define __IPATH_KERNEL_SEND 0x2000     /* use kernel mode send */
-#define __IPATH_EPKTDBG     0x4000     /* print ethernet packet data */
-#define __IPATH_IPATHDBG    0x10000    /* Ethernet (IPATH) gen debug */
-#define __IPATH_IPATHWARN   0x20000    /* Ethernet (IPATH) warnings */
-#define __IPATH_IPATHERR    0x40000    /* Ethernet (IPATH) errors */
-#define __IPATH_IPATHPD     0x80000    /* Ethernet (IPATH) packet dump */
-#define __IPATH_IPATHTABLE  0x100000   /* Ethernet (IPATH) table dump */
-#define __IPATH_LINKVERBDBG 0x200000   /* very verbose linkchange debug */
-
-#else                          /* _IPATH_DEBUGGING */
-
-/*
- * define all of these even with debugging off, for the few places that do
- * if (infinipath_debug & __IPATH_xyzzy), but in a way that will make the
- * compiler eliminate the code.
- */
-
-#define __IPATH_INFO      0x0  /* generic low verbosity stuff */
-#define __IPATH_DBG       0x0  /* generic debug */
-#define __IPATH_TRSAMPLE  0x0  /* generate trace buffer sample entries */
-#define __IPATH_VERBDBG   0x0  /* very verbose debug */
-#define __IPATH_PKTDBG    0x0  /* print packet data */
-#define __IPATH_PROCDBG   0x0  /* process startup (init)/exit messages */
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG     0x0
-#define __IPATH_EPKTDBG   0x0  /* print ethernet packet data */
-#define __IPATH_IPATHDBG  0x0  /* Ethernet (IPATH) gen debug */
-#define __IPATH_IPATHWARN 0x0  /* Ethernet (IPATH) warnings */
-#define __IPATH_IPATHERR  0x0  /* Ethernet (IPATH) errors */
-#define __IPATH_IPATHPD   0x0  /* Ethernet (IPATH) packet dump */
-#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) table dump */
-#define __IPATH_LINKVERBDBG 0x0        /* very verbose linkchange debug */
-
-#endif                         /* _IPATH_DEBUGGING */
-
-#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
-
-#endif                         /* _IPATH_DEBUG_H */
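A usage sketch of the mask scheme above; the parameter stands in for the driver's infinipath_debug module parameter named at the top of this header, and the printk is illustrative:

static inline void ipath_example_trace(unsigned infinipath_debug)
{
	/* with _IPATH_DEBUGGING off the mask is 0x0, the test is
	 * constant-false, and the compiler eliminates the call */
	if (infinipath_debug & __IPATH_PROCDBG)
		printk(KERN_DEBUG "ipath: process startup\n");
}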
diff --git a/drivers/staging/rdma/ipath/ipath_diag.c b/drivers/staging/rdma/ipath/ipath_diag.c
deleted file mode 100644 (file)
index 45802e9..0000000
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains support for diagnostic functions.  It is accessed by
- * opening the ipath_diag device, normally minor number 129.  Diagnostic use
- * of the InfiniPath chip may render the chip or board unusable until the
- * driver is unloaded, or in some cases, until the system is rebooted.
- *
- * Accesses to the chip through this interface are unlike going
- * through the /sys/bus/pci resource mmap interface.
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/export.h>
-#include <asm/uaccess.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-int ipath_diag_inuse;
-static int diag_set_link;
-
-static int ipath_diag_open(struct inode *in, struct file *fp);
-static int ipath_diag_release(struct inode *in, struct file *fp);
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-                              size_t count, loff_t *off);
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-                               size_t count, loff_t *off);
-
-static const struct file_operations diag_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_diag_write,
-       .read = ipath_diag_read,
-       .open = ipath_diag_open,
-       .release = ipath_diag_release,
-       .llseek = default_llseek,
-};
-
-static ssize_t ipath_diagpkt_write(struct file *fp,
-                                  const char __user *data,
-                                  size_t count, loff_t *off);
-
-static const struct file_operations diagpkt_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_diagpkt_write,
-       .llseek = noop_llseek,
-};
-
-static atomic_t diagpkt_count = ATOMIC_INIT(0);
-static struct cdev *diagpkt_cdev;
-static struct device *diagpkt_dev;
-
-int ipath_diag_add(struct ipath_devdata *dd)
-{
-       char name[16];
-       int ret = 0;
-
-       if (atomic_inc_return(&diagpkt_count) == 1) {
-               ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
-                                     "ipath_diagpkt", &diagpkt_file_ops,
-                                     &diagpkt_cdev, &diagpkt_dev);
-
-               if (ret) {
-                       ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
-                                     "device: %d", ret);
-                       goto done;
-               }
-       }
-
-       snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
-
-       ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
-                             &diag_file_ops, &dd->diag_cdev,
-                             &dd->diag_dev);
-       if (ret)
-               ipath_dev_err(dd, "Couldn't create %s device: %d",
-                             name, ret);
-
-done:
-       return ret;
-}
-
-void ipath_diag_remove(struct ipath_devdata *dd)
-{
-       if (atomic_dec_and_test(&diagpkt_count))
-               ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
-
-       ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
-}
-
-/**
- * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy (multiple of 32 bits)
- *
- * This function also localizes all chip memory accesses.
- * The copy should be written such that we read full cacheline packets
- * from the chip.  This is usually used for a single qword
- *
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
-                            const void __iomem *caddr, size_t count)
-{
-       const u64 __iomem *reg_addr = caddr;
-       const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-       int ret;
-
-       /* not very efficient, but it works for now */
-       if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u64 data = readq(reg_addr);
-               if (copy_to_user(uaddr, &data, sizeof(u64))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               reg_addr++;
-               uaddr += sizeof(u64);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: the number of bytes to copy (multiple of 32 bits)
- *
- * This is usually used for a single qword.
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-
-static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
-                             const void __user *uaddr, size_t count)
-{
-       u64 __iomem *reg_addr = caddr;
-       const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-       int ret;
-
-       /* not very efficient, but it works for now */
-       if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u64 data;
-               if (copy_from_user(&data, uaddr, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               writeq(data, reg_addr);
-
-               reg_addr++;
-               uaddr += sizeof(u64);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy
- *
- * read 32 bit values, not 64 bit; for memories that only
- * support 32 bit reads; usually a single dword.
- */
-static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
-                            const void __iomem *caddr, size_t count)
-{
-       const u32 __iomem *reg_addr = caddr;
-       const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-       int ret;
-
-       if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-           reg_end > (u32 __iomem *) dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       /* not very efficient, but it works for now */
-       while (reg_addr < reg_end) {
-               u32 data = readl(reg_addr);
-               if (copy_to_user(uaddr, &data, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-
-               reg_addr++;
-               uaddr += sizeof(u32);
-
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: number of bytes to copy
- *
- * write 32 bit values, not 64 bit; for memories that only
- * support 32 bit write; usually a single dword.
- */
-
-static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
-                             const void __user *uaddr, size_t count)
-{
-       u32 __iomem *reg_addr = caddr;
-       const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-       int ret;
-
-       if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-           reg_end > (u32 __iomem *) dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u32 data;
-               if (copy_from_user(&data, uaddr, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               writel(data, reg_addr);
-
-               reg_addr++;
-               uaddr += sizeof(u32);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-static int ipath_diag_open(struct inode *in, struct file *fp)
-{
-       int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
-       struct ipath_devdata *dd;
-       int ret;
-
-       mutex_lock(&ipath_mutex);
-
-       if (ipath_diag_inuse) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       dd = ipath_lookup(unit);
-
-       if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
-           !dd->ipath_kregbase) {
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       fp->private_data = dd;
-       ipath_diag_inuse = -2;
-       diag_set_link = 0;
-       ret = 0;
-
-       /* Only expose a way to reset the device if we
-          make it into diag mode. */
-       ipath_expose_reset(&dd->pcidev->dev);
-
-bail:
-       mutex_unlock(&ipath_mutex);
-
-       return ret;
-}
-
-/**
- * ipath_diagpkt_write - write an IB packet
- * @fp: the diag data device file pointer
- * @data: ipath_diag_pkt structure saying where to get the packet
- * @count: size of data to write
- * @off: unused by this code
- */
-static ssize_t ipath_diagpkt_write(struct file *fp,
-                                  const char __user *data,
-                                  size_t count, loff_t *off)
-{
-       u32 __iomem *piobuf;
-       u32 plen, pbufn, maxlen_reserve;
-       struct ipath_diag_pkt odp;
-       struct ipath_diag_xpkt dp;
-       u32 *tmpbuf = NULL;
-       struct ipath_devdata *dd;
-       ssize_t ret = 0;
-       u64 val;
-       u32 l_state, lt_state; /* LinkState, LinkTrainingState */
-
-
-       if (count == sizeof(dp)) {
-               if (copy_from_user(&dp, data, sizeof(dp))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-       } else if (count == sizeof(odp)) {
-               if (copy_from_user(&odp, data, sizeof(odp))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               dp.len = odp.len;
-               dp.unit = odp.unit;
-               dp.data = odp.data;
-               dp.pbc_wd = 0;
-       } else {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /* send count must be an exact number of dwords */
-       if (dp.len & 3) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       plen = dp.len >> 2;
-
-       dd = ipath_lookup(dp.unit);
-       if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
-           !dd->ipath_kregbase) {
-               ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
-                          dp.unit);
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       if (ipath_diag_inuse && !diag_set_link &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               diag_set_link = 1;
-               ipath_cdbg(VERBOSE, "Trying to set to set link active for "
-                          "diag pkt\n");
-               ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-               ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-       }
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
-               ret = -ENODEV;
-               goto bail;
-       }
-       /*
-        * Want to skip the l_state check if using a custom PBC,
-        * because we might be trying to force an SM packet out.
-        * As a first cut, skip _all_ state checking in that case.
-        */
-       val = ipath_ib_state(dd, dd->ipath_lastibcstat);
-       lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-       l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-       if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
-           (val != dd->ib_init && val != dd->ib_arm &&
-           val != dd->ib_active))) {
-               ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
-                          dd->ipath_unit, (unsigned long long) val);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * We need the total length before the first word is written, plus
-        * 2 Dwords. One Dword is padding so we get the full user data when
-        * not aligned on a word boundary. The other Dword makes sure we
-        * have room for the ICRC which gets tacked on later.
-        */
-       maxlen_reserve = 2 * sizeof(u32);
-       if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
-               ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
-                         dp.len, dd->ipath_ibmaxlen);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       plen = sizeof(u32) + dp.len;
-
-       tmpbuf = vmalloc(plen);
-       if (!tmpbuf) {
-               dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
-                        "failing\n");
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       if (copy_from_user(tmpbuf,
-                          (const void __user *) (unsigned long) dp.data,
-                          dp.len)) {
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       plen >>= 2;             /* in dwords */
-
-       piobuf = ipath_getpiobuf(dd, plen, &pbufn);
-       if (!piobuf) {
-               ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
-                          dd->ipath_unit);
-               ret = -EBUSY;
-               goto bail;
-       }
-       /* disarm it just to be extra sure */
-       ipath_disarm_piobufs(dd, pbufn, 1);
-
-       if (ipath_debug & __IPATH_PKTDBG)
-               ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
-                          dd->ipath_unit, plen - 1, pbufn);
-
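-       /* if the caller supplied no PBC word, default it to plen (dwords) */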
-       if (dp.pbc_wd == 0)
-               dp.pbc_wd = plen;
-       writeq(dp.pbc_wd, piobuf);
-       /*
-        * Copy all but the trigger word, then flush, so it's written
-        * to the chip before the trigger word, then write the trigger
-        * word, then flush again, so the packet is sent.
-        */
-       if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-               ipath_flush_wc();
-               __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
-               ipath_flush_wc();
-               __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
-       } else
-               __iowrite32_copy(piobuf + 2, tmpbuf, plen);
-
-       ipath_flush_wc();
-
-       ret = sizeof(dp);
-
-bail:
-       vfree(tmpbuf);
-       return ret;
-}
-
-static int ipath_diag_release(struct inode *in, struct file *fp)
-{
-       mutex_lock(&ipath_mutex);
-       ipath_diag_inuse = 0;
-       fp->private_data = NULL;
-       mutex_unlock(&ipath_mutex);
-       return 0;
-}
-
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-                              size_t count, loff_t *off)
-{
-       struct ipath_devdata *dd = fp->private_data;
-       void __iomem *kreg_base;
-       ssize_t ret;
-
-       kreg_base = dd->ipath_kregbase;
-
-       if (count == 0)
-               ret = 0;
-       else if ((count % 4) || (*off % 4))
-               /* address or length is not 32-bit aligned, hence invalid */
-               ret = -EINVAL;
-       else if (ipath_diag_inuse < 1 && (*off || count != 8))
-               ret = -EINVAL;  /* prevent cat /dev/ipath_diag* */
-       else if ((count % 8) || (*off % 8))
-               /* address or length not 64-bit aligned; do 32-bit reads */
-               ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
-       else
-               ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
-
-       if (ret >= 0) {
-               *off += count;
-               ret = count;
-               if (ipath_diag_inuse == -2)
-                       ipath_diag_inuse++;
-       }
-
-       return ret;
-}
-
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-                               size_t count, loff_t *off)
-{
-       struct ipath_devdata *dd = fp->private_data;
-       void __iomem *kreg_base;
-       ssize_t ret;
-
-       kreg_base = dd->ipath_kregbase;
-
-       if (count == 0)
-               ret = 0;
-       else if ((count % 4) || (*off % 4))
-               /* address or length is not 32-bit aligned, hence invalid */
-               ret = -EINVAL;
-       else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
-                ipath_diag_inuse == -2)  /* read qw off 0, write qw off 0 */
-               ret = -EINVAL;  /* before any other write allowed */
-       else if ((count % 8) || (*off % 8))
-               /* address or length not 64-bit aligned; do 32-bit writes */
-               ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
-       else
-               ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
-
-       if (ret >= 0) {
-               *off += count;
-               ret = count;
-               if (ipath_diag_inuse == -1)
-                       ipath_diag_inuse = 1; /* all read/write OK now */
-       }
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_dma.c b/drivers/staging/rdma/ipath/ipath_dma.c
deleted file mode 100644 (file)
index 123a8c0..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/scatterlist.h>
-#include <linux/gfp.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_verbs.h"
-
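-/* 0 doubles as the error value; a kernel virtual address is never 0 */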
-#define BAD_DMA_ADDRESS ((u64) 0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 ipath_dma_map_single(struct ib_device *dev,
-                               void *cpu_addr, size_t size,
-                               enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-       return (u64) cpu_addr;
-}
-
-static void ipath_dma_unmap_single(struct ib_device *dev,
-                                  u64 addr, size_t size,
-                                  enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static u64 ipath_dma_map_page(struct ib_device *dev,
-                             struct page *page,
-                             unsigned long offset,
-                             size_t size,
-                             enum dma_data_direction direction)
-{
-       u64 addr;
-
-       BUG_ON(!valid_dma_direction(direction));
-
-       if (offset + size > PAGE_SIZE) {
-               addr = BAD_DMA_ADDRESS;
-               goto done;
-       }
-
-       addr = (u64) page_address(page);
-       if (addr)
-               addr += offset;
-       /* TODO: handle highmem pages */
-
-done:
-       return addr;
-}
-
-static void ipath_dma_unmap_page(struct ib_device *dev,
-                                u64 addr, size_t size,
-                                enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                       int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       u64 addr;
-       int i;
-       int ret = nents;
-
-       BUG_ON(!valid_dma_direction(direction));
-
-       for_each_sg(sgl, sg, nents, i) {
-               addr = (u64) page_address(sg_page(sg));
-               /* TODO: handle highmem pages */
-               if (!addr) {
-                       ret = 0;
-                       break;
-               }
-               sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-               sg->dma_length = sg->length;
-#endif
-       }
-       return ret;
-}
-
-static void ipath_unmap_sg(struct ib_device *dev,
-                          struct scatterlist *sg, int nents,
-                          enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static void ipath_sync_single_for_cpu(struct ib_device *dev,
-                                     u64 addr,
-                                     size_t size,
-                                     enum dma_data_direction dir)
-{
-}
-
-static void ipath_sync_single_for_device(struct ib_device *dev,
-                                        u64 addr,
-                                        size_t size,
-                                        enum dma_data_direction dir)
-{
-}
-
-static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                     u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p)
-               addr = page_address(p);
-       if (dma_handle)
-               *dma_handle = (u64) addr;
-       return addr;
-}
-
-static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
-                                   void *cpu_addr, u64 dma_handle)
-{
-       free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
-       .mapping_error = ipath_mapping_error,
-       .map_single = ipath_dma_map_single,
-       .unmap_single = ipath_dma_unmap_single,
-       .map_page = ipath_dma_map_page,
-       .unmap_page = ipath_dma_unmap_page,
-       .map_sg = ipath_map_sg,
-       .unmap_sg = ipath_unmap_sg,
-       .sync_single_for_cpu = ipath_sync_single_for_cpu,
-       .sync_single_for_device = ipath_sync_single_for_device,
-       .alloc_coherent = ipath_dma_alloc_coherent,
-       .free_coherent = ipath_dma_free_coherent
-};
diff --git a/drivers/staging/rdma/ipath/ipath_driver.c b/drivers/staging/rdma/ipath/ipath_driver.c
deleted file mode 100644 (file)
index 2ab22f9..0000000
+++ /dev/null
@@ -1,2784 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-#include <linux/bitmap.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#ifdef CONFIG_X86_64
-#include <asm/pat.h>
-#endif
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-
-static void ipath_update_pio_bufs(struct ipath_devdata *);
-
-const char *ipath_get_unit_name(int unit)
-{
-       static char iname[16];
-       snprintf(iname, sizeof iname, "infinipath%u", unit);
-       return iname;
-}
-
-#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
-#define PFX IPATH_DRV_NAME ": "
-
-/*
- * The buffer has to be longer than this string, so we can append
- * board/chip information to it in the init code.
- */
-const char ib_ipath_version[] = IPATH_IDSTR "\n";
-
-static struct idr unit_table;
-DEFINE_SPINLOCK(ipath_devs_lock);
-LIST_HEAD(ipath_dev_list);
-
-wait_queue_head_t ipath_state_wait;
-
-unsigned ipath_debug = __IPATH_INFO;
-
-module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "mask for debug prints");
-EXPORT_SYMBOL_GPL(ipath_debug);
-
-unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
-module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
-MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
-
-static unsigned ipath_hol_timeout_ms = 13000;
-module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
-MODULE_PARM_DESC(hol_timeout_ms,
-       "duration of user app suspension after link failure");
-
-unsigned ipath_linkrecovery = 1;
-module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@qlogic.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
-
-/*
- * Table to translate the LINKTRAININGSTATE portion of
- * IBCStatus to a human-readable form.
- */
-const char *ipath_ibcstatus_str[] = {
-       "Disabled",
-       "LinkUp",
-       "PollActive",
-       "PollQuiet",
-       "SleepDelay",
-       "SleepQuiet",
-       "LState6",              /* unused */
-       "LState7",              /* unused */
-       "CfgDebounce",
-       "CfgRcvfCfg",
-       "CfgWaitRmt",
-       "CfgIdle",
-       "RecovRetrain",
-       "CfgTxRevLane",         /* unused before IBA7220 */
-       "RecovWaitRmt",
-       "RecovIdle",
-       /* below were added for IBA7220 */
-       "CfgEnhanced",
-       "CfgTest",
-       "CfgWaitRmtTest",
-       "CfgWaitCfgEnhanced",
-       "SendTS_T",
-       "SendTstIdles",
-       "RcvTS_T",
-       "SendTst_TS1s",
-       "LTState18", "LTState19", "LTState1A", "LTState1B",
-       "LTState1C", "LTState1D", "LTState1E", "LTState1F"
-};
-
-static void ipath_remove_one(struct pci_dev *);
-static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
-
-/* Only needed for registration, nothing else needs this info */
-#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
-#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
-
-/* Number of seconds before our card status check...  */
-#define STATUS_TIMEOUT 60
-
-static const struct pci_device_id ipath_pci_tbl[] = {
-       { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
-       { 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
-
-static struct pci_driver ipath_driver = {
-       .name = IPATH_DRV_NAME,
-       .probe = ipath_init_one,
-       .remove = ipath_remove_one,
-       .id_table = ipath_pci_tbl,
-       .driver = {
-               .groups = ipath_driver_attr_groups,
-       },
-};
-
-static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
-                            u32 *bar0, u32 *bar1)
-{
-       int ret;
-
-       ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
-       if (ret)
-               ipath_dev_err(dd, "failed to read bar0 before enable: "
-                             "error %d\n", -ret);
-
-       ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
-       if (ret)
-               ipath_dev_err(dd, "failed to read bar1 before enable: "
-                             "error %d\n", -ret);
-
-       ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
-}
-
-static void ipath_free_devdata(struct pci_dev *pdev,
-                              struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       pci_set_drvdata(pdev, NULL);
-
-       if (dd->ipath_unit != -1) {
-               spin_lock_irqsave(&ipath_devs_lock, flags);
-               idr_remove(&unit_table, dd->ipath_unit);
-               list_del(&dd->ipath_list);
-               spin_unlock_irqrestore(&ipath_devs_lock, flags);
-       }
-       vfree(dd);
-}
-
-static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
-{
-       unsigned long flags;
-       struct ipath_devdata *dd;
-       int ret;
-
-       dd = vzalloc(sizeof(*dd));
-       if (!dd) {
-               dd = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-       dd->ipath_unit = -1;
-
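-       /* preload so idr_alloc() can succeed with GFP_NOWAIT under the lock */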
-       idr_preload(GFP_KERNEL);
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate unit ID: error %d\n", -ret);
-               ipath_free_devdata(pdev, dd);
-               dd = ERR_PTR(ret);
-               goto bail_unlock;
-       }
-       dd->ipath_unit = ret;
-
-       dd->pcidev = pdev;
-       pci_set_drvdata(pdev, dd);
-
-       list_add(&dd->ipath_list, &ipath_dev_list);
-
-bail_unlock:
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-       idr_preload_end();
-bail:
-       return dd;
-}
-
-static inline struct ipath_devdata *__ipath_lookup(int unit)
-{
-       return idr_find(&unit_table, unit);
-}
-
-struct ipath_devdata *ipath_lookup(int unit)
-{
-       struct ipath_devdata *dd;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-       dd = __ipath_lookup(unit);
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       return dd;
-}
-
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
-{
-       int nunits, npresent, nup;
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       int maxports;
-
-       nunits = npresent = nup = maxports = 0;
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-               nunits++;
-               if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
-                       npresent++;
-               if (dd->ipath_lid &&
-                   !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
-                                        | IPATH_LINKUNK)))
-                       nup++;
-               if (dd->ipath_cfgports > maxports)
-                       maxports = dd->ipath_cfgports;
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       if (npresentp)
-               *npresentp = npresent;
-       if (nupp)
-               *nupp = nup;
-       if (maxportsp)
-               *maxportsp = maxports;
-
-       return nunits;
-}
-
-/*
- * These next two routines are placeholders in case we don't have per-arch
- * code for controlling write combining.  If explicit control of write
- * combining is not available, performance will probably be awful.
- */
-
-int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
-{
-       return -EOPNOTSUPP;
-}
-
-void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
-{
-}
-
-/*
- * Perform a PIO buffer bandwidth write test, to verify proper system
- * configuration.  Even when all the setup calls work, occasionally
- * BIOS or other issues can prevent write combining from working, or
- * can cause other bandwidth problems to the chip.
- *
- * This test simply writes the same buffer over and over again, and
- * measures close to the peak bandwidth to the chip (not testing
- * data bandwidth to the wire).  On chips that use an address-based
- * trigger to send packets to the wire, this is easy.  On chips that
- * use a count to trigger, we want to make sure that the packet doesn't
- * go out on the wire, or trigger flow control checks.
- */
-static void ipath_verify_pioperf(struct ipath_devdata *dd)
-{
-       u32 pbnum, cnt, lcnt;
-       u32 __iomem *piobuf;
-       u32 *addr;
-       u64 msecs, emsecs;
-
-       piobuf = ipath_getpiobuf(dd, 0, &pbnum);
-       if (!piobuf) {
-               dev_info(&dd->pcidev->dev,
-                       "No PIObufs for checking perf, skipping\n");
-               return;
-       }
-
-       /*
-        * Enough to give us a reasonable test, less than the piobuf size,
-        * and likely a multiple of the store buffer length.
-        */
-       cnt = 1024;
-
-       addr = vmalloc(cnt);
-       if (!addr) {
-               dev_info(&dd->pcidev->dev,
-                       "Couldn't get memory for checking PIO perf,"
-                       " skipping\n");
-               goto done;
-       }
-
-       preempt_disable();  /* we want reasonably accurate elapsed time */
-       msecs = 1 + jiffies_to_msecs(jiffies);
-       for (lcnt = 0; lcnt < 10000U; lcnt++) {
-               /* wait until we cross msec boundary */
-               if (jiffies_to_msecs(jiffies) >= msecs)
-                       break;
-               udelay(1);
-       }
-
-       ipath_disable_armlaunch(dd);
-
-       /*
-        * length 0, no dwords actually sent, and mark as VL15
-        * on chips where that may matter (due to IB flowcontrol)
-        */
-       if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
-               writeq(1UL << 63, piobuf);
-       else
-               writeq(0, piobuf);
-       ipath_flush_wc();
-
-       /*
-        * this is only roughly accurate, since even with preempt we
-        * still take interrupts that could take a while.  Running for
-        * >= 5 msec seems to get us "close enough" to accurate values
-        */
-       msecs = jiffies_to_msecs(jiffies);
-       for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
-               __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
-               emsecs = jiffies_to_msecs(jiffies) - msecs;
-       }
-
-       /* each pass wrote cnt = 1024 bytes, so lcnt / emsecs is roughly
-        * MiB/sec; require 1 GiB/sec, slightly over IB SDR line rate */
-       if (lcnt < (emsecs * 1024U))
-               ipath_dev_err(dd,
-                       "Performance problem: bandwidth to PIO buffers is "
-                       "only %u MiB/sec\n",
-                       lcnt / (u32) emsecs);
-       else
-               ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
-                       lcnt / (u32) emsecs);
-
-       preempt_enable();
-
-       vfree(addr);
-
-done:
-       /* disarm piobuf, so it's available again */
-       ipath_disarm_piobufs(dd, pbnum, 1);
-       ipath_enable_armlaunch(dd);
-}
-
-static void cleanup_device(struct ipath_devdata *dd);
-
-static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-       int ret, len, j;
-       struct ipath_devdata *dd;
-       unsigned long long addr;
-       u32 bar0 = 0, bar1 = 0;
-
-#ifdef CONFIG_X86_64
-       if (pat_enabled()) {
-               pr_warn("ipath needs PAT disabled, boot with nopat kernel parameter\n");
-               ret = -ENODEV;
-               goto bail;
-       }
-#endif
-
-       dd = ipath_alloc_devdata(pdev);
-       if (IS_ERR(dd)) {
-               ret = PTR_ERR(dd);
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate devdata: error %d\n", -ret);
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
-
-       ret = pci_enable_device(pdev);
-       if (ret) {
-               /* This can happen iff:
-                *
-                * We did a chip reset, and then failed to reprogram the
-                * BAR, or the chip reset due to an internal error.  We then
-                * unloaded the driver and reloaded it.
-                *
-                * Both reset cases set the BAR back to initial state.  For
-                * the latter case, the AER sticky error bit at offset 0x718
-                * should be set, but the Linux kernel doesn't yet know
-                * about that, it appears.  If the original BAR was retained
-                * in the kernel data structures, this may be OK.
-                */
-               ipath_dev_err(dd, "enable unit %d failed: error %d\n",
-                             dd->ipath_unit, -ret);
-               goto bail_devdata;
-       }
-       addr = pci_resource_start(pdev, 0);
-       len = pci_resource_len(pdev, 0);
-       ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
-                  "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
-                  ent->device, ent->driver_data);
-
-       read_bars(dd, pdev, &bar0, &bar1);
-
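-       /*
-        * The low bits of BAR0 are flag bits; if both BARs decode to zero,
-        * the chip was probably reset, so try rewriting the saved address.
-        */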
-       if (!bar1 && !(bar0 & ~0xf)) {
-               if (addr) {
-                       dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
-                                "rewriting as %llx\n", addr);
-                       ret = pci_write_config_dword(
-                               pdev, PCI_BASE_ADDRESS_0, addr);
-                       if (ret) {
-                               ipath_dev_err(dd, "rewrite of BAR0 "
-                                             "failed: err %d\n", -ret);
-                               goto bail_disable;
-                       }
-                       ret = pci_write_config_dword(
-                               pdev, PCI_BASE_ADDRESS_1, addr >> 32);
-                       if (ret) {
-                               ipath_dev_err(dd, "rewrite of BAR1 "
-                                             "failed: err %d\n", -ret);
-                               goto bail_disable;
-                       }
-               } else {
-                       ipath_dev_err(dd, "BAR is 0 (probable RESET), "
-                                     "not usable until reboot\n");
-                       ret = -ENODEV;
-                       goto bail_disable;
-               }
-       }
-
-       ret = pci_request_regions(pdev, IPATH_DRV_NAME);
-       if (ret) {
-               dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
-                        "err %d\n", dd->ipath_unit, -ret);
-               goto bail_disable;
-       }
-
-       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (ret) {
-               /*
-                * if the 64 bit setup fails, try 32 bit.  Some systems
-                * do not set up 64 bit maps on systems with 2GB or less
-                * memory installed.
-                */
-               ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (ret) {
-                       dev_info(&pdev->dev,
-                               "Unable to set DMA mask for unit %u: %d\n",
-                               dd->ipath_unit, ret);
-                       goto bail_regions;
-               } else {
-                       ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
-                       ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-                       if (ret)
-                               dev_info(&pdev->dev,
-                                       "Unable to set DMA consistent mask "
-                                       "for unit %u: %d\n",
-                                       dd->ipath_unit, ret);
-
-               }
-       } else {
-               ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-               if (ret)
-                       dev_info(&pdev->dev,
-                               "Unable to set DMA consistent mask "
-                               "for unit %u: %d\n",
-                               dd->ipath_unit, ret);
-       }
-
-       pci_set_master(pdev);
-
-       /*
-        * Save BARs to rewrite after device reset.  Save all 64 bits of
-        * BAR, just in case.
-        */
-       dd->ipath_pcibar0 = addr;
-       dd->ipath_pcibar1 = addr >> 32;
-       dd->ipath_deviceid = ent->device;       /* save for later use */
-       dd->ipath_vendorid = ent->vendor;
-
-       /* setup the chip-specific functions, as early as possible. */
-       switch (ent->device) {
-       case PCI_DEVICE_ID_INFINIPATH_HT:
-               ipath_init_iba6110_funcs(dd);
-               break;
-
-       default:
-               ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
-                             "failing\n", ent->device);
-               return -ENODEV;
-       }
-
-       for (j = 0; j < 6; j++) {
-               if (!pdev->resource[j].start)
-                       continue;
-               ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
-                          j, &pdev->resource[j],
-                          (unsigned long long)pci_resource_len(pdev, j));
-       }
-
-       if (!addr) {
-               ipath_dev_err(dd, "No valid address in BAR 0!\n");
-               ret = -ENODEV;
-               goto bail_regions;
-       }
-
-       dd->ipath_pcirev = pdev->revision;
-
-#if defined(__powerpc__)
-       /* There isn't a generic way to specify writethrough mappings */
-       dd->ipath_kregbase = __ioremap(addr, len,
-               (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
-#else
-       /* XXX: split this properly to enable on PAT */
-       dd->ipath_kregbase = ioremap_nocache(addr, len);
-#endif
-
-       if (!dd->ipath_kregbase) {
-               ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
-                         addr);
-               ret = -ENOMEM;
-               goto bail_iounmap;
-       }
-       dd->ipath_kregend = (u64 __iomem *)
-               ((void __iomem *)dd->ipath_kregbase + len);
-       dd->ipath_physaddr = addr;      /* used for io_remap, etc. */
-       /* for user mmap */
-       ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
-                  addr, dd->ipath_kregbase);
-
-       if (dd->ipath_f_bus(dd, pdev))
-               ipath_dev_err(dd, "Failed to setup config space; "
-                             "continuing anyway\n");
-
-       /*
-        * set up our interrupt handler; IRQF_SHARED probably isn't needed,
-        * since MSI interrupts shouldn't be shared, but it won't hurt for
-        * now.  Check for a 0 irq after we return from chip-specific bus
-        * setup, since that setup can affect this.
-        */
-       if (!dd->ipath_irq)
-               ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
-                             "work\n");
-       else {
-               ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
-                                 IPATH_DRV_NAME, dd);
-               if (ret) {
-                       ipath_dev_err(dd, "Couldn't setup irq handler, "
-                                     "irq=%d: %d\n", dd->ipath_irq, ret);
-                       goto bail_iounmap;
-               }
-       }
-
-       ret = ipath_init_chip(dd, 0);   /* do the chip-specific init */
-       if (ret)
-               goto bail_irqsetup;
-
-       ret = ipath_enable_wc(dd);
-
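-       /* write combining is only a performance optimization; ignore failure */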
-       if (ret)
-               ret = 0;
-
-       ipath_verify_pioperf(dd);
-
-       ipath_device_create_group(&pdev->dev, dd);
-       ipathfs_add_device(dd);
-       ipath_user_add(dd);
-       ipath_diag_add(dd);
-       ipath_register_ib_device(dd);
-
-       goto bail;
-
-bail_irqsetup:
-       cleanup_device(dd);
-
-       if (dd->ipath_irq)
-               dd->ipath_f_free_irq(dd);
-
-       if (dd->ipath_f_cleanup)
-               dd->ipath_f_cleanup(dd);
-
-bail_iounmap:
-       iounmap((volatile void __iomem *) dd->ipath_kregbase);
-
-bail_regions:
-       pci_release_regions(pdev);
-
-bail_disable:
-       pci_disable_device(pdev);
-
-bail_devdata:
-       ipath_free_devdata(pdev, dd);
-
-bail:
-       return ret;
-}
-
-static void cleanup_device(struct ipath_devdata *dd)
-{
-       int port;
-       struct ipath_portdata **tmp;
-       unsigned long flags;
-
-       if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
-               /* can't do anything more with chip; needs re-init */
-               *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
-               if (dd->ipath_kregbase) {
-                       /*
-                        * in case we haven't already cleaned up, clear
-                        * these out to ensure any register reads/writes
-                        * "fail" until re-init
-                        */
-                       dd->ipath_kregbase = NULL;
-                       dd->ipath_uregbase = 0;
-                       dd->ipath_sregbase = 0;
-                       dd->ipath_cregbase = 0;
-                       dd->ipath_kregsize = 0;
-               }
-               ipath_disable_wc(dd);
-       }
-
-       if (dd->ipath_spectriggerhit)
-               dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
-                        dd->ipath_spectriggerhit);
-
-       if (dd->ipath_pioavailregs_dma) {
-               dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                 (void *) dd->ipath_pioavailregs_dma,
-                                 dd->ipath_pioavailregs_phys);
-               dd->ipath_pioavailregs_dma = NULL;
-       }
-       if (dd->ipath_dummy_hdrq) {
-               dma_free_coherent(&dd->pcidev->dev,
-                       dd->ipath_pd[0]->port_rcvhdrq_size,
-                       dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
-               dd->ipath_dummy_hdrq = NULL;
-       }
-
-       if (dd->ipath_pageshadow) {
-               struct page **tmpp = dd->ipath_pageshadow;
-               dma_addr_t *tmpd = dd->ipath_physshadow;
-               int i, cnt = 0;
-
-               ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
-                          "locked\n");
-               for (port = 0; port < dd->ipath_cfgports; port++) {
-                       int port_tidbase = port * dd->ipath_rcvtidcnt;
-                       int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-                       for (i = port_tidbase; i < maxtid; i++) {
-                               if (!tmpp[i])
-                                       continue;
-                               pci_unmap_page(dd->pcidev, tmpd[i],
-                                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                               ipath_release_user_pages(&tmpp[i], 1);
-                               tmpp[i] = NULL;
-                               cnt++;
-                       }
-               }
-               if (cnt) {
-                       ipath_stats.sps_pageunlocks += cnt;
-                       ipath_cdbg(VERBOSE, "There were still %u expTID "
-                                  "entries locked\n", cnt);
-               }
-               if (ipath_stats.sps_pagelocks ||
-                   ipath_stats.sps_pageunlocks)
-                       ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
-                                  "unlocked via ipath_m{un}lock\n",
-                                  (unsigned long long)
-                                  ipath_stats.sps_pagelocks,
-                                  (unsigned long long)
-                                  ipath_stats.sps_pageunlocks);
-
-               ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
-                          dd->ipath_pageshadow);
-               tmpp = dd->ipath_pageshadow;
-               dd->ipath_pageshadow = NULL;
-               vfree(tmpp);
-
-               dd->ipath_egrtidbase = NULL;
-       }
-
-       /*
-        * free any resources still in use (usually just kernel ports)
-        * at unload; we iterate over portcnt, because that's what we
-        * allocate.  We acquire the lock to be really paranoid that
-        * ipath_pd isn't being accessed from some interrupt-related code
-        * (which should not happen, but best to be sure).
-        */
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       tmp = dd->ipath_pd;
-       dd->ipath_pd = NULL;
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-       for (port = 0; port < dd->ipath_portcnt; port++) {
-               struct ipath_portdata *pd = tmp[port];
-               tmp[port] = NULL; /* debugging paranoia */
-               ipath_free_pddata(dd, pd);
-       }
-       kfree(tmp);
-}
-
-static void ipath_remove_one(struct pci_dev *pdev)
-{
-       struct ipath_devdata *dd = pci_get_drvdata(pdev);
-
-       ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
-
-       /*
-        * disable the IB link early, to be sure no new packets arrive, which
-        * complicates the shutdown process
-        */
-       ipath_shutdown_device(dd);
-
-       flush_workqueue(ib_wq);
-
-       if (dd->verbs_dev)
-               ipath_unregister_ib_device(dd->verbs_dev);
-
-       ipath_diag_remove(dd);
-       ipath_user_remove(dd);
-       ipathfs_remove_device(dd);
-       ipath_device_remove_group(&pdev->dev, dd);
-
-       ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
-                  "unit %u\n", dd, (u32) dd->ipath_unit);
-
-       cleanup_device(dd);
-
-       /*
-        * turn off rcv, send, and interrupts for all ports (should all
-        * drivers also hard reset the chip here?); free up port 0
-        * (kernel) rcvhdr, egr bufs, and eventually tid bufs for all
-        * versions of the driver, if they were allocated
-        */
-       if (dd->ipath_irq) {
-               ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
-                          dd->ipath_unit, dd->ipath_irq);
-               dd->ipath_f_free_irq(dd);
-       } else
-               ipath_dbg("irq is 0, not doing free_irq "
-                         "for unit %u\n", dd->ipath_unit);
-       /*
-        * we check for NULL here, because it's outside
-        * the kregbase check, and we need to call it
-        * after the free_irq.  Thus it's possible that
-        * the function pointers were never initialized.
-        */
-       if (dd->ipath_f_cleanup)
-               /* clean up chip-specific stuff */
-               dd->ipath_f_cleanup(dd);
-
-       ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
-       iounmap((volatile void __iomem *) dd->ipath_kregbase);
-       pci_release_regions(pdev);
-       ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
-       pci_disable_device(pdev);
-
-       ipath_free_devdata(pdev, dd);
-}
-
-/* general driver use */
-DEFINE_MUTEX(ipath_mutex);
-
-static DEFINE_SPINLOCK(ipath_pioavail_lock);
-
-/**
- * ipath_disarm_piobufs - cancel a range of PIO buffers
- * @dd: the infinipath device
- * @first: the first PIO buffer to cancel
- * @cnt: the number of PIO buffers to cancel
- *
- * cancel a range of PIO buffers, used when they might be armed, but
- * not triggered.  Used at init to ensure buffer state, at user process
- * close in case it died while writing to a PIO buffer, and also after
- * errors.
- */
-void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
-                         unsigned cnt)
-{
-       unsigned i, last = first + cnt;
-       unsigned long flags;
-
-       ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
-       for (i = first; i < last; i++) {
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               /*
-                * The disarm-related bits are write-only, so it
-                * is ok to OR them in with our copy of sendctrl
-                * while we hold the lock.
-                */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl | INFINIPATH_S_DISARM |
-                       (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
-               /* can't disarm bufs back-to-back per iba7220 spec */
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-       /* on some older chips, update may not happen after cancel */
-       ipath_force_pio_avail_update(dd);
-}
-
-/**
- * ipath_wait_linkstate - wait for an IB link state change to occur
- * @dd: the infinipath device
- * @state: the state to wait for
- * @msecs: the number of milliseconds to wait
- *
- * wait up to msecs milliseconds for an IB link state change to occur;
- * for now, take the easy polling route.  Currently used only by
- * ipath_set_linkstate.  Returns 0 if the state is reached, otherwise
- * -ETIMEDOUT.  @state can have multiple bits set, to match any of
- * several transitions.
- */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
-{
-       dd->ipath_state_wanted = state;
-       wait_event_interruptible_timeout(ipath_state_wait,
-                                        (dd->ipath_flags & state),
-                                        msecs_to_jiffies(msecs));
-       dd->ipath_state_wanted = 0;
-
-       if (!(dd->ipath_flags & state)) {
-               u64 val;
-               ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
-                          " ms\n",
-                          /* test INIT ahead of DOWN, both can be set */
-                          (state & IPATH_LINKINIT) ? "INIT" :
-                          ((state & IPATH_LINKDOWN) ? "DOWN" :
-                           ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
-                          msecs);
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-               ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
-                          (unsigned long long) ipath_read_kreg64(
-                                  dd, dd->ipath_kregs->kr_ibcctrl),
-                          (unsigned long long) val,
-                          ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
-       }
-       return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
-}
-
-static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
-       char *buf, size_t blen)
-{
-       static const struct {
-               ipath_err_t err;
-               const char *msg;
-       } errs[] = {
-               { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
-               { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
-               { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
-               { INFINIPATH_E_SDMABASE, "SDmaBase" },
-               { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
-               { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
-               { INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
-               { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
-               { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
-               { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
-               { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
-               { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
-       };
-       int i;
-       int expected;
-       size_t bidx = 0;
-
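-       /* SDmaDisabled is expected, and not reported, while an abort is in progress */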
-       for (i = 0; i < ARRAY_SIZE(errs); i++) {
-               expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
-                       test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-               if ((err & errs[i].err) && !expected)
-                       bidx += snprintf(buf + bidx, blen - bidx,
-                                        "%s ", errs[i].msg);
-       }
-}
-
-/*
- * Decode the error status into strings, deciding whether to always
- * print it or not depending on "normal packet errors" vs everything
- * else.  Return 1 if "real" errors, otherwise 0 if only packet
- * errors, so the caller can decide what to print with the string.
- */
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-       ipath_err_t err)
-{
-       int iserr = 1;
-       *buf = '\0';
-       if (err & INFINIPATH_E_PKTERRS) {
-               if (!(err & ~INFINIPATH_E_PKTERRS))
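-       /*
-        * "map" each entry to its kernel virtual address; fail the whole
-        * list if any page has no mapping (e.g. highmem).
-        */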
-                       iserr = 0; // if only packet errors.
-               if (ipath_debug & __IPATH_ERRPKTDBG) {
-                       if (err & INFINIPATH_E_REBP)
-                               strlcat(buf, "EBP ", blen);
-                       if (err & INFINIPATH_E_RVCRC)
-                               strlcat(buf, "VCRC ", blen);
-                       if (err & INFINIPATH_E_RICRC) {
-                               strlcat(buf, "CRC ", blen);
-                               // clear for check below, so only once
-                               err &= INFINIPATH_E_RICRC;
-                       }
-                       if (err & INFINIPATH_E_RSHORTPKTLEN)
-                               strlcat(buf, "rshortpktlen ", blen);
-                       if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-                               strlcat(buf, "sdroppeddatapkt ", blen);
-                       if (err & INFINIPATH_E_SPKTLEN)
-                               strlcat(buf, "spktlen ", blen);
-               }
-               if ((err & INFINIPATH_E_RICRC) &&
-                       !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
-                       strlcat(buf, "CRC ", blen);
-               if (!iserr)
-                       goto done;
-       }
-       if (err & INFINIPATH_E_RHDRLEN)
-               strlcat(buf, "rhdrlen ", blen);
-       if (err & INFINIPATH_E_RBADTID)
-               strlcat(buf, "rbadtid ", blen);
-       if (err & INFINIPATH_E_RBADVERSION)
-               strlcat(buf, "rbadversion ", blen);
-       if (err & INFINIPATH_E_RHDR)
-               strlcat(buf, "rhdr ", blen);
-       if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
-               strlcat(buf, "sendspecialtrigger ", blen);
-       if (err & INFINIPATH_E_RLONGPKTLEN)
-               strlcat(buf, "rlongpktlen ", blen);
-       if (err & INFINIPATH_E_RMAXPKTLEN)
-               strlcat(buf, "rmaxpktlen ", blen);
-       if (err & INFINIPATH_E_RMINPKTLEN)
-               strlcat(buf, "rminpktlen ", blen);
-       if (err & INFINIPATH_E_SMINPKTLEN)
-               strlcat(buf, "sminpktlen ", blen);
-       if (err & INFINIPATH_E_RFORMATERR)
-               strlcat(buf, "rformaterr ", blen);
-       if (err & INFINIPATH_E_RUNSUPVL)
-               strlcat(buf, "runsupvl ", blen);
-       if (err & INFINIPATH_E_RUNEXPCHAR)
-               strlcat(buf, "runexpchar ", blen);
-       if (err & INFINIPATH_E_RIBFLOW)
-               strlcat(buf, "ribflow ", blen);
-       if (err & INFINIPATH_E_SUNDERRUN)
-               strlcat(buf, "sunderrun ", blen);
-       if (err & INFINIPATH_E_SPIOARMLAUNCH)
-               strlcat(buf, "spioarmlaunch ", blen);
-       if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
-               strlcat(buf, "sunexperrpktnum ", blen);
-       if (err & INFINIPATH_E_SDROPPEDSMPPKT)
-               strlcat(buf, "sdroppedsmppkt ", blen);
-       if (err & INFINIPATH_E_SMAXPKTLEN)
-               strlcat(buf, "smaxpktlen ", blen);
-       if (err & INFINIPATH_E_SUNSUPVL)
-               strlcat(buf, "sunsupVL ", blen);
-       if (err & INFINIPATH_E_INVALIDADDR)
-               strlcat(buf, "invalidaddr ", blen);
-       if (err & INFINIPATH_E_RRCVEGRFULL)
-               strlcat(buf, "rcvegrfull ", blen);
-       if (err & INFINIPATH_E_RRCVHDRFULL)
-               strlcat(buf, "rcvhdrfull ", blen);
-       if (err & INFINIPATH_E_IBSTATUSCHANGED)
-               strlcat(buf, "ibcstatuschg ", blen);
-       if (err & INFINIPATH_E_RIBLOSTLINK)
-               strlcat(buf, "riblostlink ", blen);
-       if (err & INFINIPATH_E_HARDWARE)
-               strlcat(buf, "hardware ", blen);
-       if (err & INFINIPATH_E_RESET)
-               strlcat(buf, "reset ", blen);
-       if (err & INFINIPATH_E_SDMAERRS)
-               decode_sdma_errs(dd, err, buf, blen);
-       if (err & INFINIPATH_E_INVALIDEEPCMD)
-               strlcat(buf, "invalideepromcmd ", blen);
-done:
-       return iserr;
-}
-
-/**
- * get_rhf_errstring - decode RHF errors
- * @err: the err number
- * @msg: the output buffer
- * @len: the length of the output buffer
- *
- * only used one place now, may want more later
- */
-static void get_rhf_errstring(u32 err, char *msg, size_t len)
-{
-       /* start empty; if no errors, we don't need to check what's first */
-       *msg = '\0';
-
-       if (err & INFINIPATH_RHF_H_ICRCERR)
-               strlcat(msg, "icrcerr ", len);
-       if (err & INFINIPATH_RHF_H_VCRCERR)
-               strlcat(msg, "vcrcerr ", len);
-       if (err & INFINIPATH_RHF_H_PARITYERR)
-               strlcat(msg, "parityerr ", len);
-       if (err & INFINIPATH_RHF_H_LENERR)
-               strlcat(msg, "lenerr ", len);
-       if (err & INFINIPATH_RHF_H_MTUERR)
-               strlcat(msg, "mtuerr ", len);
-       if (err & INFINIPATH_RHF_H_IHDRERR)
-               /* infinipath hdr checksum error */
-               strlcat(msg, "ipathhdrerr ", len);
-       if (err & INFINIPATH_RHF_H_TIDERR)
-               strlcat(msg, "tiderr ", len);
-       if (err & INFINIPATH_RHF_H_MKERR)
-               /* bad port, offset, etc. */
-               strlcat(msg, "invalid ipathhdr ", len);
-       if (err & INFINIPATH_RHF_H_IBERR)
-               strlcat(msg, "iberr ", len);
-       if (err & INFINIPATH_RHF_L_SWA)
-               strlcat(msg, "swA ", len);
-       if (err & INFINIPATH_RHF_L_SWB)
-               strlcat(msg, "swB ", len);
-}
-
-/**
- * ipath_get_egrbuf - get an eager buffer
- * @dd: the infinipath device
- * @bufnum: the eager buffer to get
- *
- * must only be called if ipath_pd[port] is known to be allocated
- */
-static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
-{
-       return dd->ipath_port0_skbinfo ?
-               (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
-}
-
-/**
- * ipath_alloc_skb - allocate an skb and buffer with possible constraints
- * @dd: the infinipath device
- * @gfp_mask: the sk_buff GFP mask
- */
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
-                               gfp_t gfp_mask)
-{
-       struct sk_buff *skb;
-       u32 len;
-
-       /*
-        * The only fully supported way to handle this is to allocate lots
-        * of extra space, align as needed, and then do skb_reserve().
-        * That wastes a lot of memory...  I'll have to hack this into
-        * infinipath_copy also.
-        */
-
-       /*
-        * We need 2 extra bytes for ipath_ether data sent in the
-        * key header.  In order to keep everything dword aligned,
-        * we'll reserve 4 bytes.
-        */
-       len = dd->ipath_ibmaxlen + 4;
-
-       if (dd->ipath_flags & IPATH_4BYTE_TID) {
-               /* We need a 2KB multiple alignment, and there is no way
-                * to do it except to allocate extra and then skb_reserve
-                * enough to bring it up to the right alignment.
-                */
-               len += 2047;
-       }
-
-       skb = __dev_alloc_skb(len, gfp_mask);
-       if (!skb) {
-               ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
-                             len);
-               goto bail;
-       }
-
-       skb_reserve(skb, 4);
-
-       if (dd->ipath_flags & IPATH_4BYTE_TID) {
-               u32 una = (unsigned long)skb->data & 2047;
-               if (una)
-                       skb_reserve(skb, 2048 - una);
-       }
-
-bail:
-       return skb;
-}
-
-static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
-                            u32 eflags,
-                            u32 l,
-                            u32 etail,
-                            __le32 *rhf_addr,
-                            struct ipath_message_header *hdr)
-{
-       char emsg[128];
-
-       get_rhf_errstring(eflags, emsg, sizeof emsg);
-       ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
-                  "tlen=%x opcode=%x egridx=%x: %s\n",
-                  eflags, l,
-                  ipath_hdrget_rcv_type(rhf_addr),
-                  ipath_hdrget_length_in_bytes(rhf_addr),
-                  be32_to_cpu(hdr->bth[0]) >> 24,
-                  etail, emsg);
-
-       /* Count local link integrity errors. */
-       if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
-               u8 n = (dd->ipath_ibcctrl >>
-                       INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-                       INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-
-               if (++dd->ipath_lli_counter > n) {
-                       dd->ipath_lli_counter = 0;
-                       dd->ipath_lli_errors++;
-               }
-       }
-}
-
-/*
- * ipath_kreceive - receive a packet
- * @pd: the infinipath port
- *
- * called from interrupt handler for errors or receive interrupt
- */
-void ipath_kreceive(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       __le32 *rhf_addr;
-       void *ebuf;
-       const u32 rsize = dd->ipath_rcvhdrentsize;      /* words */
-       const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
-       u32 etail = -1, l, hdrqtail;
-       struct ipath_message_header *hdr;
-       u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
-       static u64 totcalls;    /* stats, may eventually remove */
-       int last;
-
-       l = pd->port_head;
-       rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
-       if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-               u32 seq = ipath_hdrget_seq(rhf_addr);
-
-               if (seq != pd->port_seq_cnt)
-                       goto bail;
-               hdrqtail = 0;
-       } else {
-               hdrqtail = ipath_get_rcvhdrtail(pd);
-               if (l == hdrqtail)
-                       goto bail;
-               smp_rmb();
-       }
-
-reloop:
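-       /*
-        * i counts entries processed; i += !last leaves i unchanged on
-        * the final pass, once last has been set.
-        */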
-       for (last = 0, i = 1; !last; i += !last) {
-               hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
-               eflags = ipath_hdrget_err_flags(rhf_addr);
-               etype = ipath_hdrget_rcv_type(rhf_addr);
-               /* total length */
-               tlen = ipath_hdrget_length_in_bytes(rhf_addr);
-               ebuf = NULL;
-               if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
-                   ipath_hdrget_use_egr_buf(rhf_addr) :
-                   (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
-                       /*
-                        * It turns out that the chip uses an eager buffer
-                        * for all non-expected packets, whether it "needs"
-                        * one or not.  So always get the index, but don't
-                        * set ebuf (so we try to copy data) unless the
-                        * length requires it.
-                        */
-                       etail = ipath_hdrget_index(rhf_addr);
-                       updegr = 1;
-                       if (tlen > sizeof(*hdr) ||
-                           etype == RCVHQ_RCV_TYPE_NON_KD)
-                               ebuf = ipath_get_egrbuf(dd, etail);
-               }
-
-               /*
-                * both tiderr and ipathhdrerr are set for all plain IB
-                * packets; only ipathhdrerr should be set.
-                */
-
-               if (etype != RCVHQ_RCV_TYPE_NON_KD &&
-                   etype != RCVHQ_RCV_TYPE_ERROR &&
-                   ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
-                   IPS_PROTO_VERSION)
-                       ipath_cdbg(PKT, "Bad InfiniPath protocol version "
-                                  "%x\n", etype);
-
-               if (unlikely(eflags))
-                       ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
-               else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-                       ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
-                       if (dd->ipath_lli_counter)
-                               dd->ipath_lli_counter--;
-               } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-                       u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
-                       u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
-                       ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-                                  "qp=%x), len %x; ignored\n",
-                                  etype, opcode, qp, tlen);
-               } else if (etype == RCVHQ_RCV_TYPE_EXPECTED) {
-                       ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
-                                 be32_to_cpu(hdr->bth[0]) >> 24);
-               } else {
-                       /*
-                        * Error packet, type of error unknown.
-                        * Probably type 3, but we don't know, so don't
-                        * even try to print the opcode, etc.
-                        * Usually caused by a "bad packet" that has no
-                        * BTH even though the LRH says it should.
-                        */
-                       ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
-                                 " %x, len %x hdrq+%x rhf: %Lx\n",
-                                 etail, tlen, l, (unsigned long long)
-                                 le64_to_cpu(*(__le64 *) rhf_addr));
-                       if (ipath_debug & __IPATH_ERRPKTDBG) {
-                               u32 j, *d, dw = rsize-2;
-                               if (rsize > (tlen>>2))
-                                       dw = tlen>>2;
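-                               /* dump at most the header entry less two
-                                * words, bounded by the packet length */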
-                               d = (u32 *)hdr;
-                               printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
-                                       dw);
-                               for (j = 0; j < dw; j++)
-                                       printk(KERN_DEBUG "%8x%s", d[j],
-                                               (j%8) == 7 ? "\n" : " ");
-                               printk(KERN_DEBUG ".\n");
-                       }
-               }
-               l += rsize;
-               if (l >= maxcnt)
-                       l = 0;
-               rhf_addr = (__le32 *) pd->port_rcvhdrq +
-                       l + dd->ipath_rhf_offset;
-               if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-                       u32 seq = ipath_hdrget_seq(rhf_addr);
-
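-                       /* expected sequence numbers cycle through 1..13 */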
-                       if (++pd->port_seq_cnt > 13)
-                               pd->port_seq_cnt = 1;
-                       if (seq != pd->port_seq_cnt)
-                               last = 1;
-               } else if (l == hdrqtail) {
-                       last = 1;
-               }
-               /*
-                * update head regs on last packet, and every 16 packets.
-                * Reduce bus traffic, while still trying to prevent
-                * rcvhdrq overflows, for when the queue is nearly full
-                */
-               if (last || !(i & 0xf)) {
-                       u64 lval = l;
-
-                       /* request IBA6120 and 7220 interrupt only on last */
-                       if (last)
-                               lval |= dd->ipath_rhdrhead_intr_off;
-                       ipath_write_ureg(dd, ur_rcvhdrhead, lval,
-                               pd->port_port);
-                       if (updegr) {
-                               ipath_write_ureg(dd, ur_rcvegrindexhead,
-                                                etail, pd->port_port);
-                               updegr = 0;
-                       }
-               }
-       }
-
-       if (!dd->ipath_rhdrhead_intr_off && !reloop &&
-           !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-               /* IBA6110 workaround: when using the GPIO workaround we
-                * can race clearing the chip interrupt with another
-                * interrupt about to be delivered, and clear it before
-                * it is delivered.  By doing the extra check here for
-                * the in-memory tail register having updated while we
-                * were handling earlier packets, we "almost" guarantee
-                * we have covered that case.
-                */
-               u32 hqtail = ipath_get_rcvhdrtail(pd);
-               if (hqtail != hdrqtail) {
-                       hdrqtail = hqtail;
-                       reloop = 1; /* loop 1 extra time at most */
-                       goto reloop;
-               }
-       }
-
-       pkttot += i;
-
-       pd->port_head = l;
-
-       if (pkttot > ipath_stats.sps_maxpkts_call)
-               ipath_stats.sps_maxpkts_call = pkttot;
-       ipath_stats.sps_port0pkts += pkttot;
-       ipath_stats.sps_avgpkts_call =
-               ipath_stats.sps_port0pkts / ++totcalls;
-
-bail:;
-}
-
-/**
- * ipath_update_pio_bufs - update shadow copy of the PIO availability map
- * @dd: the infinipath device
- *
- * called whenever our local copy indicates we have run out of send buffers
- * NOTE: This can be called from interrupt context by some code
- * and from non-interrupt context by ipath_getpiobuf().
- */
-
-static void ipath_update_pio_bufs(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int i;
-       const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
-
-       /* If the generation (check) bits have changed, then we update the
-        * busy bit for the corresponding PIO buffer.  This algorithm will
-        * modify positions to the value they already have in some cases
-        * (i.e., no change), but it's faster than changing only the bits
-        * that have changed.
-        *
-        * We would like to do this atomically, to avoid spinlocks in the
-        * critical send path, but that's not really possible, given the
-        * type of changes, and that this routine could be called on
-        * multiple CPUs simultaneously, so we lock in this routine only,
-        * to avoid conflicting updates; all we change is the shadow, and
-        * it's a single 64-bit memory location, so by definition the update
-        * is atomic in terms of what other CPUs can see in testing the
-        * bits.  The spin_lock overhead isn't too bad, since it only
-        * happens when all buffers are in use, so only CPU overhead, not
-        * latency or bandwidth is affected.
-        */
-       if (!dd->ipath_pioavailregs_dma) {
-               ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
-               return;
-       }
-       if (ipath_debug & __IPATH_VERBDBG) {
-               /* only if packet debug and verbose */
-               volatile __le64 *dma = dd->ipath_pioavailregs_dma;
-               unsigned long *shadow = dd->ipath_pioavailshadow;
-
-               ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
-                          "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
-                          "s3=%lx\n",
-                          (unsigned long long) le64_to_cpu(dma[0]),
-                          shadow[0],
-                          (unsigned long long) le64_to_cpu(dma[1]),
-                          shadow[1],
-                          (unsigned long long) le64_to_cpu(dma[2]),
-                          shadow[2],
-                          (unsigned long long) le64_to_cpu(dma[3]),
-                          shadow[3]);
-               if (piobregs > 4)
-                       ipath_cdbg(
-                               PKT, "2nd group, dma4=%llx shad4=%lx, "
-                               "d5=%llx s5=%lx, d6=%llx s6=%lx, "
-                               "d7=%llx s7=%lx\n",
-                               (unsigned long long) le64_to_cpu(dma[4]),
-                               shadow[4],
-                               (unsigned long long) le64_to_cpu(dma[5]),
-                               shadow[5],
-                               (unsigned long long) le64_to_cpu(dma[6]),
-                               shadow[6],
-                               (unsigned long long) le64_to_cpu(dma[7]),
-                               shadow[7]);
-       }
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       for (i = 0; i < piobregs; i++) {
-               u64 pchbusy, pchg, piov, pnew;
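-               /*
-                * pchg selects kernel-owned buffers whose generation bit
-                * in the chip's copy has caught up with the shadow's;
-                * pchbusy is that selection shifted to the busy-bit
-                * positions, where the chip's value replaces the shadow's.
-                */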
-               /*
-                * Chip Errata: bug 6641; even and odd qwords>3 are swapped
-                */
-               if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-                       piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
-               else
-                       piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
-               pchg = dd->ipath_pioavailkernel[i] &
-                       ~(dd->ipath_pioavailshadow[i] ^ piov);
-               pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
-               if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
-                       pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
-                       pnew |= piov & pchbusy;
-                       dd->ipath_pioavailshadow[i] = pnew;
-               }
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/*
- * Used to force an update of pioavailshadow if we can't get a pio buffer.
- * Needed primarily when exiting freeze mode after recovering
- * from errors.  Done lazily, because it's safer (known not to
- * be writing pio buffers).
- */
-static void ipath_reset_availshadow(struct ipath_devdata *dd)
-{
-       int i, im;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       for (i = 0; i < dd->ipath_pioavregs; i++) {
-               u64 val, oldval;
-               /* deal with 6110 chip bug on high register #s */
-               im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-                       i ^ 1 : i;
-               val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
-               /*
-                * busy out the buffers not in the kernel avail list,
-                * without changing the generation bits.
-                */
-               oldval = dd->ipath_pioavailshadow[i];
-               dd->ipath_pioavailshadow[i] = val |
-                       ((~dd->ipath_pioavailkernel[i] <<
-                       INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
-                       0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
-               if (oldval != dd->ipath_pioavailshadow[i])
-                       ipath_dbg("shadow[%d] was %Lx, now %lx\n",
-                               i, (unsigned long long) oldval,
-                               dd->ipath_pioavailshadow[i]);
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/**
- * ipath_setrcvhdrsize - set the receive header size
- * @dd: the infinipath device
- * @rhdrsize: the receive header size
- *
- * called from user init code, and also layered driver init
- */
-int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
-{
-       int ret = 0;
-
-       if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
-               if (dd->ipath_rcvhdrsize != rhdrsize) {
-                       dev_info(&dd->pcidev->dev,
-                                "Error: can't set protocol header "
-                                "size %u, already %u\n",
-                                rhdrsize, dd->ipath_rcvhdrsize);
-                       ret = -EAGAIN;
-               } else
-                       ipath_cdbg(VERBOSE, "Reuse same protocol header "
-                                  "size %u\n", dd->ipath_rcvhdrsize);
-       } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
-                              (sizeof(u64) / sizeof(u32)))) {
-               ipath_dbg("Error: can't set protocol header size %u "
-                         "(> max %u)\n", rhdrsize,
-                         dd->ipath_rcvhdrentsize -
-                         (u32) (sizeof(u64) / sizeof(u32)));
-               ret = -EOVERFLOW;
-       } else {
-               dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
-               dd->ipath_rcvhdrsize = rhdrsize;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-                                dd->ipath_rcvhdrsize);
-               ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
-                          dd->ipath_rcvhdrsize);
-       }
-       return ret;
-}
-
-/*
- * debugging code and stats updates if no pio buffers available.
- */
-static noinline void no_pio_bufs(struct ipath_devdata *dd)
-{
-       unsigned long *shadow = dd->ipath_pioavailshadow;
-       __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
-
-       dd->ipath_upd_pio_shadow = 1;
-
-       /*
-        * not atomic, but if we lose a stat count in a while, that's OK
-        */
-       ipath_stats.sps_nopiobufs++;
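-       /* dump state only once per 100000 consecutive allocation failures */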
-       if (!(++dd->ipath_consec_nopiobuf % 100000)) {
-               ipath_force_pio_avail_update(dd); /* at start */
-               ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
-                       "%llx %llx %llx %llx\n"
-                       "ipath  shadow:  %lx %lx %lx %lx\n",
-                       dd->ipath_consec_nopiobuf,
-                       (unsigned long)get_cycles(),
-                       (unsigned long long) le64_to_cpu(dma[0]),
-                       (unsigned long long) le64_to_cpu(dma[1]),
-                       (unsigned long long) le64_to_cpu(dma[2]),
-                       (unsigned long long) le64_to_cpu(dma[3]),
-                       shadow[0], shadow[1], shadow[2], shadow[3]);
-               /*
-                * 4 buffers per byte, 4 registers above, cover rest
-                * below
-                */
-               if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
-                   (sizeof(shadow[0]) * 4 * 4))
-                       ipath_dbg("2nd group: dmacopy: "
-                                 "%llx %llx %llx %llx\n"
-                                 "ipath  shadow:  %lx %lx %lx %lx\n",
-                                 (unsigned long long)le64_to_cpu(dma[4]),
-                                 (unsigned long long)le64_to_cpu(dma[5]),
-                                 (unsigned long long)le64_to_cpu(dma[6]),
-                                 (unsigned long long)le64_to_cpu(dma[7]),
-                                 shadow[4], shadow[5], shadow[6], shadow[7]);
-
-               /* at end, so update likely happened */
-               ipath_reset_availshadow(dd);
-       }
-}
-
-/*
- * common code for normal driver pio buffer allocation, and reserved
- * allocation.
- *
- * do appropriate marking as busy, etc.
- * returns buffer number if one found (>=0), negative number is error.
- */
-static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
-       u32 *pbufnum, u32 first, u32 last, u32 firsti)
-{
-       int i, j, updated = 0;
-       unsigned piobcnt;
-       unsigned long flags;
-       unsigned long *shadow = dd->ipath_pioavailshadow;
-       u32 __iomem *buf;
-
-       piobcnt = last - first;
-       if (dd->ipath_upd_pio_shadow) {
-               /*
-                * Minor optimization.  If we had no buffers on last call,
-                * start out by doing the update; continue and do scan even
-                * if no buffers were updated, to be paranoid
-                */
-               ipath_update_pio_bufs(dd);
-               updated++;
-               i = first;
-       } else
-               i = firsti;
-rescan:
-       /*
-        * While test_and_set_bit() is atomic, doing that and then the
-        * change_bit() as a pair is not.  See if this is the cause
-        * of the remaining armlaunch errors.
-        */
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
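-       /*
-        * Shadow layout, two bits per buffer i:
-        *   bit 2*i     generation (check) bit
-        *   bit 2*i + 1 busy bit
-        */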
-       for (j = 0; j < piobcnt; j++, i++) {
-               if (i >= last)
-                       i = first;
-               if (__test_and_set_bit((2 * i) + 1, shadow))
-                       continue;
-               /* flip generation bit */
-               __change_bit(2 * i, shadow);
-               break;
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-       if (j == piobcnt) {
-               if (!updated) {
-                       /*
-                        * first time through; shadow exhausted, but may be
-                        * buffers available, try an update and then rescan.
-                        */
-                       ipath_update_pio_bufs(dd);
-                       updated++;
-                       i = first;
-                       goto rescan;
-               } else if (updated == 1 && piobcnt <=
-                       ((dd->ipath_sendctrl
-                       >> INFINIPATH_S_UPDTHRESH_SHIFT) &
-                       INFINIPATH_S_UPDTHRESH_MASK)) {
-                       /*
-                        * For chips that support and use the update
-                        * threshold, we need to force an update of the
-                        * in-memory copy if the count is less than the
-                        * threshold, then check one more time.
-                        */
-                       ipath_force_pio_avail_update(dd);
-                       ipath_update_pio_bufs(dd);
-                       updated++;
-                       i = first;
-                       goto rescan;
-               }
-
-               no_pio_bufs(dd);
-               buf = NULL;
-       } else {
-               if (i < dd->ipath_piobcnt2k)
-                       buf = (u32 __iomem *) (dd->ipath_pio2kbase +
-                                              i * dd->ipath_palign);
-               else
-                       buf = (u32 __iomem *)
-                               (dd->ipath_pio4kbase +
-                                (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
-               if (pbufnum)
-                       *pbufnum = i;
-       }
-
-       return buf;
-}
-
-/**
- * ipath_getpiobuf - find an available pio buffer
- * @dd: the infinipath device
- * @plen: the size of the PIO buffer needed in 32-bit words
- * @pbufnum: the buffer number is placed here
- */
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
-{
-       u32 __iomem *buf;
-       u32 pnum, nbufs;
-       u32 first, lasti;
-
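-       /*
-        * Requests that don't fit in a small (2k) buffer are served
-        * from the 4k pool, which starts at index ipath_piobcnt2k; the
-        * last-used index in each pool is kept as a scan-start hint.
-        */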
-       if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
-               first = dd->ipath_piobcnt2k;
-               lasti = dd->ipath_lastpioindexl;
-       } else {
-               first = 0;
-               lasti = dd->ipath_lastpioindex;
-       }
-       nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
-
-       if (buf) {
-               /*
-                * Set next starting place.  It's just an optimization,
-                * it doesn't matter who wins on this, so no locking
-                */
-               if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-                       dd->ipath_lastpioindexl = pnum + 1;
-               else
-                       dd->ipath_lastpioindex = pnum + 1;
-               if (dd->ipath_upd_pio_shadow)
-                       dd->ipath_upd_pio_shadow = 0;
-               if (dd->ipath_consec_nopiobuf)
-                       dd->ipath_consec_nopiobuf = 0;
-               ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
-                          pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
-               if (pbufnum)
-                       *pbufnum = pnum;
-
-       }
-       return buf;
-}
-
-/**
- * ipath_chg_pioavailkernel - change which send buffers are available for kernel
- * @dd: the infinipath device
- * @start: the starting send buffer number
- * @len: the number of send buffers
- * @avail: true if the buffers are available for kernel use, false otherwise
- */
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-                             unsigned len, int avail)
-{
-       unsigned long flags;
-       unsigned end, cnt = 0;
-
-       /* There are two bits per send buffer (busy and generation) */
-       start *= 2;
-       end = start + len * 2;
-
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       /* Set or clear the busy bit in the shadow. */
-       while (start < end) {
-               if (avail) {
-                       unsigned long dma;
-                       int i, im;
-                       /*
-                        * The BUSY bit will never be set, because we disarm
-                        * the user buffers before we hand them back to the
-                        * kernel.  We do have to make sure the generation
-                        * bit is set correctly in the shadow, since it could
-                        * have changed many times while allocated to the user.
-                        * We can't use the bitmap functions on the full
-                        * dma array because it is always little-endian, so
-                        * we have to flip to host order first.
-                        * BITS_PER_LONG is slightly wrong here, since it's
-                        * always 64 bits per register in the chip...
-                        * We only work on 64-bit kernels, so that's OK.
-                        */
-                       /* deal with 6110 chip bug on high register #s */
-                       i = start / BITS_PER_LONG;
-                       im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-                               i ^ 1 : i;
-                       __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
-                               + start, dd->ipath_pioavailshadow);
-                       dma = (unsigned long) le64_to_cpu(
-                               dd->ipath_pioavailregs_dma[im]);
-                       if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                               + start) % BITS_PER_LONG, &dma))
-                               __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                                       + start, dd->ipath_pioavailshadow);
-                       else
-                               __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                                       + start, dd->ipath_pioavailshadow);
-                       __set_bit(start, dd->ipath_pioavailkernel);
-               } else {
-                       __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
-                               dd->ipath_pioavailshadow);
-                       __clear_bit(start, dd->ipath_pioavailkernel);
-               }
-               start += 2;
-       }
-
-       if (dd->ipath_pioupd_thresh) {
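-               /* count how many buffers are still kernel-available */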
-               end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-               cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-       /*
-        * When moving buffers from kernel to user, if number assigned to
-        * the user is less than the pio update threshold, and threshold
-        * is supported (cnt was computed > 0), drop the update threshold
-        * so we update at least once per allocated number of buffers.
-        * In any case, if the kernel buffers are less than the threshold,
-        * drop the threshold.  We don't bother increasing it, having once
-        * decreased it, since it would typically just cycle back and forth.
-        * If we don't decrease below buffers in use, we can wait a long
-        * time for an update, until some other context uses PIO buffers.
-        */
-       if (!avail && len < cnt)
-               cnt = len;
-       if (cnt < dd->ipath_pioupd_thresh) {
-               dd->ipath_pioupd_thresh = cnt;
-               ipath_dbg("Decreased pio update threshold to %u\n",
-                       dd->ipath_pioupd_thresh);
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
-                       << INFINIPATH_S_UPDTHRESH_SHIFT);
-               dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-                       << INFINIPATH_S_UPDTHRESH_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-}
-
-/**
- * ipath_create_rcvhdrq - create a receive header queue
- * @dd: the infinipath device
- * @pd: the port data
- *
- * This must be contiguous memory (from an I/O perspective), and must be
- * DMA'able (which means for some systems, it will go through an IOMMU,
- * or be forced into a low address range).
- */
-int ipath_create_rcvhdrq(struct ipath_devdata *dd,
-                        struct ipath_portdata *pd)
-{
-       int ret = 0;
-
-       if (!pd->port_rcvhdrq) {
-               dma_addr_t phys_hdrqtail;
-               gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-               int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-                               sizeof(u32), PAGE_SIZE);
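-               /* ring size in bytes: rcvhdrcnt entries of rcvhdrentsize
-                * words each, rounded up to whole pages */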
-
-               pd->port_rcvhdrq = dma_alloc_coherent(
-                       &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
-                       gfp_flags);
-
-               if (!pd->port_rcvhdrq) {
-                       ipath_dev_err(dd, "attempt to allocate %d bytes "
-                                     "for port %u rcvhdrq failed\n",
-                                     amt, pd->port_port);
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-                       pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
-                               &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
-                               GFP_KERNEL);
-                       if (!pd->port_rcvhdrtail_kvaddr) {
-                               ipath_dev_err(dd, "attempt to allocate 1 page "
-                                       "for port %u rcvhdrqtailaddr "
-                                       "failed\n", pd->port_port);
-                               ret = -ENOMEM;
-                               dma_free_coherent(&dd->pcidev->dev, amt,
-                                       pd->port_rcvhdrq,
-                                       pd->port_rcvhdrq_phys);
-                               pd->port_rcvhdrq = NULL;
-                               goto bail;
-                       }
-                       pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
-                       ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
-                                  "physical\n", pd->port_port,
-                                  (unsigned long long) phys_hdrqtail);
-               }
-
-               pd->port_rcvhdrq_size = amt;
-
-               ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
-                          "for port %u rcvhdr Q\n",
-                          amt >> PAGE_SHIFT, pd->port_rcvhdrq,
-                          (unsigned long) pd->port_rcvhdrq_phys,
-                          (unsigned long) pd->port_rcvhdrq_size,
-                          pd->port_port);
-       } else {
-               ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
-                          "hdrtailaddr@%p %llx physical\n",
-                          pd->port_port, pd->port_rcvhdrq,
-                          (unsigned long long) pd->port_rcvhdrq_phys,
-                          pd->port_rcvhdrtail_kvaddr, (unsigned long long)
-                          pd->port_rcvhdrqtailaddr_phys);
-       }
-       /* clear for security and sanity on each use */
-       memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
-       if (pd->port_rcvhdrtail_kvaddr)
-               memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
-
-       /*
-        * tell chip each time we init it, even if we are re-using previous
-        * memory (we zero the register at process close)
-        */
-       ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
-                             pd->port_port, pd->port_rcvhdrqtailaddr_phys);
-       ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-                             pd->port_port, pd->port_rcvhdrq_phys);
-
-bail:
-       return ret;
-}
-
-
-/*
- * Flush all sends that might be in the ready-to-send state, as well as
- * any that are in the process of being sent.  Used whenever we need to be
- * sure the send side is idle.  Cleans up all buffer state by canceling
- * all pio buffers, and issuing an abort, which cleans up anything in the
- * launch fifo.  The cancel is superfluous on some chip versions, but
- * it's safer to always do it.
- * PIOAvail bits are updated by the chip as if normal send had happened.
- */
-void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
-{
-       unsigned long flags;
-
-       if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
-               ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
-               goto bail;
-       }
-       /*
-        * If we have SDMA, and it's not disabled, we have to kick off the
-        * abort state machine, provided we aren't already aborting.
-        * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
-        * we skip the rest of this routine. It is already "in progress"
-        */
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
-               int skip_cancel;
-               unsigned long *statp = &dd->ipath_sdma_status;
-
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               skip_cancel =
-                       test_and_set_bit(IPATH_SDMA_ABORTING, statp)
-                       && !test_bit(IPATH_SDMA_DISABLED, statp);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-               if (skip_cancel)
-                       goto bail;
-       }
-
-       ipath_dbg("Cancelling all in-progress send buffers\n");
-
-       /* skip armlaunch errs for a while */
-       dd->ipath_lastcancel = jiffies + HZ / 2;
-
-       /*
-        * The abort bit is auto-clearing.  We also don't want pioavail
-        * update happening during this, and we don't want any other
-        * sends going out, so turn those off for the duration.  We read
-        * the scratch register to be sure that cancels and the abort
-        * have taken effect in the chip.  Otherwise two parts are same
-        * as ipath_force_pio_avail_update()
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
-               | INFINIPATH_S_PIOENABLE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-               dd->ipath_sendctrl | INFINIPATH_S_ABORT);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /* disarm all send buffers */
-       ipath_disarm_piobufs(dd, 0,
-               dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-
-       if (restore_sendctrl) {
-               /* else done by caller later if needed */
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
-                       INFINIPATH_S_PIOENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               /* and again, be sure all have hit the chip */
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-
-       if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
-           !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
-           test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               /* only wait so long for intr */
-               dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
-               dd->ipath_sdma_reset_wait = 200;
-               if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       }
-bail:;
-}
-
-/*
- * Force an update of in-memory copy of the pioavail registers, when
- * needed for any of a variety of reasons.  We read the scratch register
- * to make it highly likely that the update will have happened by the
- * time we return.  If already off (as in cancel_sends above), this
- * routine is a nop, on the assumption that the caller will "do the
- * right thing".
- */
-void ipath_force_pio_avail_update(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
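-               /*
-                * Toggle the update-enable bit off and back on, flushing
-                * each write with a scratch read, so the chip re-DMAs
-                * the pioavail registers to memory.
-                */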
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
-                               int linitcmd)
-{
-       u64 mod_wd;
-       static const char *what[4] = {
-               [0] = "NOP",
-               [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
-               [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
-               [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
-       };
-
-       if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
-               /*
-                * If we are told to disable, note that so link-recovery
-                * code does not attempt to bring us back up.
-                */
-               preempt_disable();
-               dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-               preempt_enable();
-       } else if (linitcmd) {
-               /*
-                * Any other linkinitcmd will lead to LINKDOWN and then
-                * to INIT (if all is well), so clear flag to let
-                * link-recovery code attempt to bring us back up.
-                */
-               preempt_disable();
-               dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-               preempt_enable();
-       }
-
-       mod_wd = (linkcmd << dd->ibcc_lc_shift) |
-               (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-       ipath_cdbg(VERBOSE,
-               "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
-               dd->ipath_unit, what[linkcmd], linitcmd,
-               ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
-                       ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                        dd->ipath_ibcctrl | mod_wd);
-       /* read from chip so write is flushed */
-       (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-}
-
-int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
-{
-       u32 lstate;
-       int ret;
-
-       switch (newstate) {
-       case IPATH_IB_LINKDOWN_ONLY:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_POLL);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN_SLEEP:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_SLEEP);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN_DISABLE:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKARM:
-               if (dd->ipath_flags & IPATH_LINKARMED) {
-                       ret = 0;
-                       goto bail;
-               }
-               if (!(dd->ipath_flags &
-                     (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
-
-               /*
-                * Since the port can transition to ACTIVE by receiving
-                * a non VL 15 packet, wait for either state.
-                */
-               lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-               break;
-
-       case IPATH_IB_LINKACTIVE:
-               if (dd->ipath_flags & IPATH_LINKACTIVE) {
-                       ret = 0;
-                       goto bail;
-               }
-               if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
-               lstate = IPATH_LINKACTIVE;
-               break;
-
-       case IPATH_IB_LINK_LOOPBACK:
-               dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
-               dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-
-               /* turn heartbeat off, as it causes loopback to fail */
-               dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                                      IPATH_IB_HRTBT_OFF);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINK_EXTERNAL:
-               dev_info(&dd->pcidev->dev,
-                       "Disabling IB local loopback (normal)\n");
-               dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                                      IPATH_IB_HRTBT_ON);
-               dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       /*
-        * Heartbeat can be explicitly enabled by the user via the
-        * "hrtbt_enable" file; if disabled there, trying to enable here
-        * will have no effect.  Implicit changes (heartbeat off when
-        * loopback on, and vice versa) are included to ease testing.
-        */
-       case IPATH_IB_LINK_HRTBT:
-               ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                       IPATH_IB_HRTBT_ON);
-               goto bail;
-
-       case IPATH_IB_LINK_NO_HRTBT:
-               ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                       IPATH_IB_HRTBT_OFF);
-               goto bail;
-
-       default:
-               ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-               ret = -EINVAL;
-               goto bail;
-       }
-       ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link INIT state...
- */
-int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-       u32 piosize;
-       int changed = 0;
-       int ret;
-
-       /*
-        * The MTU is the max IB data payload.  It's the largest power of
-        * 2 less than piosize (or even larger, since it only really
-        * controls the largest we can receive; we can send the max of
-        * the MTU and piosize).  We check that it's one of the valid IB
-        * sizes.
-        */
-       if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-           (arg != 4096 || !ipath_mtu4096)) {
-               ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-               ret = -EINVAL;
-               goto bail;
-       }
-       if (dd->ipath_ibmtu == arg) {
-               ret = 0;        /* same as current */
-               goto bail;
-       }
-
-       piosize = dd->ipath_ibmaxlen;
-       dd->ipath_ibmtu = arg;
-
-       if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-               /* Only if it's not the initial value (or reset to it) */
-               if (piosize != dd->ipath_init_ibmaxlen) {
-                       if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
-                               piosize = dd->ipath_init_ibmaxlen;
-                       dd->ipath_ibmaxlen = piosize;
-                       changed = 1;
-               }
-       } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-               piosize = arg + IPATH_PIO_MAXIBHDR;
-               ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-                          "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-                          arg);
-               dd->ipath_ibmaxlen = piosize;
-               changed = 1;
-       }
-
-       if (changed) {
-               u64 ibc = dd->ipath_ibcctrl, ibdw;
-               /*
-                * update our housekeeping variables, and set IBC max
-                * size, same as init code; max IBC is max we allow in
-                * buffer, less the qword pbc, plus 1 for ICRC, in dwords
-                */
-               dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
-               ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
-               ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-                        dd->ibcc_mpl_shift);
-               ibc |= ibdw << dd->ibcc_mpl_shift;
-               dd->ipath_ibcctrl = ibc;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-               dd->ipath_f_tidtemplate(dd);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
-{
-       dd->ipath_lid = lid;
-       dd->ipath_lmc = lmc;
-
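-       /*
-        * Config word: LID in the low 16 bits; the high 16 bits hold
-        * ~((1 << lmc) - 1), a mask that ignores the low lmc "path"
-        * bits of the LID.
-        */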
-       dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
-               (~((1U << lmc) - 1)) << 16);
-
-       dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
-
-       return 0;
-}
-
-
-/**
- * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to write
- * @port: the port containing the register
- * @value: the value to write
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
-                         unsigned port, u64 value)
-{
-       u16 where;
-
-       if (port < dd->ipath_portcnt &&
-           (regno == dd->ipath_kregs->kr_rcvhdraddr ||
-            regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
-               where = regno + port;
-       else
-               where = -1;
-
-       ipath_write_kreg(dd, where, value);
-}
-
-/*
- * The following deal with the "obviously simple" task of overriding the
- * state of the LEDs, which normally indicate link physical and logical
- * status.  The complications arise in dealing with different hardware
- * mappings, the board-dependent routine being called from interrupts,
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
-
-static void ipath_run_led_override(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-       int timeoff;
-       int pidx;
-       u64 lstate, ltstate, val;
-
-       if (!(dd->ipath_flags & IPATH_INITTED))
-               return;
-
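-       /* alternate between the two blink phases on each timer tick */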
-       pidx = dd->ipath_led_override_phase++ & 1;
-       dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
-       timeoff = dd->ipath_led_override_timeoff;
-
-       /*
-        * The code below potentially restores the LED values per current
-        * status; it should also possibly set up the traffic-blink
-        * register, but that is left to the per-chip functions.
-        */
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-       ltstate = ipath_ib_linktrstate(dd, val);
-       lstate = ipath_ib_linkstate(dd, val);
-
-       dd->ipath_f_setextled(dd, lstate, ltstate);
-       mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
-}
-
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
-{
-       int timeoff, freq;
-
-       if (!(dd->ipath_flags & IPATH_INITTED))
-               return;
-
-       /* First check if we are blinking.  If not, use 1 Hz polling. */
-       timeoff = HZ;
-       freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
-
-       if (freq) {
-               /* For blink, set each phase from one nybble of val */
-               dd->ipath_led_override_vals[0] = val & 0xF;
-               dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
-               timeoff = (HZ << 4)/freq;
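-               /* phase length in jiffies; a larger freq blinks faster */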
-       } else {
-               /* Non-blink: set both phases the same. */
-               dd->ipath_led_override_vals[0] = val & 0xF;
-               dd->ipath_led_override_vals[1] = val & 0xF;
-       }
-       dd->ipath_led_override_timeoff = timeoff;
-
-       /*
-        * If the timer has not already been started, do so. Use a "quick"
-        * timeout so the function will be called soon, to look at our request.
-        */
-       if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
-               /* Need to start timer */
-               setup_timer(&dd->ipath_led_override_timer,
-                               ipath_run_led_override, (unsigned long)dd);
-
-               dd->ipath_led_override_timer.expires = jiffies + 1;
-               add_timer(&dd->ipath_led_override_timer);
-       } else
-               atomic_dec(&dd->ipath_led_override_timer_active);
-}
-
-/**
- * ipath_shutdown_device - shut down a device
- * @dd: the infinipath device
- *
- * This is called to make the device quiet when we are about to
- * unload the driver, and also when the device is administratively
- * disabled.  It does not free any data structures.
- * Everything it does has to be set up again by ipath_init_chip(dd, 1).
- */
-void ipath_shutdown_device(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       ipath_dbg("Shutting down the device\n");
-
-       ipath_hol_up(dd); /* make sure user processes aren't suspended */
-
-       dd->ipath_flags |= IPATH_LINKUNK;
-       dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
-                            IPATH_LINKINIT | IPATH_LINKARMED |
-                            IPATH_LINKACTIVE);
-       *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
-                               IPATH_STATUS_IB_READY);
-
-       /* mask interrupts, but not errors */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-       dd->ipath_rcvctrl = 0;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               teardown_sdma(dd);
-
-       /*
-        * Gracefully stop all sends, allowing any in progress to trickle
-        * out first.
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl = 0;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       /* flush it */
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /*
-        * Wait long enough for anything that's going to trickle out to
-        * have actually done so.
-        */
-       udelay(5);
-
-       dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
-
-       ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-       ipath_cancel_sends(dd, 0);
-
-       /*
-        * we are shutting down, so tell components that care.  We don't do
-        * this on just a link state change, much like ethernet, a cable
-        * unplug, etc. doesn't change driver state
-        */
-       signal_ib_event(dd, IB_EVENT_PORT_ERR);
-
-       /* disable IBC */
-       dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control | INFINIPATH_C_FREEZEMODE);
-
-       /*
-        * Clear SerdesEnable and turn the LEDs off; do this here because
-        * we are unloading and can't count on interrupts to move things
-        * along.  Turn the LEDs off explicitly for the same reason.
-        */
-       dd->ipath_f_quiet_serdes(dd);
-
-       /* stop all the timers that might still be running */
-       del_timer_sync(&dd->ipath_hol_timer);
-       if (dd->ipath_stats_timer_active) {
-               del_timer_sync(&dd->ipath_stats_timer);
-               dd->ipath_stats_timer_active = 0;
-       }
-       if (dd->ipath_intrchk_timer.data) {
-               del_timer_sync(&dd->ipath_intrchk_timer);
-               dd->ipath_intrchk_timer.data = 0;
-       }
-       if (atomic_read(&dd->ipath_led_override_timer_active)) {
-               del_timer_sync(&dd->ipath_led_override_timer);
-               atomic_set(&dd->ipath_led_override_timer_active, 0);
-       }
-
-       /*
-        * clear all interrupts and errors, so that the next time the driver
-        * is loaded or device is enabled, we know that whatever is set
-        * happened while we were unloaded
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-       ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
-       ipath_update_eeprom_log(dd);
-}
-
-/**
- * ipath_free_pddata - free a port's allocated data
- * @dd: the infinipath device
- * @pd: the portdata structure
- *
- * free up any allocated data for a port
- * This should not touch anything that would affect a simultaneous
- * re-allocation of port data, because it is called after ipath_mutex
- * is released (and can be called from reinit as well).
- * It should never change any chip state, or global driver state.
- * (The only exception to global state is freeing the port0 port0_skbs.)
- */
-void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
-{
-       if (!pd)
-               return;
-
-       if (pd->port_rcvhdrq) {
-               ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
-                          "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
-                          (unsigned long) pd->port_rcvhdrq_size);
-               dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
-                                 pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
-               pd->port_rcvhdrq = NULL;
-               if (pd->port_rcvhdrtail_kvaddr) {
-                       dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                        pd->port_rcvhdrtail_kvaddr,
-                                        pd->port_rcvhdrqtailaddr_phys);
-                       pd->port_rcvhdrtail_kvaddr = NULL;
-               }
-       }
-       if (pd->port_port && pd->port_rcvegrbuf) {
-               unsigned e;
-
-               for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-                       void *base = pd->port_rcvegrbuf[e];
-                       size_t size = pd->port_rcvegrbuf_size;
-
-                       ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
-                                  "chunk %u/%u\n", base,
-                                  (unsigned long) size,
-                                  e, pd->port_rcvegrbuf_chunks);
-                       dma_free_coherent(&dd->pcidev->dev, size,
-                               base, pd->port_rcvegrbuf_phys[e]);
-               }
-               kfree(pd->port_rcvegrbuf);
-               pd->port_rcvegrbuf = NULL;
-               kfree(pd->port_rcvegrbuf_phys);
-               pd->port_rcvegrbuf_phys = NULL;
-               pd->port_rcvegrbuf_chunks = 0;
-       } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
-               unsigned e;
-               struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
-
-               dd->ipath_port0_skbinfo = NULL;
-               ipath_cdbg(VERBOSE, "free closed port %d "
-                          "ipath_port0_skbinfo @ %p\n", pd->port_port,
-                          skbinfo);
-               for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
-                       if (skbinfo[e].skb) {
-                               pci_unmap_single(dd->pcidev, skbinfo[e].phys,
-                                                dd->ipath_ibmaxlen,
-                                                PCI_DMA_FROMDEVICE);
-                               dev_kfree_skb(skbinfo[e].skb);
-                       }
-               vfree(skbinfo);
-       }
-       kfree(pd->port_tid_pg_list);
-       vfree(pd->subport_uregbase);
-       vfree(pd->subport_rcvegrbuf);
-       vfree(pd->subport_rcvhdr_base);
-       kfree(pd);
-}
-
-static int __init infinipath_init(void)
-{
-       int ret;
-
-       if (ipath_debug & __IPATH_DBG)
-               printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
-
-       /*
-        * This must be called before the driver is registered with
-        * the PCI subsystem.
-        */
-       idr_init(&unit_table);
-
-       ret = pci_register_driver(&ipath_driver);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Unable to register driver: error %d\n", -ret);
-               goto bail_unit;
-       }
-
-       ret = ipath_init_ipathfs();
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
-                      "ipathfs: error %d\n", -ret);
-               goto bail_pci;
-       }
-
-       goto bail;
-
-bail_pci:
-       pci_unregister_driver(&ipath_driver);
-
-bail_unit:
-       idr_destroy(&unit_table);
-
-bail:
-       return ret;
-}
-
-static void __exit infinipath_cleanup(void)
-{
-       ipath_exit_ipathfs();
-
-       ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
-       pci_unregister_driver(&ipath_driver);
-
-       idr_destroy(&unit_table);
-}
-
-/**
- * ipath_reset_device - reset the chip if possible
- * @unit: the device to reset
- *
- * Whether or not reset is successful, we attempt to re-initialize the chip
- * (that is, much like a driver unload/reload).  We clear the INITTED flag
- * so that the various entry points will fail until we reinitialize.  For
- * now, we only allow this if no user ports are open that use chip resources
- */
-int ipath_reset_device(int unit)
-{
-       int ret, i;
-       struct ipath_devdata *dd = ipath_lookup(unit);
-       unsigned long flags;
-
-       if (!dd) {
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       if (atomic_read(&dd->ipath_led_override_timer_active)) {
-               /* Need to stop LED timer, _then_ shut off LEDs */
-               del_timer_sync(&dd->ipath_led_override_timer);
-               atomic_set(&dd->ipath_led_override_timer_active, 0);
-       }
-
-       /* Shut off LEDs after we are sure timer is not running */
-       dd->ipath_led_override = LED_OVER_BOTH_OFF;
-       dd->ipath_f_setextled(dd, 0, 0);
-
-       dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
-
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
-               dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
-                        "not initialized or not present\n", unit);
-               ret = -ENXIO;
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       if (dd->ipath_pd)
-               for (i = 1; i < dd->ipath_cfgports; i++) {
-                       if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-                               continue;
-                       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-                       ipath_dbg("unit %u port %d is in use "
-                                 "(PID %u cmd %s), can't reset\n",
-                                 unit, i,
-                                 pid_nr(dd->ipath_pd[i]->port_pid),
-                                 dd->ipath_pd[i]->port_comm);
-                       ret = -EBUSY;
-                       goto bail;
-               }
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               teardown_sdma(dd);
-
-       dd->ipath_flags &= ~IPATH_INITTED;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-       ret = dd->ipath_f_reset(dd);
-       if (ret == 1) {
-               ipath_dbg("Reinitializing unit %u after reset attempt\n",
-                         unit);
-               ret = ipath_init_chip(dd, 1);
-       } else
-               ret = -EAGAIN;
-       if (ret)
-               ipath_dev_err(dd, "Reinitialize unit %u after "
-                             "reset failed with %d\n", unit, ret);
-       else
-               dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
-                        "resetting\n", unit);
-
-bail:
-       return ret;
-}
-
-/*
- * Send a signal to all the processes that have the driver open
- * through the normal interfaces (i.e., everything other than the diags
- * interface).  Returns the number of signalled processes.
- */
-static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
-{
-       int i, sub, any = 0;
-       struct pid *pid;
-       unsigned long flags;
-
-       if (!dd->ipath_pd)
-               return 0;
-
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       for (i = 1; i < dd->ipath_cfgports; i++) {
-               if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-                       continue;
-               pid = dd->ipath_pd[i]->port_pid;
-               if (!pid)
-                       continue;
-
-               dev_info(&dd->pcidev->dev, "context %d in use "
-                         "(PID %u), sending signal %d\n",
-                         i, pid_nr(pid), sig);
-               kill_pid(pid, sig, 1);
-               any++;
-               for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
-                       pid = dd->ipath_pd[i]->port_subpid[sub];
-                       if (!pid)
-                               continue;
-                       dev_info(&dd->pcidev->dev, "sub-context "
-                               "%d:%d in use (PID %u), sending "
-                               "signal %d\n", i, sub, pid_nr(pid), sig);
-                       kill_pid(pid, sig, 1);
-                       any++;
-               }
-       }
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-       return any;
-}
-
-static void ipath_hol_signal_down(struct ipath_devdata *dd)
-{
-       if (ipath_signal_procs(dd, SIGSTOP))
-               ipath_dbg("Stopped some processes\n");
-       ipath_cancel_sends(dd, 1);
-}
-
-static void ipath_hol_signal_up(struct ipath_devdata *dd)
-{
-       if (ipath_signal_procs(dd, SIGCONT))
-               ipath_dbg("Continued some processes\n");
-}
-
-/*
- * Link is down: stop any user processes and flush pending sends to
- * prevent HoL blocking, then start the HoL timer that periodically
- * continues and then stops the processes, so they can detect the link
- * going down if they want, and do something about it.
- * The timer may already be running, so use mod_timer, not add_timer.
- */
-void ipath_hol_down(struct ipath_devdata *dd)
-{
-       dd->ipath_hol_state = IPATH_HOL_DOWN;
-       ipath_hol_signal_down(dd);
-       dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-       dd->ipath_hol_timer.expires = jiffies +
-               msecs_to_jiffies(ipath_hol_timeout_ms);
-       mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
-}
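For readers unfamiliar with the idiom, here is a minimal hedged sketch of the mod_timer() re-arm pattern used above, written against the pre-4.15 timer API this driver uses (all names here are hypothetical, not part of the driver):

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    static struct timer_list demo_timer;        /* hypothetical example timer */

    static void demo_timer_fn(unsigned long opaque)
    {
            /* ... periodic work ... */

            /* Re-arm: mod_timer() works whether or not the timer is pending. */
            mod_timer(&demo_timer, jiffies + msecs_to_jiffies(1000));
    }

    static void demo_start(void)
    {
            setup_timer(&demo_timer, demo_timer_fn, 0UL);
            mod_timer(&demo_timer, jiffies + msecs_to_jiffies(1000));
    }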
-
-/*
- * Link is up: continue any user processes, and ensure the timer is a
- * no-op if it is running.  Let the timer keep running if set; it will
- * become a no-op once it sees the link is up.
- */
-void ipath_hol_up(struct ipath_devdata *dd)
-{
-       ipath_hol_signal_up(dd);
-       dd->ipath_hol_state = IPATH_HOL_UP;
-}
-
-/*
- * Toggle the running/not-running state of user processes to prevent
- * HoL blocking on chip resources, while still allowing user processes
- * to do their link-down special-case handling.
- * Should only be called via the timer.
- */
-void ipath_hol_event(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
-               && dd->ipath_hol_state != IPATH_HOL_UP) {
-               dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-               ipath_dbg("Stopping processes\n");
-               ipath_hol_signal_down(dd);
-       } else { /* may do "extra" if also in ipath_hol_up() */
-               dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
-               ipath_dbg("Continuing processes\n");
-               ipath_hol_signal_up(dd);
-       }
-       if (dd->ipath_hol_state == IPATH_HOL_UP)
-               ipath_dbg("link's up, don't resched timer\n");
-       else {
-               dd->ipath_hol_timer.expires = jiffies +
-                       msecs_to_jiffies(ipath_hol_timeout_ms);
-               mod_timer(&dd->ipath_hol_timer,
-                       dd->ipath_hol_timer.expires);
-       }
-}
-
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
-{
-       u64 val;
-
-       if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
-               return -1;
-       if (dd->ipath_rx_pol_inv != new_pol_inv) {
-               dd->ipath_rx_pol_inv = new_pol_inv;
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-                        INFINIPATH_XGXS_RX_POL_SHIFT);
-               val |= ((u64)dd->ipath_rx_pol_inv) <<
-                       INFINIPATH_XGXS_RX_POL_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-       }
-       return 0;
-}
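The read-modify-write sequence above follows a general pattern for updating a masked field in a 64-bit control register; here is a hedged standalone sketch of that pattern (the helper name is hypothetical):

    #include <linux/types.h>

    /* Sketch: clear a shifted field in a register image and insert a new value. */
    static inline u64 set_reg_field(u64 regval, u64 mask, unsigned int shift,
                                    u64 field)
    {
            regval &= ~(mask << shift);             /* clear the old field */
            regval |= (field & mask) << shift;      /* insert the new one */
            return regval;
    }

ipath_set_rx_pol_inv() is exactly this shape, with INFINIPATH_XGXS_RX_POL_MASK and INFINIPATH_XGXS_RX_POL_SHIFT as the mask/shift pair.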
-
-/*
- * Disable and enable the armlaunch error.  Used for PIO bandwidth testing on
- * the 7220, which is count-based, rather than trigger-based.  Safe for the
- * driver check, since it's at init.   Not completely safe when used for
- * user-mode checking, since some error checking can be lost, but not
- * particularly risky, and only has problematic side-effects in the face of
- * very buggy user code.  There is no reference counting, but that's also
- * fine, given the intended use.
- */
-void ipath_enable_armlaunch(struct ipath_devdata *dd)
-{
-       dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-               INFINIPATH_E_SPIOARMLAUNCH);
-       dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-}
-
-void ipath_disable_armlaunch(struct ipath_devdata *dd)
-{
-       /* so don't re-enable if already set */
-       dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-}
-
-module_init(infinipath_init);
-module_exit(infinipath_cleanup);
diff --git a/drivers/staging/rdma/ipath/ipath_eeprom.c b/drivers/staging/rdma/ipath/ipath_eeprom.c
deleted file mode 100644 (file)
index ef84107..0000000
+++ /dev/null
@@ -1,1183 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-
-/*
- * InfiniPath I2C driver for a serial eeprom.  This is not a generic
- * I2C interface.  For a start, the device we're using (Atmel AT24C11)
- * doesn't work like a regular I2C device.  It looks like one
- * electrically, but not logically.  Normal I2C devices have a single
- * 7-bit or 10-bit I2C address that they respond to.  Valid 7-bit
- * addresses range from 0x03 to 0x77.  Addresses 0x00 to 0x02 and 0x78
- * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
- * call" address.)  The Atmel device, on the other hand, responds to ALL
- * 7-bit addresses.  It's designed to be the only device on a given I2C
- * bus.  A 7-bit address corresponds to the memory address within the
- * Atmel device itself.
- *
- * Also, the timing requirements mean more than simple software
- * bitbanging, with readbacks from chip to ensure timing (simple udelay
- * is not enough).
- *
- * This all means that accessing the device is specialized enough
- * that using the standard kernel I2C bitbanging interface would be
- * impossible.  For example, the core I2C eeprom driver expects to find
- * a device at one or more of a limited set of addresses only.  It doesn't
- * allow writing to an eeprom.  It also doesn't provide any means of
- * accessing eeprom contents from within the kernel, only via sysfs.
- */
-
-/* Added functionality for IBA7220-based cards */
-#define IPATH_EEPROM_DEV_V1 0xA0
-#define IPATH_EEPROM_DEV_V2 0xA2
-#define IPATH_TEMP_DEV 0x98
-#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
-#define IPATH_NO_DEV (0xFF)
-
-/*
- * The number of I2C chains is proliferating. Table below brings
- * some order to the madness. The basic principle is that the
- * table is scanned from the top, and a "probe" is made to the
- * device probe_dev. If that succeeds, the chain is considered
- * to be of that type, and dd->i2c_chain_type is set to the index+1
- * of the entry.
- * The +1 is so static initialization can mean "unknown, do probe."
- */
-static struct i2c_chain_desc {
-       u8 probe_dev;   /* If seen at probe, chain is this type */
-       u8 eeprom_dev;  /* Dev addr (if any) for EEPROM */
-       u8 temp_dev;    /* Dev Addr (if any) for Temp-sense */
-} i2c_chains[] = {
-       { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
-       { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
-       { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
-       { IPATH_NO_DEV }
-};
-
-enum i2c_type {
-       i2c_line_scl = 0,
-       i2c_line_sda
-};
-
-enum i2c_state {
-       i2c_line_low = 0,
-       i2c_line_high
-};
-
-#define READ_CMD 1
-#define WRITE_CMD 0
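As a hedged illustration of the addressing quirk described above (helper names are hypothetical, not driver code): for the legacy Atmel part the byte clocked out after START carries the EEPROM offset itself in bits 7:1, while real I2C parts put a fixed device address there. In both cases the R/W flag lands in bit 0.

    #include <linux/types.h>

    /* Sketch: the byte sent after the START condition, R/W flag in bit 0. */
    static inline u8 legacy_addr_byte(u8 eeprom_offset, int read)
    {
            /* legacy Atmel part: the 7-bit "address" is the memory offset */
            return (eeprom_offset << 1) | (read ? READ_CMD : WRITE_CMD);
    }

    static inline u8 i2c_dev_byte(u8 dev_addr, int read)
    {
            /* real I2C part: dev_addr (e.g. 0xA0) already fills bits 7:1 */
            return dev_addr | (read ? READ_CMD : WRITE_CMD);
    }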
-
-/**
- * i2c_gpio_set - set a GPIO line
- * @dd: the infinipath device
- * @line: the line to set
- * @new_line_state: the state to set
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.
- */
-static int i2c_gpio_set(struct ipath_devdata *dd,
-                       enum i2c_type line,
-                       enum i2c_state new_line_state)
-{
-       u64 out_mask, dir_mask, *gpioval;
-       unsigned long flags = 0;
-
-       gpioval = &dd->ipath_gpio_out;
-
-       if (line == i2c_line_scl) {
-               dir_mask = dd->ipath_gpio_scl;
-               out_mask = (1UL << dd->ipath_gpio_scl_num);
-       } else {
-               dir_mask = dd->ipath_gpio_sda;
-               out_mask = (1UL << dd->ipath_gpio_sda_num);
-       }
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       if (new_line_state == i2c_line_high) {
-               /* tri-state the output rather than force high */
-               dd->ipath_extctrl &= ~dir_mask;
-       } else {
-               /* config line to be an output */
-               dd->ipath_extctrl |= dir_mask;
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-
-       /* set output as well (no real verify) */
-       if (new_line_state == i2c_line_high)
-               *gpioval |= out_mask;
-       else
-               *gpioval &= ~out_mask;
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       return 0;
-}
-
-/**
- * i2c_gpio_get - get a GPIO line state
- * @dd: the infinipath device
- * @line: the line to get
- * @curr_statep: where to put the line state
- *
- * Returns 0 if the line state was read successfully, non-zero
- * on error.  *curr_statep is not set on error.
- */
-static int i2c_gpio_get(struct ipath_devdata *dd,
-                       enum i2c_type line,
-                       enum i2c_state *curr_statep)
-{
-       u64 read_val, mask;
-       int ret;
-       unsigned long flags = 0;
-
-       /* check args */
-       if (curr_statep == NULL) {
-               ret = 1;
-               goto bail;
-       }
-
-       /* config line to be an input */
-       if (line == i2c_line_scl)
-               mask = dd->ipath_gpio_scl;
-       else
-               mask = dd->ipath_gpio_sda;
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       dd->ipath_extctrl &= ~mask;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-       /*
-        * Below is very unlikely to reflect true input state if Output
-        * Enable actually changed.
-        */
-       read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       if (read_val & mask)
-               *curr_statep = i2c_line_high;
-       else
-               *curr_statep = i2c_line_low;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * i2c_wait_for_writes - wait for a write
- * @dd: the infinipath device
- *
- * We use this instead of udelay directly, so we can make sure
- * that previous register writes have been flushed all the way
- * to the chip.  Since we are delaying anyway, the cost doesn't
- * hurt, and it makes the bit twiddling more regular.
- */
-static void i2c_wait_for_writes(struct ipath_devdata *dd)
-{
-       (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-       rmb();
-}
-
-static void scl_out(struct ipath_devdata *dd, u8 bit)
-{
-       udelay(1);
-       i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
-
-       i2c_wait_for_writes(dd);
-}
-
-static void sda_out(struct ipath_devdata *dd, u8 bit)
-{
-       i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
-
-       i2c_wait_for_writes(dd);
-}
-
-static u8 sda_in(struct ipath_devdata *dd, int wait)
-{
-       enum i2c_state bit;
-
-       if (i2c_gpio_get(dd, i2c_line_sda, &bit))
-               ipath_dbg("get bit failed!\n");
-
-       if (wait)
-               i2c_wait_for_writes(dd);
-
-       return bit == i2c_line_high ? 1U : 0;
-}
-
-/**
- * i2c_ackrcv - see if ack following write is true
- * @dd: the infinipath device
- */
-static int i2c_ackrcv(struct ipath_devdata *dd)
-{
-       u8 ack_received;
-
-       /* AT ENTRY SCL = LOW */
-       /* change direction, ignore data */
-       ack_received = sda_in(dd, 1);
-       scl_out(dd, i2c_line_high);
-       ack_received = sda_in(dd, 1) == 0;
-       scl_out(dd, i2c_line_low);
-       return ack_received;
-}
-
-/**
- * rd_byte - read a byte, leaving ACK, STOP, etc. up to the caller
- * @dd: the infinipath device
- *
- * Returns byte shifted out of device
- */
-static int rd_byte(struct ipath_devdata *dd)
-{
-       int bit_cntr, data;
-
-       data = 0;
-
-       for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
-               data <<= 1;
-               scl_out(dd, i2c_line_high);
-               data |= sda_in(dd, 0);
-               scl_out(dd, i2c_line_low);
-       }
-       return data;
-}
-
-/**
- * wr_byte - write a byte, one bit at a time
- * @dd: the infinipath device
- * @data: the byte to write
- *
- * Returns 0 if we got the following ack, otherwise 1
- */
-static int wr_byte(struct ipath_devdata *dd, u8 data)
-{
-       int bit_cntr;
-       u8 bit;
-
-       for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
-               bit = (data >> bit_cntr) & 1;
-               sda_out(dd, bit);
-               scl_out(dd, i2c_line_high);
-               scl_out(dd, i2c_line_low);
-       }
-       return (!i2c_ackrcv(dd)) ? 1 : 0;
-}
-
-static void send_ack(struct ipath_devdata *dd)
-{
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_high);
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_high);
-}
-
-/**
- * i2c_startcmd - transmit the start condition, followed by address/cmd
- * @dd: the infinipath device
- * @offset_dir: direction byte
- *
- *      (both clock/data high, clock high, data low while clock is high)
- */
-static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
-{
-       int res;
-
-       /* issue start sequence */
-       sda_out(dd, i2c_line_high);
-       scl_out(dd, i2c_line_high);
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_low);
-
-       /* issue length and direction byte */
-       res = wr_byte(dd, offset_dir);
-
-       if (res)
-               ipath_cdbg(VERBOSE, "No ack to complete start\n");
-
-       return res;
-}
-
-/**
- * stop_cmd - transmit the stop condition
- * @dd: the infinipath device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_cmd(struct ipath_devdata *dd)
-{
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_high);
-       sda_out(dd, i2c_line_high);
-       udelay(2);
-}
-
-/**
- * eeprom_reset - reset I2C communication
- * @dd: the infinipath device
- */
-static int eeprom_reset(struct ipath_devdata *dd)
-{
-       int clock_cycles_left = 9;
-       u64 *gpioval = &dd->ipath_gpio_out;
-       int ret;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       /* Make sure shadows are consistent */
-       dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
-       *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
-                  "is %llx\n", (unsigned long long) *gpioval);
-
-       /*
-        * This gets the i2c bus into a known state, by first going low,
-        * then tristating SDA (SCL is then tristated as the first thing
-        * in the loop).
-        */
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_high);
-
-       /* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
-       while (clock_cycles_left--) {
-               scl_out(dd, i2c_line_high);
-
-               /* SDA seen high, issue START by dropping it while SCL high */
-               if (sda_in(dd, 0)) {
-                       sda_out(dd, i2c_line_low);
-                       scl_out(dd, i2c_line_low);
-                       /* ATMEL spec says must be followed by STOP. */
-                       scl_out(dd, i2c_line_high);
-                       sda_out(dd, i2c_line_high);
-                       ret = 0;
-                       goto bail;
-               }
-
-               scl_out(dd, i2c_line_low);
-       }
-
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/*
- * Probe for I2C device at specified address. Returns 0 for "success"
- * to match rest of this file.
- * Leave bus in "reasonable" state for further commands.
- */
-static int i2c_probe(struct ipath_devdata *dd, int devaddr)
-{
-       int ret;
-
-       ret = eeprom_reset(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
-                             devaddr);
-               return ret;
-       }
-       /*
-        * Reset no longer leaves bus in start condition, so normal
-        * i2c_startcmd() will do.
-        */
-       ret = i2c_startcmd(dd, devaddr | READ_CMD);
-       if (ret)
-               ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
-                          devaddr);
-       else {
-               /*
-                * Device did respond. Complete a single-byte read, because some
-                * devices apparently cannot handle STOP immediately after they
-                * ACK the start-cmd.
-                */
-               int data;
-               data = rd_byte(dd);
-               stop_cmd(dd);
-               ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
-       }
-       return ret;
-}
-
-/*
- * Returns the "i2c type". This is a pointer to a struct that describes
- * the I2C chain on this board. To minimize impact on struct ipath_devdata,
- * the (small integer) index into the table is actually memoized, rather
- * than the pointer.
- * The index is memoized because the type is determined on the first call
- * per chip.
- * An alternative would be to move type determination to early
- * init code.
- */
-static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
-{
-       int idx;
-
-       /* Get memoized index, from previous successful probes */
-       idx = dd->ipath_i2c_chain_type - 1;
-       if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
-               goto done;
-
-       idx = 0;
-       while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
-               /* if probe succeeds, this is type */
-               if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
-                       break;
-               ++idx;
-       }
-
-       /*
-        * Old EEPROM (first entry) may require a reset after probe,
-        * rather than being able to "start" after "stop"
-        */
-       if (idx == 0)
-               eeprom_reset(dd);
-
-       if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
-               idx = -1;
-       else
-               dd->ipath_i2c_chain_type = idx + 1;
-done:
-       return (idx >= 0) ? i2c_chains + idx : NULL;
-}
-
-static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
-                                       u8 eeprom_offset, void *buffer, int len)
-{
-       int ret;
-       struct i2c_chain_desc *icd;
-       u8 *bp = buffer;
-
-       ret = 1;
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->eeprom_dev == IPATH_NO_DEV) {
-               /* legacy not-really-I2C */
-               ipath_cdbg(VERBOSE, "Start command only address\n");
-               eeprom_offset = (eeprom_offset << 1) | READ_CMD;
-               ret = i2c_startcmd(dd, eeprom_offset);
-       } else {
-               /* Actual I2C */
-               ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
-               if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-                       ipath_dbg("Failed EEPROM startcmd\n");
-                       stop_cmd(dd);
-                       ret = 1;
-                       goto bail;
-               }
-               ret = wr_byte(dd, eeprom_offset);
-               stop_cmd(dd);
-               if (ret) {
-                       ipath_dev_err(dd, "Failed to write EEPROM address\n");
-                       ret = 1;
-                       goto bail;
-               }
-               ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
-       }
-       if (ret) {
-               ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
-               stop_cmd(dd);
-               ret = 1;
-               goto bail;
-       }
-
-       /*
-        * eeprom keeps clocking data out as long as we ack, automatically
-        * incrementing the address.
-        */
-       while (len-- > 0) {
-               /* get and store data */
-               *bp++ = rd_byte(dd);
-               /* send ack if not the last byte */
-               if (len)
-                       send_ack(dd);
-       }
-
-       stop_cmd(dd);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
-                                      const void *buffer, int len)
-{
-       int sub_len;
-       const u8 *bp = buffer;
-       int max_wait_time, i;
-       int ret;
-       struct i2c_chain_desc *icd;
-
-       ret = 1;
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       while (len > 0) {
-               if (icd->eeprom_dev == IPATH_NO_DEV) {
-                       if (i2c_startcmd(dd,
-                                        (eeprom_offset << 1) | WRITE_CMD)) {
-                               ipath_dbg("Failed to start cmd offset %u\n",
-                                       eeprom_offset);
-                               goto failed_write;
-                       }
-               } else {
-                       /* Real I2C */
-                       if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-                               ipath_dbg("Failed EEPROM startcmd\n");
-                               goto failed_write;
-                       }
-                       ret = wr_byte(dd, eeprom_offset);
-                       if (ret) {
-                               ipath_dev_err(dd, "Failed to write EEPROM "
-                                             "address\n");
-                               goto failed_write;
-                       }
-               }
-
-               sub_len = min(len, 4);
-               eeprom_offset += sub_len;
-               len -= sub_len;
-
-               for (i = 0; i < sub_len; i++) {
-                       if (wr_byte(dd, *bp++)) {
-                               ipath_dbg("no ack after byte %u/%u (%u "
-                                         "total remain)\n", i, sub_len,
-                                         len + sub_len - i);
-                               goto failed_write;
-                       }
-               }
-
-               stop_cmd(dd);
-
-               /*
-                * Wait for the write to complete by waiting for a successful
-                * read (the chip replies with a zero after the write
-                * cmd completes, and before it writes to the eeprom).
-                * The startcmd for the read will fail the ack until
-                * the writes have completed.  We do this inline to avoid
-                * the debug prints that are in the real read routine
-                * if the startcmd fails.
-                * We also use the proper device address, so it doesn't matter
-                * whether we have a real eeprom_dev; legacy accepts any address.
-                */
-               max_wait_time = 100;
-               while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
-                       stop_cmd(dd);
-                       if (!--max_wait_time) {
-                               ipath_dbg("Did not get successful read to "
-                                         "complete write\n");
-                               goto failed_write;
-                       }
-               }
-               /* now read (and ignore) the resulting byte */
-               rd_byte(dd);
-               stop_cmd(dd);
-       }
-
-       ret = 0;
-       goto bail;
-
-failed_write:
-       stop_cmd(dd);
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_eeprom_read - receives bytes from the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: address to read from
- * @buffer: where to store result
- * @len: number of bytes to receive
- */
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
-                       void *buff, int len)
-{
-       int ret;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       return ret;
-}
-
-/**
- * ipath_eeprom_write - writes data to the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: where to place data
- * @buffer: data to write
- * @len: number of bytes to write
- */
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
-                       const void *buff, int len)
-{
-       int ret;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       return ret;
-}
-
-static u8 flash_csum(struct ipath_flash *ifp, int adjust)
-{
-       u8 *ip = (u8 *) ifp;
-       u8 csum = 0, len;
-
-       /*
-        * Limit length checksummed to max length of actual data.
-        * Checksum of erased eeprom will still be bad, but we avoid
-        * reading past the end of the buffer we were passed.
-        */
-       len = ifp->if_length;
-       if (len > sizeof(struct ipath_flash))
-               len = sizeof(struct ipath_flash);
-       while (len--)
-               csum += *ip++;
-       csum -= ifp->if_csum;
-       csum = ~csum;
-       if (adjust)
-               ifp->if_csum = csum;
-
-       return csum;
-}
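A hedged sketch of how the two flash_csum() modes are meant to be combined, mirroring the verify-then-restamp flow in the callers below (the helper is hypothetical):

    /* Sketch: verify the stored checksum, and restamp it after modifications. */
    static int flash_verify_and_stamp(struct ipath_flash *ifp, int modified)
    {
            u8 csum = flash_csum(ifp, 0);   /* adjust == 0: compute only */

            if (csum != ifp->if_csum)
                    return -EINVAL;         /* contents don't match if_csum */

            if (modified)
                    flash_csum(ifp, 1);     /* adjust == 1: store new if_csum */
            return 0;
    }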
-
-/**
- * ipath_get_eeprom_info - get the GUID and other info from the i2c device
- * @dd: the infinipath device
- *
- * We have the capability to use the ipath_nguid field, and get
- * the guid from the first chip's flash, to use for all of them.
- */
-void ipath_get_eeprom_info(struct ipath_devdata *dd)
-{
-       void *buf;
-       struct ipath_flash *ifp;
-       __be64 guid;
-       int len, eep_stat;
-       u8 csum, *bguid;
-       int t = dd->ipath_unit;
-       struct ipath_devdata *dd0 = ipath_lookup(0);
-
-       if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
-               u8 oguid;
-               dd->ipath_guid = dd0->ipath_guid;
-               bguid = (u8 *) & dd->ipath_guid;
-
-               oguid = bguid[7];
-               bguid[7] += t;
-               if (oguid > bguid[7]) {
-                       if (bguid[6] == 0xff) {
-                               if (bguid[5] == 0xff) {
-                                       ipath_dev_err(
-                                               dd,
-                                               "Can't set %s GUID from "
-                                               "base, wraps to OUI!\n",
-                                               ipath_get_unit_name(t));
-                                       dd->ipath_guid = 0;
-                                       goto bail;
-                               }
-                               bguid[5]++;
-                       }
-                       bguid[6]++;
-               }
-               dd->ipath_nguid = 1;
-
-               ipath_dbg("nguid %u, so adding %u to device 0 guid, "
-                         "for %llx\n",
-                         dd0->ipath_nguid, t,
-                         (unsigned long long) be64_to_cpu(dd->ipath_guid));
-               goto bail;
-       }
-
-       /*
-        * read the full flash, not just the currently used part, since it
-        * may have been written with a newer definition
-        */
-       len = sizeof(struct ipath_flash);
-       buf = vmalloc(len);
-       if (!buf) {
-               ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-                             "bytes from eeprom for GUID\n", len);
-               goto bail;
-       }
-
-       mutex_lock(&dd->ipath_eep_lock);
-       eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
-       mutex_unlock(&dd->ipath_eep_lock);
-
-       if (eep_stat) {
-               ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
-               goto done;
-       }
-       ifp = (struct ipath_flash *)buf;
-
-       csum = flash_csum(ifp, 0);
-       if (csum != ifp->if_csum) {
-               dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
-                        "0x%x, not 0x%x\n", csum, ifp->if_csum);
-               goto done;
-       }
-       if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
-           *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
-               ipath_dev_err(dd, "Invalid GUID %llx from flash; "
-                             "ignoring\n",
-                             *(unsigned long long *) ifp->if_guid);
-               /* don't allow GUID if all 0 or all 1's */
-               goto done;
-       }
-
-       /* complain, but allow it */
-       if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
-               dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
-                        "default, probably not correct!\n",
-                        *(unsigned long long *) ifp->if_guid);
-
-       bguid = ifp->if_guid;
-       if (!bguid[0] && !bguid[1] && !bguid[2]) {
-               /* original incorrect GUID format in flash; fix in
-                * core copy, by shifting up 2 octets; don't need to
-                * change top octet, since both it and shifted are
-                * 0. */
-               bguid[1] = bguid[3];
-               bguid[2] = bguid[4];
-               bguid[3] = bguid[4] = 0;
-               guid = *(__be64 *) ifp->if_guid;
-               ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
-                          "shifting 2 octets\n");
-       } else
-               guid = *(__be64 *) ifp->if_guid;
-       dd->ipath_guid = guid;
-       dd->ipath_nguid = ifp->if_numguid;
-       /*
-        * Things are slightly complicated by the desire to transparently
-        * support both the Pathscale 10-digit serial number and the QLogic
-        * 13-character version.
-        */
-       if ((ifp->if_fversion > 1) && ifp->if_sprefix[0]
-               && ((u8 *)ifp->if_sprefix)[0] != 0xFF) {
-               /* This board has a Serial-prefix, which is stored
-                * elsewhere for backward-compatibility.
-                */
-               char *snp = dd->ipath_serial;
-               memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
-               snp[sizeof ifp->if_sprefix] = '\0';
-               len = strlen(snp);
-               snp += len;
-               len = (sizeof dd->ipath_serial) - len;
-               if (len > sizeof ifp->if_serial) {
-                       len = sizeof ifp->if_serial;
-               }
-               memcpy(snp, ifp->if_serial, len);
-       } else
-               memcpy(dd->ipath_serial, ifp->if_serial,
-                      sizeof ifp->if_serial);
-       if (!strstr(ifp->if_comment, "Tested successfully"))
-               ipath_dev_err(dd, "Board SN %s did not pass functional "
-                       "test: %s\n", dd->ipath_serial,
-                       ifp->if_comment);
-
-       ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
-                  (unsigned long long) be64_to_cpu(dd->ipath_guid));
-
-       memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
-       /*
-        * Power-on (actually "active") hours are kept as little-endian value
-        * in EEPROM, but as seconds in a (possibly as small as 24-bit)
-        * atomic_t while running.
-        */
-       atomic_set(&dd->ipath_active_time, 0);
-       dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
-
-done:
-       vfree(buf);
-
-bail:;
-}
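To make the derived-GUID arithmetic above concrete: unit t adds t to the low octet of the base GUID and propagates any carry into octets 6 and 5, refusing to wrap into the OUI. A hedged standalone restatement (helper name hypothetical):

    #include <linux/types.h>

    /* Sketch: add 'unit' to the low octet of an 8-byte GUID, with carry. */
    static int guid_add_unit(u8 guid[8], u8 unit)
    {
            u8 old = guid[7];

            guid[7] += unit;
            if (old > guid[7]) {            /* low octet wrapped around */
                    if (guid[6] == 0xff) {
                            if (guid[5] == 0xff)
                                    return -1;      /* would wrap into the OUI */
                            guid[5]++;
                    }
                    guid[6]++;
            }
            return 0;
    }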
-
-/**
- * ipath_update_eeprom_log - copy active-time and error counters to eeprom
- * @dd: the infinipath device
- *
- * Although the time is kept as seconds in the ipath_devdata struct, it is
- * rounded to hours for re-write, as we have only 16 bits in EEPROM.
- * First-cut code reads whole (expected) struct ipath_flash, modifies,
- * re-writes. Future direction: read/write only what we need, assuming
- * that the EEPROM had to have been "good enough" for driver init, and
- * if not, we aren't making it worse.
- */
-int ipath_update_eeprom_log(struct ipath_devdata *dd)
-{
-       void *buf;
-       struct ipath_flash *ifp;
-       int len, hi_water;
-       uint32_t new_time, new_hrs;
-       u8 csum;
-       int ret, idx;
-       unsigned long flags;
-
-       /* first, check if we actually need to do anything. */
-       ret = 0;
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               if (dd->ipath_eep_st_new_errs[idx]) {
-                       ret = 1;
-                       break;
-               }
-       }
-       new_time = atomic_read(&dd->ipath_active_time);
-
-       if (ret == 0 && new_time < 3600)
-               return 0;
-
-       /*
-        * The quick-check above determined that there is something worthy
-        * of logging, so get the current contents and take a more detailed
-        * look.  Read the full flash, not just the currently used part,
-        * since it may have been written with a newer definition.
-        */
-       len = sizeof(struct ipath_flash);
-       buf = vmalloc(len);
-       ret = 1;
-       if (!buf) {
-               ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-                               "bytes from eeprom for logging\n", len);
-               goto bail;
-       }
-
-       /* Grab the mutex and read the current EEPROM. If we get an
-        * error, let go; if not, hold it until we finish the write.
-        */
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (ret) {
-               ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
-               goto free_bail;
-       }
-       ret = ipath_eeprom_internal_read(dd, 0, buf, len);
-       if (ret) {
-               mutex_unlock(&dd->ipath_eep_lock);
-               ipath_dev_err(dd, "Unable to read EEPROM for logging\n");
-               goto free_bail;
-       }
-       ifp = (struct ipath_flash *)buf;
-
-       csum = flash_csum(ifp, 0);
-       if (csum != ifp->if_csum) {
-               mutex_unlock(&dd->ipath_eep_lock);
-               ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
-                               csum, ifp->if_csum);
-               ret = 1;
-               goto free_bail;
-       }
-       hi_water = 0;
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               int new_val = dd->ipath_eep_st_new_errs[idx];
-               if (new_val) {
-                       /*
-                        * If we have seen any errors, add them to the EEPROM
-                        * values.  We need to saturate at 0xFF (255), and we
-                        * would also need to adjust the checksum if we were
-                        * trying to minimize EEPROM traffic.
-                        * Note that we add to the actual current count in the
-                        * EEPROM, in case it was altered while we were running.
-                        */
-                       new_val += ifp->if_errcntp[idx];
-                       if (new_val > 0xFF)
-                               new_val = 0xFF;
-                       if (ifp->if_errcntp[idx] != new_val) {
-                               ifp->if_errcntp[idx] = new_val;
-                               hi_water = offsetof(struct ipath_flash,
-                                               if_errcntp) + idx;
-                       }
-                       /*
-                        * update our shadow (used to minimize EEPROM
-                        * traffic), to match what we are about to write.
-                        */
-                       dd->ipath_eep_st_errs[idx] = new_val;
-                       dd->ipath_eep_st_new_errs[idx] = 0;
-               }
-       }
-       /*
-        * Now update the active-time.  We would like to round to the
-        * nearest hour, but unless atomic_t is guaranteed to be a proper
-        * signed int we cannot, because we need to account for what we
-        * "transfer" to the EEPROM: if we logged an hour at 31 minutes, we
-        * would need to set active_time to -29 minutes to accurately
-        * count the _next_ hour.
-        */
-       if (new_time >= 3600) {
-               new_hrs = new_time / 3600;
-               atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
-               new_hrs += dd->ipath_eep_hrs;
-               if (new_hrs > 0xFFFF)
-                       new_hrs = 0xFFFF;
-               dd->ipath_eep_hrs = new_hrs;
-               if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
-                       ifp->if_powerhour[0] = new_hrs & 0xFF;
-                       hi_water = offsetof(struct ipath_flash, if_powerhour);
-               }
-               if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
-                       ifp->if_powerhour[1] = new_hrs >> 8;
-                       hi_water = offsetof(struct ipath_flash, if_powerhour)
-                                       + 1;
-               }
-       }
-       /*
-        * There is a tiny possibility that we could somehow fail to write
-        * the EEPROM after updating our shadows, but problems from holding
-        * the spinlock too long are a much bigger issue.
-        */
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-       if (hi_water) {
-               /* we made some change to the data; update the checksum and write */
-               csum = flash_csum(ifp, 1);
-               ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
-       }
-       mutex_unlock(&dd->ipath_eep_lock);
-       if (ret)
-               ipath_dev_err(dd, "Failed updating EEPROM\n");
-
-free_bail:
-       vfree(buf);
-bail:
-       return ret;
-}
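The active-time bookkeeping above can be restated as a small hedged sketch: whole hours are transferred to the 16-bit EEPROM shadow (saturating), while the sub-hour remainder keeps accumulating (helper and parameters are hypothetical):

    #include <linux/atomic.h>

    /* Sketch: move whole hours from the running seconds counter to a
     * saturating 16-bit hour total, leaving the remainder in place. */
    static void transfer_hours(atomic_t *active_secs, u32 *eep_hrs)
    {
            u32 secs = atomic_read(active_secs);
            u32 hrs = secs / 3600;

            if (!hrs)
                    return;
            atomic_sub(hrs * 3600, active_secs);    /* e.g. 7250s -> keep 50s */
            *eep_hrs += hrs;
            if (*eep_hrs > 0xFFFF)
                    *eep_hrs = 0xFFFF;              /* 16-bit EEPROM field */
    }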
-
-/**
- * ipath_inc_eeprom_err - increment one of the four error counters
- * that are logged to EEPROM.
- * @dd: the infinipath device
- * @eidx: 0..3, the counter to increment
- * @incr: how much to add
- *
- * Each counter is 8-bits, and saturates at 255 (0xFF). They
- * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
- * is called, but it can only be called in a context that allows sleep.
- * This function can be called even at interrupt level.
- */
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
-{
-       uint new_val;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
-       if (new_val > 255)
-               new_val = 255;
-       dd->ipath_eep_st_new_errs[eidx] = new_val;
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-       return;
-}
-
-static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
-{
-       int ret;
-       struct i2c_chain_desc *icd;
-
-       ret = -ENOENT;
-
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->temp_dev == IPATH_NO_DEV) {
-               /* tempsense only exists on new, real-I2C boards */
-               ret = -ENXIO;
-               goto bail;
-       }
-
-       if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-               ipath_dbg("Failed tempsense startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, regnum);
-       stop_cmd(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
-                             regnum);
-               ret = -ENXIO;
-               goto bail;
-       }
-       if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
-               ipath_dbg("Failed tempsense RD startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       /*
-        * We can only clock out one byte per command, sensibly
-        */
-       ret = rd_byte(dd);
-       stop_cmd(dd);
-
-bail:
-       return ret;
-}
-
-#define VALID_TS_RD_REG_MASK 0xBF
-
-/**
- * ipath_tempsense_read - read register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to read from
- *
- * returns reg contents (0..255) or < 0 for error
- */
-int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
-{
-       int ret;
-
-       if (regnum > 7)
-               return -EINVAL;
-
-       /* return a bogus value for (the one) register we do not have */
-       if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
-               return 0;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_tempsense_internal_read(dd, regnum);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       /*
-        * There are three possibilities here:
-        * ret is actual value (0..255)
-        * ret is -ENXIO or -EINVAL from code in this file
-        * ret is -EINTR from mutex_lock_interruptible.
-        */
-       return ret;
-}
-
-static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
-                                         u8 regnum, u8 data)
-{
-       int ret = -ENOENT;
-       struct i2c_chain_desc *icd;
-
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->temp_dev == IPATH_NO_DEV) {
-               /* tempsense only exists on new, real-I2C boards */
-               ret = -ENXIO;
-               goto bail;
-       }
-       if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-               ipath_dbg("Failed tempsense startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, regnum);
-       if (ret) {
-               stop_cmd(dd);
-               ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
-                             regnum);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, data);
-       stop_cmd(dd);
-       ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
-       if (ret) {
-               ipath_dev_err(dd, "Failed tempsense data write to %02X\n",
-                             regnum);
-               ret = -ENXIO;
-       }
-
-bail:
-       return ret;
-}
-
-#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
-
-/**
- * ipath_tempsense_write - write register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to write
- * @data: data to write
- *
- * returns 0 for success or < 0 for error
- */
-int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
-{
-       int ret;
-
-       if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
-               return -EINVAL;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_tempsense_internal_write(dd, regnum, data);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       /*
-        * There are three possibilities here:
-        * ret is 0 for success
-        * ret is -ENXIO or -EINVAL from code in this file
-        * ret is -EINTR from mutex_lock_interruptible.
-        */
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
deleted file mode 100644 (file)
index 6187b84..0000000
+++ /dev/null
@@ -1,2619 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/poll.h>
-#include <linux/cdev.h>
-#include <linux/swap.h>
-#include <linux/export.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/jiffies.h>
-#include <linux/cpu.h>
-#include <linux/uio.h>
-#include <asm/pgtable.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-#include "ipath_user_sdma.h"
-
-static int ipath_open(struct inode *, struct file *);
-static int ipath_close(struct inode *, struct file *);
-static ssize_t ipath_write(struct file *, const char __user *, size_t,
-                          loff_t *);
-static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from);
-static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
-static int ipath_mmap(struct file *, struct vm_area_struct *);
-
-/*
- * This is really, really weird shit - write() and writev() here
- * have completely unrelated semantics.  Sucky userland ABI,
- * film at 11.
- */
-static const struct file_operations ipath_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_write,
-       .write_iter = ipath_write_iter,
-       .open = ipath_open,
-       .release = ipath_close,
-       .poll = ipath_poll,
-       .mmap = ipath_mmap,
-       .llseek = noop_llseek,
-};
-
-/*
- * Convert kernel virtual addresses to physical addresses so they don't
- * potentially conflict with the chip addresses used as mmap offsets.
- * It doesn't really matter what mmap offset we use as long as we can
- * interpret it correctly.
- */
-static u64 cvt_kvaddr(void *p)
-{
-       struct page *page;
-       u64 paddr = 0;
-
-       page = vmalloc_to_page(p);
-       if (page)
-               paddr = page_to_pfn(page) << PAGE_SHIFT;
-
-       return paddr;
-}
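Since vmalloc memory is only virtually contiguous, cvt_kvaddr() must be applied page by page; a hedged sketch of walking a vmalloc'ed buffer (the loop is illustrative, not driver code):

    /* Sketch: translate each page of a vmalloc'ed buffer individually. */
    static void walk_vmalloc_pages(void *buf, size_t len)
    {
            size_t off;

            for (off = 0; off < len; off += PAGE_SIZE) {
                    u64 paddr = cvt_kvaddr((char *)buf + off);

                    if (!paddr)
                            break;  /* not a vmalloc page */
                    /* ... hand paddr out as an mmap offset token ... */
            }
    }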
-
-static int ipath_get_base_info(struct file *fp,
-                              void __user *ubase, size_t ubase_size)
-{
-       struct ipath_portdata *pd = port_fp(fp);
-       int ret = 0;
-       struct ipath_base_info *kinfo = NULL;
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned subport_cnt;
-       int shared, master;
-       size_t sz;
-
-       subport_cnt = pd->port_subport_cnt;
-       if (!subport_cnt) {
-               shared = 0;
-               master = 0;
-               subport_cnt = 1;
-       } else {
-               shared = 1;
-               master = !subport_fp(fp);
-       }
-
-       sz = sizeof(*kinfo);
-       /* If port sharing is not requested, allow the old size structure */
-       if (!shared)
-               sz -= 7 * sizeof(u64);
-       if (ubase_size < sz) {
-               ipath_cdbg(PROC,
-                          "Base size %zu, need %zu (version mismatch?)\n",
-                          ubase_size, sz);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
-       if (kinfo == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       ret = dd->ipath_f_get_base_info(pd, kinfo);
-       if (ret < 0)
-               goto bail;
-
-       kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
-       kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
-       kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
-       kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
-       /*
-        * have to mmap whole thing
-        */
-       kinfo->spi_rcv_egrbuftotlen =
-               pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-       kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
-       kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
-               pd->port_rcvegrbuf_chunks;
-       kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
-       if (master)
-               kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
-       /*
-        * for this use, this may be ipath_cfgports summed over all chips
-        * that are configured and present
-        */
-       kinfo->spi_nports = dd->ipath_cfgports;
-       /* unit (chip/board) our port is on */
-       kinfo->spi_unit = dd->ipath_unit;
-       /* for now, only a single page */
-       kinfo->spi_tid_maxsize = PAGE_SIZE;
-
-       /*
-        * Doing this per port, and based on the skip value, etc.  This has
-        * to be the actual buffer size, since the protocol code treats it
-        * as an array.
-        *
-        * These have to be set to user addresses in the user code via mmap.
-        * These values are used on return to user code for the mmap target
-        * addresses only.  For 32 bit, same 44 bit address problem, so use
-        * the physical address, not virtual.  Before 2.6.11, using the
-        * page_address() macro worked, but in 2.6.11, even that returns the
-        * full 64 bit address (upper bits all 1's).  So far, using the
-        * physical addresses (or chip offsets, for chip mapping) works, but
-        * no doubt some future kernel release will change that, and we'll be
-        * on to yet another method of dealing with this.
-        */
-       kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
-       kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
-       kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
-       kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
-       kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
-               (void *) dd->ipath_statusp -
-               (void *) dd->ipath_pioavailregs_dma;
-       if (!shared) {
-               kinfo->spi_piocnt = pd->port_piocnt;
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs;
-               kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
-                       dd->ipath_ureg_align * pd->port_port;
-       } else if (master) {
-               kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
-                                   (pd->port_piocnt % subport_cnt);
-               /* Master's PIO buffers are after all the slave's */
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-                       dd->ipath_palign *
-                       (pd->port_piocnt - kinfo->spi_piocnt);
-       } else {
-               unsigned slave = subport_fp(fp) - 1;
-
-               kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-                       dd->ipath_palign * kinfo->spi_piocnt * slave;
-       }
-
-       if (shared) {
-               kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
-                       dd->ipath_ureg_align * pd->port_port;
-               kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
-               kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
-               kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
-
-               kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
-                       PAGE_SIZE * subport_fp(fp));
-
-               kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
-                       pd->port_rcvhdrq_size * subport_fp(fp));
-               kinfo->spi_rcvhdr_tailaddr = 0;
-               kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
-                       pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
-                       subport_fp(fp));
-
-               kinfo->spi_subport_uregbase =
-                       cvt_kvaddr(pd->subport_uregbase);
-               kinfo->spi_subport_rcvegrbuf =
-                       cvt_kvaddr(pd->subport_rcvegrbuf);
-               kinfo->spi_subport_rcvhdr_base =
-                       cvt_kvaddr(pd->subport_rcvhdr_base);
-               ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
-                       kinfo->spi_port, kinfo->spi_runtime_flags,
-                       (unsigned long long) kinfo->spi_subport_uregbase,
-                       (unsigned long long) kinfo->spi_subport_rcvegrbuf,
-                       (unsigned long long) kinfo->spi_subport_rcvhdr_base);
-       }
-
-       /*
-        * All user buffers are 2KB buffers.  If we ever support
-        * giving 4KB buffers to user processes, this will need some
-        * work.
-        */
-       kinfo->spi_pioindex = (kinfo->spi_piobufbase -
-               (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
-       kinfo->spi_pioalign = dd->ipath_palign;
-
-       kinfo->spi_qpair = IPATH_KD_QP;
-       /*
-        * User-mode PIO buffers are always 2KB, even when 4KB buffers
-        * can be received, and can be sent via the kernel; this is
-        * ibmaxlen for a 2K MTU.
-        */
-       kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
-       kinfo->spi_mtu = dd->ipath_ibmaxlen;    /* maxlen, not ibmtu */
-       kinfo->spi_port = pd->port_port;
-       kinfo->spi_subport = subport_fp(fp);
-       kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
-       kinfo->spi_hw_version = dd->ipath_revision;
-
-       if (master)
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
-
-       sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
-       if (copy_to_user(ubase, kinfo, sz))
-               ret = -EFAULT;
-
-bail:
-       kfree(kinfo);
-       return ret;
-}
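-
-/*
- * Editorial sketch, not driver code: the copy just above is clamped to
- * the smaller of ubase_size and sizeof(*kinfo), so a user library built
- * against an older (smaller) struct ipath_base_info still receives a
- * valid prefix, while a newer library simply gets only the fields this
- * driver knows about.  The helper name is illustrative only.
- */
-static inline size_t base_info_copy_len_sketch(size_t ubase_size,
-                                              size_t kinfo_size)
-{
-       /* mirrors the clamp used before copy_to_user() above */
-       return (ubase_size < kinfo_size) ? ubase_size : kinfo_size;
-}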
-
-/**
- * ipath_tid_update - update a port TID
- * @pd: the port
- * @fp: the ipath device file
- * @ti: the TID information
- *
- * The new implementation as of Oct 2004 is that the driver assigns
- * the tid and returns it to the caller.  To make it easier to
- * catch bugs, and to reduce search time, we keep a cursor for
- * each port, walking the shadow tid array to find one that's not
- * in use.
- *
- * For now, if we can't allocate the full list, we fail, although
- * in the long run, we'll allocate as many as we can, and the
- * caller will deal with that by trying the remaining pages later.
- * That means that when we fail, we have to mark the tids as not in
- * use again, in our shadow copy.
- *
- * It's up to the caller to free the tids when they are done.
- * We'll unlock the pages as they free them.
- *
- * Also, right now we are locking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.
- */
-static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
-                           const struct ipath_tid_info *ti)
-{
-       int ret = 0, ntids;
-       u32 tid, porttid, cnt, i, tidcnt, tidoff;
-       u16 *tidlist;
-       struct ipath_devdata *dd = pd->port_dd;
-       u64 physaddr;
-       unsigned long vaddr;
-       u64 __iomem *tidbase;
-       unsigned long tidmap[8];
-       struct page **pagep = NULL;
-       unsigned subport = subport_fp(fp);
-
-       if (!dd->ipath_pageshadow) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       cnt = ti->tidcnt;
-       if (!cnt) {
-               ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
-                         (unsigned long long) ti->tidlist);
-               /*
-                * Should we treat this as success?  Likely a bug, so
-                * return -EFAULT for now.
-                */
-               ret = -EFAULT;
-               goto done;
-       }
-       porttid = pd->port_port * dd->ipath_rcvtidcnt;
-       if (!pd->port_subport_cnt) {
-               tidcnt = dd->ipath_rcvtidcnt;
-               tid = pd->port_tidcursor;
-               tidoff = 0;
-       } else if (!subport) {
-               tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-                        (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-               tidoff = dd->ipath_rcvtidcnt - tidcnt;
-               porttid += tidoff;
-               tid = tidcursor_fp(fp);
-       } else {
-               tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-               tidoff = tidcnt * (subport - 1);
-               porttid += tidoff;
-               tid = tidcursor_fp(fp);
-       }
-       if (cnt > tidcnt) {
-               /* make sure it all fits in port_tid_pg_list */
-               dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
-                        "TIDs, only trying max (%u)\n", cnt, tidcnt);
-               cnt = tidcnt;
-       }
-       pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
-       tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
-
-       memset(tidmap, 0, sizeof(tidmap));
-       /* before decrementing; this is the chip's actual TID count */
-       ntids = tidcnt;
-       tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  porttid * sizeof(*tidbase));
-
-       ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
-                  pd->port_port, cnt, tid, tidbase);
-
-       /* virtual address of first page in transfer */
-       vaddr = ti->tidvaddr;
-       if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
-                      cnt * PAGE_SIZE)) {
-               ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
-                         (void *)vaddr, cnt);
-               ret = -EFAULT;
-               goto done;
-       }
-       ret = ipath_get_user_pages(vaddr, cnt, pagep);
-       if (ret) {
-               if (ret == -EBUSY) {
-                       ipath_dbg("Failed to lock addr %p, %u pages "
-                                 "(already locked)\n",
-                                 (void *) vaddr, cnt);
-                       /*
-                        * For now, continue and see what happens.  With
-                        * the new implementation, this should never
-                        * happen, unless perhaps the user has pinned the
-                        * pages themselves (something we need to test).
-                        */
-                       ret = 0;
-               } else {
-                       dev_info(&dd->pcidev->dev,
-                                "Failed to lock addr %p, %u pages: "
-                                "errno %d\n", (void *) vaddr, cnt, -ret);
-                       goto done;
-               }
-       }
-       for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
-               for (; ntids--; tid++) {
-                       if (tid == tidcnt)
-                               tid = 0;
-                       if (!dd->ipath_pageshadow[porttid + tid])
-                               break;
-               }
-               if (ntids < 0) {
-                       /*
-                        * oops, wrapped all the way through their TIDs,
-                        * and didn't have enough free; see comments at
-                        * start of routine
-                        */
-                       ipath_dbg("Not enough free TIDs for %u pages "
-                                 "(index %d), failing\n", cnt, i);
-                       i--;    /* last tidlist[i] not filled in */
-                       ret = -ENOMEM;
-                       break;
-               }
-               tidlist[i] = tid + tidoff;
-               ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
-                          "vaddr %lx\n", i, tid + tidoff, vaddr);
-               /* we "know" system pages and TID pages are same size */
-               dd->ipath_pageshadow[porttid + tid] = pagep[i];
-               dd->ipath_physshadow[porttid + tid] = ipath_map_page(
-                       dd->pcidev, pagep[i], 0, PAGE_SIZE,
-                       PCI_DMA_FROMDEVICE);
-               /*
-                * No need for an atomic op (or its overhead) here;
-                * tidmap is local to this call.
-                */
-               __set_bit(tid, tidmap);
-               physaddr = dd->ipath_physshadow[porttid + tid];
-               ipath_stats.sps_pagelocks++;
-               ipath_cdbg(VERBOSE,
-                          "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
-                          tid, vaddr, (unsigned long long) physaddr,
-                          pagep[i]);
-               dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
-                                   physaddr);
-               /*
-                * Don't check this tid in ipath_pageshadow, since we
-                * just filled it in; start with the next one.
-                */
-               tid++;
-       }
-
-       if (ret) {
-               u32 limit;
-       cleanup:
-               /* jump here if copy out of updated info failed... */
-               ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
-                         -ret, i, cnt);
-               /* same code that's in ipath_free_tid() */
-               limit = sizeof(tidmap) * BITS_PER_BYTE;
-               if (limit > tidcnt)
-                       /* just in case size changes in future */
-                       limit = tidcnt;
-               tid = find_first_bit((const unsigned long *)tidmap, limit);
-               for (; tid < limit; tid++) {
-                       if (!test_bit(tid, tidmap))
-                               continue;
-                       if (dd->ipath_pageshadow[porttid + tid]) {
-                               ipath_cdbg(VERBOSE, "Freeing TID %u\n",
-                                          tid);
-                               dd->ipath_f_put_tid(dd, &tidbase[tid],
-                                                   RCVHQ_RCV_TYPE_EXPECTED,
-                                                   dd->ipath_tidinvalid);
-                               pci_unmap_page(dd->pcidev,
-                                       dd->ipath_physshadow[porttid + tid],
-                                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                               dd->ipath_pageshadow[porttid + tid] = NULL;
-                               ipath_stats.sps_pageunlocks++;
-                       }
-               }
-               ipath_release_user_pages(pagep, cnt);
-       } else {
-               /*
-                * Copy the updated array, with ipath_tid's filled in,
-                * back to user.  Since we did the copy-in already, this
-                * "should never fail".  If it does, we have to clean up.
-                */
-               if (copy_to_user((void __user *)
-                                (unsigned long) ti->tidlist,
-                                tidlist, cnt * sizeof(*tidlist))) {
-                       ret = -EFAULT;
-                       goto cleanup;
-               }
-               if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
-                                tidmap, sizeof(tidmap))) {
-                       ret = -EFAULT;
-                       goto cleanup;
-               }
-               if (tid == tidcnt)
-                       tid = 0;
-               if (!pd->port_subport_cnt)
-                       pd->port_tidcursor = tid;
-               else
-                       tidcursor_fp(fp) = tid;
-       }
-
-done:
-       if (ret)
-               ipath_dbg("Failed to map %u TID pages, failing with %d\n",
-                         ti->tidcnt, -ret);
-       return ret;
-}
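-
-/*
- * Editorial sketch, not driver code: the cursor-based free-TID search
- * that ipath_tid_update() above performs inline, reduced to its core.
- * "shadow" stands in for dd->ipath_pageshadow + porttid; all names are
- * illustrative only.
- */
-static inline int find_free_tid_sketch(struct page **shadow, u32 tidcnt,
-                                      u32 cursor)
-{
-       u32 n;
-
-       for (n = 0; n < tidcnt; n++, cursor++) {
-               if (cursor >= tidcnt)   /* wrap around at most once */
-                       cursor = 0;
-               if (!shadow[cursor])    /* unused slot found */
-                       return cursor;
-       }
-       return -1;                      /* every TID is in use */
-}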
-
-/**
- * ipath_tid_free - free a port TID
- * @pd: the port
- * @subport: the subport
- * @ti: the TID info
- *
- * right now we are unlocking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.  We check that the TID is in range for this port
- * but otherwise don't check validity; if the user has an error and
- * frees the wrong tid, it's only their own data that can thereby
- * be corrupted.  We do check that the TID was in use, for sanity.
- * We always use our idea of the saved address, not the address that
- * they pass in to us.
- */
-static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
-                         const struct ipath_tid_info *ti)
-{
-       int ret = 0;
-       u32 tid, porttid, cnt, limit, tidcnt;
-       struct ipath_devdata *dd = pd->port_dd;
-       u64 __iomem *tidbase;
-       unsigned long tidmap[8];
-
-       if (!dd->ipath_pageshadow) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
-                          sizeof(tidmap))) {
-               ret = -EFAULT;
-               goto done;
-       }
-
-       porttid = pd->port_port * dd->ipath_rcvtidcnt;
-       if (!pd->port_subport_cnt)
-               tidcnt = dd->ipath_rcvtidcnt;
-       else if (!subport) {
-               tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-                        (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-               porttid += dd->ipath_rcvtidcnt - tidcnt;
-       } else {
-               tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-               porttid += tidcnt * (subport - 1);
-       }
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  porttid * sizeof(*tidbase));
-
-       limit = sizeof(tidmap) * BITS_PER_BYTE;
-       if (limit > tidcnt)
-               /* just in case size changes in future */
-               limit = tidcnt;
-       tid = find_first_bit(tidmap, limit);
-       ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
-                  "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
-                  limit, tid, porttid);
-       for (cnt = 0; tid < limit; tid++) {
-               /*
-                * A possible small optimization (not done here): if we
-                * detect a run of 3 or so clear bits, use find_first_bit
-                * again.  That would mainly accelerate the case where we
-                * wrapped, so we have some bits at the beginning, some at
-                * the end, and a big gap in the middle.
-                */
-               if (!test_bit(tid, tidmap))
-                       continue;
-               cnt++;
-               if (dd->ipath_pageshadow[porttid + tid]) {
-                       struct page *p;
-                       p = dd->ipath_pageshadow[porttid + tid];
-                       dd->ipath_pageshadow[porttid + tid] = NULL;
-                       ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
-                                  pid_nr(pd->port_pid), tid);
-                       dd->ipath_f_put_tid(dd, &tidbase[tid],
-                                           RCVHQ_RCV_TYPE_EXPECTED,
-                                           dd->ipath_tidinvalid);
-                       pci_unmap_page(dd->pcidev,
-                               dd->ipath_physshadow[porttid + tid],
-                               PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                       ipath_release_user_pages(&p, 1);
-                       ipath_stats.sps_pageunlocks++;
-               } else {
-                       ipath_dbg("Unused tid %u, ignoring\n", tid);
-               }
-       }
-       if (cnt != ti->tidcnt)
-               ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
-                         ti->tidcnt, cnt);
-done:
-       if (ret)
-               ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
-                         ti->tidcnt, -ret);
-       return ret;
-}
-
-/**
- * ipath_set_part_key - set a partition key
- * @pd: the port
- * @key: the key
- *
- * We can have up to 4 active at a time (other than the default, which is
- * always allowed).  This is somewhat tricky, since multiple ports may set
- * the same key, so we reference-count them, and clean up at exit.  All 4
- * partition keys are packed into a single infinipath register.  It's an
- * error for a process to set the same pkey multiple times.  We provide
- * no mechanism to de-allocate a pkey at this time; we may eventually
- * need to.  This uses atomic operations and no locking, and makes only
- * a single pass through what's available.  That should be more than
- * adequate for some time; spinlocks or the like can be added if and
- * when necessary.
- */
-static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       int i, any = 0, pidx = -1;
-       u16 lkey = key & 0x7FFF;
-       int ret;
-
-       if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
-               /* nothing to do; this key always valid */
-               ret = 0;
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
-                  "%hx:%x %hx:%x %hx:%x %hx:%x\n",
-                  pd->port_port, key, dd->ipath_pkeys[0],
-                  atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
-                  atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
-                  atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
-                  atomic_read(&dd->ipath_pkeyrefs[3]));
-
-       if (!lkey) {
-               ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
-                          pd->port_port);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * Set the full membership bit, because it has to be
-        * set in the register or the packet, and it seems
-        * cleaner to set in the register than to force all
-        * callers to set it. (see bug 4331)
-        */
-       key |= 0x8000;
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               if (!pd->port_pkeys[i] && pidx == -1)
-                       pidx = i;
-               if (pd->port_pkeys[i] == key) {
-                       ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
-                                  "(%x) more than once\n",
-                                  pd->port_port, key);
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (pidx == -1) {
-               ipath_dbg("All pkeys for port %u already in use, "
-                         "can't set %x\n", pd->port_port, key);
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i]) {
-                       any++;
-                       continue;
-               }
-               if (dd->ipath_pkeys[i] == key) {
-                       atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
-
-                       if (atomic_inc_return(pkrefs) > 1) {
-                               pd->port_pkeys[pidx] = key;
-                               ipath_cdbg(VERBOSE, "p%u set key %x "
-                                          "matches #%d, count now %d\n",
-                                          pd->port_port, key, i,
-                                          atomic_read(pkrefs));
-                               ret = 0;
-                               goto bail;
-                       } else {
-                               /*
-                                * lost race, decrement count, catch below
-                                */
-                               atomic_dec(pkrefs);
-                               ipath_cdbg(VERBOSE, "Lost race, count was "
-                                          "0, after dec, it's %d\n",
-                                          atomic_read(pkrefs));
-                               any++;
-                       }
-               }
-               if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-                       /*
-                        * It makes no sense to have both the limited and
-                        * full membership PKEY set at the same time since
-                        * the full membership one will disable the
-                        * limited one.
-                        */
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (!any) {
-               ipath_dbg("port %u, all pkeys already in use, "
-                         "can't set %x\n", pd->port_port, key);
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i] &&
-                   atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-                       u64 pkey;
-
-                       /* for ipathstats, etc. */
-                       ipath_stats.sps_pkeys[i] = lkey;
-                       pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
-                       pkey =
-                               (u64) dd->ipath_pkeys[0] |
-                               ((u64) dd->ipath_pkeys[1] << 16) |
-                               ((u64) dd->ipath_pkeys[2] << 32) |
-                               ((u64) dd->ipath_pkeys[3] << 48);
-                       ipath_cdbg(PROC, "p%u set key %x in #%d, "
-                                  "portidx %d, new pkey reg %llx\n",
-                                  pd->port_port, key, i, pidx,
-                                  (unsigned long long) pkey);
-                       ipath_write_kreg(
-                               dd, dd->ipath_kregs->kr_partitionkey, pkey);
-
-                       ret = 0;
-                       goto bail;
-               }
-       }
-       ipath_dbg("port %u, all pkeys already in use 2nd pass, "
-                 "can't set %x\n", pd->port_port, key);
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
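-
-/*
- * Editorial sketch, not driver code: how the four 16-bit pkeys are
- * packed into the single 64-bit partition key register written above
- * (and again in ipath_clean_part_key() below).
- */
-static inline u64 pack_pkeys_sketch(const u16 pkeys[4])
-{
-       return (u64) pkeys[0] |
-              ((u64) pkeys[1] << 16) |
-              ((u64) pkeys[2] << 32) |
-              ((u64) pkeys[3] << 48);
-}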
-
-/**
- * ipath_manage_rcvq - manage a port's receive queue
- * @pd: the port
- * @subport: the subport
- * @start_stop: action to carry out
- *
- * start_stop == 0 disables receive on the port, for use in queue
- * overflow conditions.  start_stop == 1 re-enables, to be used to
- * re-init the software copy of the head register.
- */
-static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
-                            int start_stop)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-
-       ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
-                  start_stop ? "en" : "dis", dd->ipath_unit,
-                  pd->port_port, subport);
-       if (subport)
-               goto bail;
-       /* atomically set or clear the receive-enable bit for this port */
-       if (start_stop) {
-               /*
-                * On enable, force in-memory copy of the tail register to
-                * 0, so that protocol code doesn't have to worry about
-                * whether or not the chip has yet updated the in-memory
-                * copy or not on return from the system call. The chip
-                * always resets its tail register back to 0 on a
-                * transition from disabled to enabled.  This could cause a
-                * problem if software was broken, and did the enable w/o
-                * the disable, but eventually the in-memory copy will be
-                * updated and correct itself, even in the face of software
-                * bugs.
-                */
-               if (pd->port_rcvhdrtail_kvaddr)
-                       ipath_clear_rcvhdrtail(pd);
-               set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-                       &dd->ipath_rcvctrl);
-       } else {
-               clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
-                         &dd->ipath_rcvctrl);
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-       /* now be sure chip saw it before we return */
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       if (start_stop) {
-               /*
-                * And try to be sure that the tail reg update has
-                * happened too.  This should in theory interlock with
-                * the RXE changes to the tail register.  Don't assign
-                * the result to the in-memory copy of the tail register,
-                * since we could overwrite an update by the chip if we
-                * did.
-                */
-               ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-       }
-       /* always; new head should be equal to new tail; see above */
-bail:
-       return 0;
-}
-
-static void ipath_clean_part_key(struct ipath_portdata *pd,
-                                struct ipath_devdata *dd)
-{
-       int i, j, pchanged = 0;
-       u64 oldpkey;
-
-       /* for debugging only */
-       oldpkey = (u64) dd->ipath_pkeys[0] |
-               ((u64) dd->ipath_pkeys[1] << 16) |
-               ((u64) dd->ipath_pkeys[2] << 32) |
-               ((u64) dd->ipath_pkeys[3] << 48);
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               if (!pd->port_pkeys[i])
-                       continue;
-               ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
-                          pd->port_pkeys[i]);
-               for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
-                       /* check for match independent of the global bit */
-                       if ((dd->ipath_pkeys[j] & 0x7fff) !=
-                           (pd->port_pkeys[i] & 0x7fff))
-                               continue;
-                       if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
-                               ipath_cdbg(VERBOSE, "p%u clear key "
-                                          "%x matches #%d\n",
-                                          pd->port_port,
-                                          pd->port_pkeys[i], j);
-                               ipath_stats.sps_pkeys[j] =
-                                       dd->ipath_pkeys[j] = 0;
-                               pchanged++;
-                       } else {
-                               ipath_cdbg(VERBOSE, "p%u key %x matches #%d, "
-                                          "but ref still %d\n", pd->port_port,
-                                          pd->port_pkeys[i], j,
-                                          atomic_read(&dd->ipath_pkeyrefs[j]));
-                               break;
-                       }
-               }
-               pd->port_pkeys[i] = 0;
-       }
-       if (pchanged) {
-               u64 pkey = (u64) dd->ipath_pkeys[0] |
-                       ((u64) dd->ipath_pkeys[1] << 16) |
-                       ((u64) dd->ipath_pkeys[2] << 32) |
-                       ((u64) dd->ipath_pkeys[3] << 48);
-               ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
-                          "new pkey reg %llx\n", pd->port_port,
-                          (unsigned long long) oldpkey,
-                          (unsigned long long) pkey);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-                                pkey);
-       }
-}
-
-/*
- * Initialize the port data with the receive buffer sizes
- * so this can be done while the master port is locked.
- * Otherwise, there is a race with a slave opening the port
- * and seeing these fields uninitialized.
- */
-static void init_user_egr_sizes(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned egrperchunk, egrcnt, size;
-
-       /*
-        * To avoid wasting a lot of memory, we allocate 32KB chunks of
-        * physically contiguous memory, advance through it until used
-        * up, and then allocate more.  Of course, we need memory to
-        * store those extra pointers, now.  We started out with 256KB
-        * chunks, but under heavy memory pressure (creating large files
-        * and then copying them over NFS while doing lots of MPI jobs)
-        * we hit allocation failures, even though we can sleep (2.6.10),
-        * and still got failures at 64KB.  32KB is the lowest we can go
-        * without wasting additional memory.
-        */
-       size = 0x8000;
-       egrperchunk = size / dd->ipath_rcvegrbufsize;
-       egrcnt = dd->ipath_rcvegrcnt;
-       pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
-       pd->port_rcvegrbufs_perchunk = egrperchunk;
-       pd->port_rcvegrbuf_size = size;
-}
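-
-/*
- * Editorial worked example, not driver code: with the 32KB (0x8000)
- * chunks above and a hypothetical 2048-byte eager buffer, egrperchunk
- * is 32768 / 2048 = 16, so egrcnt = 100 needs (100 + 16 - 1) / 16 = 7
- * chunks, the last only partially used.
- */
-static inline unsigned egr_chunks_sketch(unsigned egrcnt, unsigned egrsize)
-{
-       unsigned perchunk = 0x8000 / egrsize;   /* buffers per 32KB chunk */
-
-       return (egrcnt + perchunk - 1) / perchunk;      /* round up */
-}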
-
-/**
- * ipath_create_user_egr - allocate eager TID buffers
- * @pd: the port to allocate TID buffers for
- *
- * This routine is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance.
- * This is the user port version.
- *
- * Allocate the eager TID buffers and program them into infinipath.
- * They are no longer completely contiguous; we do multiple allocation
- * calls.
- */
-static int ipath_create_user_egr(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
-       size_t size;
-       int ret;
-       gfp_t gfp_flags;
-
-       /*
-        * GFP_USER, but without GFP_FS, so the buffer cache can be
-        * coalesced (we hope); otherwise, even at order 4, heavy
-        * filesystem activity makes these allocations fail.  __GFP_COMP
-        * lets us use compound pages.
-        */
-       gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
-       egrcnt = dd->ipath_rcvegrcnt;
-       /* TID number offset for this port */
-       egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
-       egrsize = dd->ipath_rcvegrbufsize;
-       ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
-                  "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
-
-       chunk = pd->port_rcvegrbuf_chunks;
-       egrperchunk = pd->port_rcvegrbufs_perchunk;
-       size = pd->port_rcvegrbuf_size;
-       pd->port_rcvegrbuf = kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf[0]),
-                                          GFP_KERNEL);
-       if (!pd->port_rcvegrbuf) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       pd->port_rcvegrbuf_phys =
-               kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf_phys[0]),
-                             GFP_KERNEL);
-       if (!pd->port_rcvegrbuf_phys) {
-               ret = -ENOMEM;
-               goto bail_rcvegrbuf;
-       }
-       for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-               pd->port_rcvegrbuf[e] = dma_alloc_coherent(
-                       &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
-                       gfp_flags);
-
-               if (!pd->port_rcvegrbuf[e]) {
-                       ret = -ENOMEM;
-                       goto bail_rcvegrbuf_phys;
-               }
-       }
-
-       pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
-
-       for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
-               dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
-               unsigned i;
-
-               for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
-                       dd->ipath_f_put_tid(dd, e + egroff +
-                                           (u64 __iomem *)
-                                           ((char __iomem *)
-                                            dd->ipath_kregbase +
-                                            dd->ipath_rcvegrbase),
-                                           RCVHQ_RCV_TYPE_EAGER, pa);
-                       pa += egrsize;
-               }
-               cond_resched(); /* don't hog the cpu */
-       }
-
-       ret = 0;
-       goto bail;
-
-bail_rcvegrbuf_phys:
-       for (e = 0; e < pd->port_rcvegrbuf_chunks &&
-               pd->port_rcvegrbuf[e]; e++) {
-               dma_free_coherent(&dd->pcidev->dev, size,
-                                 pd->port_rcvegrbuf[e],
-                                 pd->port_rcvegrbuf_phys[e]);
-       }
-       kfree(pd->port_rcvegrbuf_phys);
-       pd->port_rcvegrbuf_phys = NULL;
-bail_rcvegrbuf:
-       kfree(pd->port_rcvegrbuf);
-       pd->port_rcvegrbuf = NULL;
-bail:
-       return ret;
-}
-
-/* common code for the mappings on dma_alloc_coherent mem */
-static int ipath_mmap_mem(struct vm_area_struct *vma,
-       struct ipath_portdata *pd, unsigned len, int write_ok,
-       void *kvaddr, char *what)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned long pfn;
-       int ret;
-
-       if ((vma->vm_end - vma->vm_start) > len) {
-               dev_info(&dd->pcidev->dev,
-                        "FAIL on %s: len %lx > %x\n", what,
-                        vma->vm_end - vma->vm_start, len);
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       if (!write_ok) {
-               if (vma->vm_flags & VM_WRITE) {
-                       dev_info(&dd->pcidev->dev,
-                                "%s must be mapped readonly\n", what);
-                       ret = -EPERM;
-                       goto bail;
-               }
-
-               /* don't allow them to later change with mprotect */
-               vma->vm_flags &= ~VM_MAYWRITE;
-       }
-
-       pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
-       ret = remap_pfn_range(vma, vma->vm_start, pfn,
-                             len, vma->vm_page_prot);
-       if (ret)
-               dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
-                        "bytes r%c failed: %d\n", what, pd->port_port,
-                        pfn, len, write_ok?'w':'o', ret);
-       else
-               ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
-                          "r%c\n", what, pd->port_port, pfn, len,
-                          write_ok?'w':'o');
-bail:
-       return ret;
-}
-
-static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
-                    u64 ureg)
-{
-       unsigned long phys;
-       int ret;
-
-       /*
-        * This is real hardware, so use io_remap.  This is the mechanism
-        * for the user process to update the head registers for their port
-        * in the chip.
-        */
-       if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
-               dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
-                        "%lx > PAGE\n", vma->vm_end - vma->vm_start);
-               ret = -EFAULT;
-       } else {
-               phys = dd->ipath_physaddr + ureg;
-               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-               vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-               ret = io_remap_pfn_range(vma, vma->vm_start,
-                                        phys >> PAGE_SHIFT,
-                                        vma->vm_end - vma->vm_start,
-                                        vma->vm_page_prot);
-       }
-       return ret;
-}
-
-static int mmap_piobufs(struct vm_area_struct *vma,
-                       struct ipath_devdata *dd,
-                       struct ipath_portdata *pd,
-                       unsigned piobufs, unsigned piocnt)
-{
-       unsigned long phys;
-       int ret;
-
-       /*
-        * When we map the PIO buffers in the chip, we want to map them as
-        * writeonly, no read possible.   This prevents access to previous
-        * process data, and catches users who might try to read the i/o
-        * space due to a bug.
-        */
-       if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
-               dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
-                        "reqlen %lx > PAGE\n",
-                        vma->vm_end - vma->vm_start);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       phys = dd->ipath_physaddr + piobufs;
-
-#if defined(__powerpc__)
-       /* There isn't a generic way to specify writethrough mappings */
-       pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
-       pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
-       pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
-#endif
-
-       /*
-        * Don't allow the mapping to later become readable via
-        * mprotect (it is normally not mapped readable in the first
-        * place).
-        */
-       vma->vm_flags &= ~VM_MAYREAD;
-       vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-
-       ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
-                                vma->vm_end - vma->vm_start,
-                                vma->vm_page_prot);
-bail:
-       return ret;
-}
-
-static int mmap_rcvegrbufs(struct vm_area_struct *vma,
-                          struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned long start, size;
-       size_t total_size, i;
-       unsigned long pfn;
-       int ret;
-
-       size = pd->port_rcvegrbuf_size;
-       total_size = pd->port_rcvegrbuf_chunks * size;
-       if ((vma->vm_end - vma->vm_start) > total_size) {
-               dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
-                        "reqlen %lx > actual %lx\n",
-                        vma->vm_end - vma->vm_start,
-                        (unsigned long) total_size);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (vma->vm_flags & VM_WRITE) {
-               dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
-                        "writable (flags=%lx)\n", vma->vm_flags);
-               ret = -EPERM;
-               goto bail;
-       }
-       /* don't allow them to later change to writeable with mprotect */
-       vma->vm_flags &= ~VM_MAYWRITE;
-
-       start = vma->vm_start;
-
-       for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
-               pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
-               ret = remap_pfn_range(vma, start, pfn, size,
-                                     vma->vm_page_prot);
-               if (ret < 0)
-                       goto bail;
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/*
- * ipath_file_vma_fault - handle a VMA page fault.
- */
-static int ipath_file_vma_fault(struct vm_area_struct *vma,
-                                       struct vm_fault *vmf)
-{
-       struct page *page;
-
-       page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
-       if (!page)
-               return VM_FAULT_SIGBUS;
-       get_page(page);
-       vmf->page = page;
-
-       return 0;
-}
-
-static const struct vm_operations_struct ipath_file_vm_ops = {
-       .fault = ipath_file_vma_fault,
-};
-
-static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
-                      struct ipath_portdata *pd, unsigned subport)
-{
-       unsigned long len;
-       struct ipath_devdata *dd;
-       void *addr;
-       size_t size;
-       int ret = 0;
-
-       /* If the port is not shared, all addresses should be physical */
-       if (!pd->port_subport_cnt)
-               goto bail;
-
-       dd = pd->port_dd;
-       size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-
-       /*
-        * Each process has all the subport uregbase, rcvhdrq, and
-        * rcvegrbufs mmapped - as an array for all the processes,
-        * and also separately for this process.
-        */
-       if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
-               addr = pd->subport_uregbase;
-               size = PAGE_SIZE * pd->port_subport_cnt;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
-               addr = pd->subport_rcvhdr_base;
-               size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
-               addr = pd->subport_rcvegrbuf;
-               size *= pd->port_subport_cnt;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
-                                       PAGE_SIZE * subport)) {
-               addr = pd->subport_uregbase + PAGE_SIZE * subport;
-               size = PAGE_SIZE;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
-                                       pd->port_rcvhdrq_size * subport)) {
-               addr = pd->subport_rcvhdr_base +
-                       pd->port_rcvhdrq_size * subport;
-               size = pd->port_rcvhdrq_size;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
-                                       size * subport)) {
-               addr = pd->subport_rcvegrbuf + size * subport;
-               /* rcvegrbufs are read-only on the slave */
-               if (vma->vm_flags & VM_WRITE) {
-                       dev_info(&dd->pcidev->dev,
-                                "Can't map eager buffers as "
-                                "writable (flags=%lx)\n", vma->vm_flags);
-                       ret = -EPERM;
-                       goto bail;
-               }
-               /*
-                * Don't allow permission to later change to writeable
-                * with mprotect.
-                */
-               vma->vm_flags &= ~VM_MAYWRITE;
-       } else {
-               goto bail;
-       }
-       len = vma->vm_end - vma->vm_start;
-       if (len > size) {
-               ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
-       vma->vm_ops = &ipath_file_vm_ops;
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       ret = 1;
-
-bail:
-       return ret;
-}
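-
-/*
- * Editorial sketch, not driver code: the vm_pgoff round trip between
- * mmap_kvaddr() above and ipath_file_vma_fault() further up.  The
- * kernel virtual address of the shared vmalloc region is stashed in
- * vm_pgoff so the fault handler can recover the backing page; addr is
- * assumed page-aligned, as vmalloc addresses are.
- */
-static inline struct page *kvaddr_pgoff_roundtrip_sketch(void *addr)
-{
-       unsigned long pgoff = (unsigned long) addr >> PAGE_SHIFT;
-
-       /* what the fault handler effectively does with vmf->pgoff */
-       return vmalloc_to_page((void *)(pgoff << PAGE_SHIFT));
-}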
-
-/**
- * ipath_mmap - mmap various structures into user space
- * @fp: the file pointer
- * @vma: the VM area
- *
- * We use this to have a shared buffer between the kernel and the user code
- * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
- * buffers in the chip.  We have the open and close entries so we can bump
- * the ref count and keep the driver from being unloaded while still mapped.
- */
-static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
-{
-       struct ipath_portdata *pd;
-       struct ipath_devdata *dd;
-       u64 pgaddr, ureg;
-       unsigned piobufs, piocnt;
-       int ret;
-
-       pd = port_fp(fp);
-       if (!pd) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       dd = pd->port_dd;
-
-       /*
-        * This is the ipath_do_user_init() code, mapping the shared buffers
-        * into the user process. The address referred to by vm_pgoff is the
-        * file offset passed via mmap().  For shared ports, this is the
-        * kernel vmalloc() address of the pages to share with the master.
-        * For non-shared or master ports, this is a physical address.
-        * We only do one mmap for each space mapped.
-        */
-       pgaddr = vma->vm_pgoff << PAGE_SHIFT;
-
-       /*
-        * Check for 0 in case one of the allocations failed, but user
-        * called mmap anyway.
-        */
-       if (!pgaddr)  {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
-                  (unsigned long long) pgaddr, vma->vm_start,
-                  vma->vm_end - vma->vm_start, dd->ipath_unit,
-                  pd->port_port, subport_fp(fp));
-
-       /*
-        * Physical addresses must fit in 40 bits for our hardware.
-        * Check for kernel virtual addresses first, anything else must
-        * match a HW or memory address.
-        */
-       ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
-       if (ret) {
-               if (ret > 0)
-                       ret = 0;
-               goto bail;
-       }
-
-       ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
-       if (!pd->port_subport_cnt) {
-               /* port is not shared */
-               piocnt = pd->port_piocnt;
-               piobufs = pd->port_piobufs;
-       } else if (!subport_fp(fp)) {
-               /* caller is the master */
-               piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
-                        (pd->port_piocnt % pd->port_subport_cnt);
-               piobufs = pd->port_piobufs +
-                       dd->ipath_palign * (pd->port_piocnt - piocnt);
-       } else {
-               unsigned slave = subport_fp(fp) - 1;
-
-               /* caller is a slave */
-               piocnt = pd->port_piocnt / pd->port_subport_cnt;
-               piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
-       }
-
-       if (pgaddr == ureg)
-               ret = mmap_ureg(vma, dd, ureg);
-       else if (pgaddr == piobufs)
-               ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
-       else if (pgaddr == dd->ipath_pioavailregs_phys)
-               /* in-memory copy of pioavail registers */
-               ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-                                    (void *) dd->ipath_pioavailregs_dma,
-                                    "pioavail registers");
-       else if (pgaddr == pd->port_rcvegr_phys)
-               ret = mmap_rcvegrbufs(vma, pd);
-       else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
-               /*
-                * The rcvhdrq itself; readonly except on HT (so have
-                * to allow writable mapping), multiple pages, contiguous
-                * from an i/o perspective.
-                */
-               ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
-                                    pd->port_rcvhdrq,
-                                    "rcvhdrq");
-       else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
-               /* in-memory copy of rcvhdrq tail register */
-               ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-                                    pd->port_rcvhdrtail_kvaddr,
-                                    "rcvhdrq tail");
-       else
-               ret = -EINVAL;
-
-       vma->vm_private_data = NULL;
-
-       if (ret < 0)
-               dev_info(&dd->pcidev->dev,
-                        "Failure %d on off %llx len %lx\n",
-                        -ret, (unsigned long long)pgaddr,
-                        vma->vm_end - vma->vm_start);
-bail:
-       return ret;
-}
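-
-/*
- * Editorial sketch, not driver or library code: user space is expected
- * to take the spi_* values returned via the base-info structure and
- * pass them back verbatim as mmap() offsets, e.g. (names and the field
- * chosen are illustrative only):
- *
- *     buf = mmap(NULL, len, PROT_WRITE, MAP_SHARED, fd,
- *                (off_t) kinfo.spi_piobufbase);
- *
- * which lands in the pgaddr == piobufs case dispatched above.
- */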
-
-static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
-{
-       unsigned pollflag = 0;
-
-       if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
-           pd->port_hdrqfull != pd->port_hdrqfull_poll) {
-               pollflag |= POLLIN | POLLRDNORM;
-               pd->port_hdrqfull_poll = pd->port_hdrqfull;
-       }
-
-       return pollflag;
-}
-
-static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
-                                     struct file *fp,
-                                     struct poll_table_struct *pt)
-{
-       unsigned pollflag = 0;
-       struct ipath_devdata *dd;
-
-       dd = pd->port_dd;
-
-       /* variable access in ipath_poll_hdrqfull() needs this */
-       rmb();
-       pollflag = ipath_poll_hdrqfull(pd);
-
-       if (pd->port_urgent != pd->port_urgent_poll) {
-               pollflag |= POLLIN | POLLRDNORM;
-               pd->port_urgent_poll = pd->port_urgent;
-       }
-
-       if (!pollflag) {
-               /* this saves a spin_lock/unlock in interrupt handler... */
-               set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
-               /* flush waiting flag so we don't miss an event... */
-               wmb();
-               poll_wait(fp, &pd->port_wait, pt);
-       }
-
-       return pollflag;
-}
-
-static unsigned int ipath_poll_next(struct ipath_portdata *pd,
-                                   struct file *fp,
-                                   struct poll_table_struct *pt)
-{
-       u32 head;
-       u32 tail;
-       unsigned pollflag = 0;
-       struct ipath_devdata *dd;
-
-       dd = pd->port_dd;
-
-       /* variable access in ipath_poll_hdrqfull() needs this */
-       rmb();
-       pollflag = ipath_poll_hdrqfull(pd);
-
-       head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
-       if (pd->port_rcvhdrtail_kvaddr)
-               tail = ipath_get_rcvhdrtail(pd);
-       else
-               tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-
-       if (head != tail)
-               pollflag |= POLLIN | POLLRDNORM;
-       else {
-               /* this saves a spin_lock/unlock in interrupt handler */
-               set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-               /* flush waiting flag so we don't miss an event */
-               wmb();
-
-               set_bit(pd->port_port + dd->ipath_r_intravail_shift,
-                       &dd->ipath_rcvctrl);
-
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                                dd->ipath_rcvctrl);
-
-               if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
-                       ipath_write_ureg(dd, ur_rcvhdrhead,
-                                        dd->ipath_rhdrhead_intr_off | head,
-                                        pd->port_port);
-
-               poll_wait(fp, &pd->port_wait, pt);
-       }
-
-       return pollflag;
-}
-
-static unsigned int ipath_poll(struct file *fp,
-                              struct poll_table_struct *pt)
-{
-       struct ipath_portdata *pd;
-       unsigned pollflag;
-
-       pd = port_fp(fp);
-       if (!pd)
-               pollflag = 0;
-       else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
-               pollflag = ipath_poll_urgent(pd, fp, pt);
-       else
-               pollflag = ipath_poll_next(pd, fp, pt);
-
-       return pollflag;
-}
-
-static int ipath_supports_subports(int user_swmajor, int user_swminor)
-{
-       /* no subport implementation prior to software version 1.3 */
-       return (user_swmajor > 1) || (user_swminor >= 3);
-}
-
-static int ipath_compatible_subports(int user_swmajor, int user_swminor)
-{
-       /* this code is written long-hand for clarity */
-       if (IPATH_USER_SWMAJOR != user_swmajor) {
-               /* no promise of compatibility if major mismatch */
-               return 0;
-       }
-       if (IPATH_USER_SWMAJOR == 1) {
-               switch (IPATH_USER_SWMINOR) {
-               case 0:
-               case 1:
-               case 2:
-                       /* no subport implementation so cannot be compatible */
-                       return 0;
-               case 3:
-                       /* 3 is only compatible with itself */
-                       return user_swminor == 3;
-               default:
-                       /* >= 4 are compatible (or are expected to be) */
-                       return user_swminor >= 4;
-               }
-       }
-       /* make no promises yet for future major versions */
-       return 0;
-}
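-
-/*
- * Editorial worked example, not driver code: assuming a driver built
- * with IPATH_USER_SWMAJOR == 1 and IPATH_USER_SWMINOR == 3, a 1.3
- * library is compatible (3 matches only itself), a 1.2 library is not
- * (no subport support), and a 2.x library is not (major mismatch).
- */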
-
-static int init_subports(struct ipath_devdata *dd,
-                        struct ipath_portdata *pd,
-                        const struct ipath_user_info *uinfo)
-{
-       int ret = 0;
-       unsigned num_subports;
-       size_t size;
-
-       /*
-        * If the user is requesting zero subports,
-        * skip the subport allocation.
-        */
-       if (uinfo->spu_subport_cnt <= 0)
-               goto bail;
-
-       /* Self-consistency check for ipath_compatible_subports() */
-       if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
-           !ipath_compatible_subports(IPATH_USER_SWMAJOR,
-                                      IPATH_USER_SWMINOR)) {
-               dev_info(&dd->pcidev->dev,
-                        "Inconsistent ipath_compatible_subports()\n");
-               goto bail;
-       }
-
-       /* Check for subport compatibility */
-       if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
-                                      uinfo->spu_userversion & 0xffff)) {
-               dev_info(&dd->pcidev->dev,
-                        "Mismatched user version (%d.%d) and driver "
-                        "version (%d.%d) while port sharing. Ensure "
-                        "that driver and library are from the same "
-                        "release.\n",
-                        (int) (uinfo->spu_userversion >> 16),
-                        (int) (uinfo->spu_userversion & 0xffff),
-                        IPATH_USER_SWMAJOR,
-                        IPATH_USER_SWMINOR);
-               goto bail;
-       }
-       if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       num_subports = uinfo->spu_subport_cnt;
-       pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
-       if (!pd->subport_uregbase) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
-       size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-                    sizeof(u32), PAGE_SIZE) * num_subports;
-       pd->subport_rcvhdr_base = vzalloc(size);
-       if (!pd->subport_rcvhdr_base) {
-               ret = -ENOMEM;
-               goto bail_ureg;
-       }
-
-       pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
-                                       pd->port_rcvegrbuf_size *
-                                       num_subports);
-       if (!pd->subport_rcvegrbuf) {
-               ret = -ENOMEM;
-               goto bail_rhdr;
-       }
-
-       pd->port_subport_cnt = uinfo->spu_subport_cnt;
-       pd->port_subport_id = uinfo->spu_subport_id;
-       pd->active_slaves = 1;
-       set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-       goto bail;
-
-bail_rhdr:
-       vfree(pd->subport_rcvhdr_base);
-bail_ureg:
-       vfree(pd->subport_uregbase);
-       pd->subport_uregbase = NULL;
-bail:
-       return ret;
-}
-
-static int try_alloc_port(struct ipath_devdata *dd, int port,
-                         struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       struct ipath_portdata *pd;
-       int ret;
-
-       pd = dd->ipath_pd[port];
-       if (!pd) {
-               void *ptmp;
-
-               pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
-
-               /*
-                * Allocate memory for use in ipath_tid_update() just once
-                * at open, not per call.  Reduces cost of expected send
-                * setup.
-                */
-               ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
-                              dd->ipath_rcvtidcnt * sizeof(struct page **),
-                              GFP_KERNEL);
-               if (!pd || !ptmp) {
-                       ipath_dev_err(dd, "Unable to allocate portdata "
-                                     "memory, failing open\n");
-                       ret = -ENOMEM;
-                       kfree(pd);
-                       kfree(ptmp);
-                       goto bail;
-               }
-               dd->ipath_pd[port] = pd;
-               dd->ipath_pd[port]->port_port = port;
-               dd->ipath_pd[port]->port_dd = dd;
-               dd->ipath_pd[port]->port_tid_pg_list = ptmp;
-               init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
-       }
-       if (!pd->port_cnt) {
-               pd->userversion = uinfo->spu_userversion;
-               init_user_egr_sizes(pd);
-               ret = init_subports(dd, pd, uinfo);
-               if (ret)
-                       goto bail;
-               ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
-                          current->comm, current->pid, dd->ipath_unit,
-                          port);
-               pd->port_cnt = 1;
-               port_fp(fp) = pd;
-               pd->port_pid = get_pid(task_pid(current));
-               strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
-               ipath_stats.sps_ports++;
-               ret = 0;
-       } else
-               ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
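-/* device present, registers mapped, LID assigned, link not down/disabled */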
-static inline int usable(struct ipath_devdata *dd)
-{
-       return dd &&
-               (dd->ipath_flags & IPATH_PRESENT) &&
-               dd->ipath_kregbase &&
-               dd->ipath_lid &&
-               !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
-                                    | IPATH_LINKUNK));
-}
-
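-/*
- * Walk the user ports (1..cfgports-1) of the requested unit, stopping
- * at the first that is free or that fails with other than -EBUSY.
- */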
-static int find_free_port(int unit, struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       struct ipath_devdata *dd = ipath_lookup(unit);
-       int ret, i;
-
-       if (!dd) {
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       if (!usable(dd)) {
-               ret = -ENETDOWN;
-               goto bail;
-       }
-
-       for (i = 1; i < dd->ipath_cfgports; i++) {
-               ret = try_alloc_port(dd, i, fp, uinfo);
-               if (ret != -EBUSY)
-                       goto bail;
-       }
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
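-/*
- * Choose a unit when the user didn't request a specific one: prefer a
- * unit "close" to the CPU the process is bound to, else scan all
- * usable units for a free port, lowest port number first.
- */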
-static int find_best_unit(struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       int ret = 0, i, prefunit = -1, devmax;
-       int maxofallports, npresent, nup;
-       int ndev;
-
-       devmax = ipath_count_units(&npresent, &nup, &maxofallports);
-
-       /*
-        * This code is present to allow a knowledgeable person to
-        * specify the layout of processes to processors before opening
-        * this driver, and then we'll assign the process to the "closest"
-        * InfiniPath chip to that processor (we assume reasonable connectivity,
-        * for now).  This code assumes that if affinity has been set
-        * before this point, at most one cpu is set; for now this
-        * is reasonable.  I check for both cpumask_empty() and cpumask_full(),
-        * in case some kernel variant sets none of the bits when no
-        * affinity is set.  2.6.11 and 12 kernels have all present
-        * cpus set.  Some day we'll have to fix it up further to handle
-        * a cpu subset.  This algorithm fails for two HT chips connected
-        * in tunnel fashion.  Eventually this needs real topology
-        * information.  There may be some issues with dual core numbering
-        * as well.  This needs more work prior to release.
-        */
-       if (!cpumask_empty(tsk_cpus_allowed(current)) &&
-           !cpumask_full(tsk_cpus_allowed(current))) {
-               int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
-               get_online_cpus();
-               for_each_online_cpu(i)
-                       if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
-                               ipath_cdbg(PROC, "%s[%u] affinity set for "
-                                          "cpu %d/%d\n", current->comm,
-                                          current->pid, i, ncpus);
-                               curcpu = i;
-                               nset++;
-                       }
-               put_online_cpus();
-               if (curcpu != -1 && nset != ncpus) {
-                       if (npresent) {
-                               prefunit = curcpu / (ncpus / npresent);
-                               ipath_cdbg(PROC, "%s[%u] %d chips, %d cpus, "
-                                         "%d cpus/chip, select unit %d\n",
-                                         current->comm, current->pid,
-                                         npresent, ncpus, ncpus / npresent,
-                                         prefunit);
-                       }
-               }
-       }
-
-       /*
-        * user ports start at 1, kernel port is 0
-        * For now, we do round-robin access across all chips
-        */
-
-       if (prefunit != -1)
-               devmax = prefunit + 1;
-recheck:
-       for (i = 1; i < maxofallports; i++) {
-               for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
-                    ndev++) {
-                       struct ipath_devdata *dd = ipath_lookup(ndev);
-
-                       if (!usable(dd))
-                               continue; /* can't use this unit */
-                       if (i >= dd->ipath_cfgports)
-                               /*
-                                * Maxed out on users of this unit. Try
-                                * next.
-                                */
-                               continue;
-                       ret = try_alloc_port(dd, i, fp, uinfo);
-                       if (!ret)
-                               goto done;
-               }
-       }
-
-       if (npresent) {
-               if (nup == 0) {
-                       ret = -ENETDOWN;
-                       ipath_dbg("No ports available (none initialized "
-                                 "and ready)\n");
-               } else {
-                       if (prefunit > 0) {
-                               /* if started above 0, retry from 0 */
-                               ipath_cdbg(PROC,
-                                          "%s[%u] no ports on prefunit "
-                                          "%d, clear and re-check\n",
-                                          current->comm, current->pid,
-                                          prefunit);
-                               devmax = ipath_count_units(NULL, NULL,
-                                                          NULL);
-                               prefunit = -1;
-                               goto recheck;
-                       }
-                       ret = -EBUSY;
-                       ipath_dbg("No ports available\n");
-               }
-       } else {
-               ret = -ENXIO;
-               ipath_dbg("No boards found\n");
-       }
-
-done:
-       return ret;
-}
-
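-/*
- * Look for an already-open master port with a matching subport ID so
- * this process can attach as a slave; returns 1 on success, 0 if no
- * match was found, -EINVAL if the parameters don't match the master.
- */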
-static int find_shared_port(struct file *fp,
-                           const struct ipath_user_info *uinfo)
-{
-       int devmax, ndev, i;
-       int ret = 0;
-
-       devmax = ipath_count_units(NULL, NULL, NULL);
-
-       for (ndev = 0; ndev < devmax; ndev++) {
-               struct ipath_devdata *dd = ipath_lookup(ndev);
-
-               if (!usable(dd))
-                       continue;
-               for (i = 1; i < dd->ipath_cfgports; i++) {
-                       struct ipath_portdata *pd = dd->ipath_pd[i];
-
-                       /* Skip ports which are not yet open */
-                       if (!pd || !pd->port_cnt)
-                               continue;
-                       /* Skip port if it doesn't match the requested one */
-                       if (pd->port_subport_id != uinfo->spu_subport_id)
-                               continue;
-                       /* Verify the sharing process matches the master */
-                       if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
-                           pd->userversion != uinfo->spu_userversion ||
-                           pd->port_cnt >= pd->port_subport_cnt) {
-                               ret = -EINVAL;
-                               goto done;
-                       }
-                       port_fp(fp) = pd;
-                       subport_fp(fp) = pd->port_cnt++;
-                       pd->port_subpid[subport_fp(fp)] =
-                               get_pid(task_pid(current));
-                       tidcursor_fp(fp) = 0;
-                       pd->active_slaves |= 1 << subport_fp(fp);
-                       ipath_cdbg(PROC,
-                                  "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
-                                  current->comm, current->pid,
-                                  subport_fp(fp),
-                                  pd->port_comm, pid_nr(pd->port_pid),
-                                  dd->ipath_unit, pd->port_port);
-                       ret = 1;
-                       goto done;
-               }
-       }
-
-done:
-       return ret;
-}
-
-static int ipath_open(struct inode *in, struct file *fp)
-{
-       /* The real work is performed later in ipath_assign_port() */
-       fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
-       return fp->private_data ? 0 : -ENOMEM;
-}
-
-/* Get port early, so can set affinity prior to memory allocation */
-static int ipath_assign_port(struct file *fp,
-                             const struct ipath_user_info *uinfo)
-{
-       int ret;
-       int i_minor;
-       unsigned swmajor, swminor;
-
-       /* Check to be sure we haven't already initialized this file */
-       if (port_fp(fp)) {
-               ret = -EINVAL;
-               goto done;
-       }
-
-       /* for now, if major version is different, bail */
-       swmajor = uinfo->spu_userversion >> 16;
-       if (swmajor != IPATH_USER_SWMAJOR) {
-               ipath_dbg("User major version %d not same as driver "
-                         "major %d\n", uinfo->spu_userversion >> 16,
-                         IPATH_USER_SWMAJOR);
-               ret = -ENODEV;
-               goto done;
-       }
-
-       swminor = uinfo->spu_userversion & 0xffff;
-       if (swminor != IPATH_USER_SWMINOR)
-               ipath_dbg("User minor version %d not same as driver "
-                         "minor %d\n", swminor, IPATH_USER_SWMINOR);
-
-       mutex_lock(&ipath_mutex);
-
-       if (ipath_compatible_subports(swmajor, swminor) &&
-           uinfo->spu_subport_cnt &&
-           (ret = find_shared_port(fp, uinfo))) {
-               if (ret > 0)
-                       ret = 0;
-               goto done_chk_sdma;
-       }
-
-       i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
-       ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
-                  (long)file_inode(fp)->i_rdev, i_minor);
-
-       if (i_minor)
-               ret = find_free_port(i_minor - 1, fp, uinfo);
-       else
-               ret = find_best_unit(fp, uinfo);
-
-done_chk_sdma:
-       if (!ret) {
-               struct ipath_filedata *fd = fp->private_data;
-               const struct ipath_portdata *pd = fd->pd;
-               const struct ipath_devdata *dd = pd->port_dd;
-
-               fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
-                                                     dd->ipath_unit,
-                                                     pd->port_port,
-                                                     fd->subport);
-
-               if (!fd->pq)
-                       ret = -ENOMEM;
-       }
-
-       mutex_unlock(&ipath_mutex);
-
-done:
-       return ret;
-}
-
-
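-/*
- * Per-open port initialization: assign PIO buffers, allocate the
- * receive header queue and eager buffers, reset head/tail state, and
- * enable receive on the port.  Slaves just wait for the master.
- */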
-static int ipath_do_user_init(struct file *fp,
-                             const struct ipath_user_info *uinfo)
-{
-       int ret;
-       struct ipath_portdata *pd = port_fp(fp);
-       struct ipath_devdata *dd;
-       u32 head32;
-
-       /* Subports don't need to initialize anything since master did it. */
-       if (subport_fp(fp)) {
-               ret = wait_event_interruptible(pd->port_wait,
-                       !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
-               goto done;
-       }
-
-       dd = pd->port_dd;
-
-       if (uinfo->spu_rcvhdrsize) {
-               ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
-               if (ret)
-                       goto done;
-       }
-
-       /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
-       /* some ports may get extra buffers, calculate that here */
-       if (pd->port_port <= dd->ipath_ports_extrabuf)
-               pd->port_piocnt = dd->ipath_pbufsport + 1;
-       else
-               pd->port_piocnt = dd->ipath_pbufsport;
-
-       /* for right now, kernel piobufs are at end, so port 1 is at 0 */
-       if (pd->port_port <= dd->ipath_ports_extrabuf)
-               pd->port_pio_base = (dd->ipath_pbufsport + 1)
-                       * (pd->port_port - 1);
-       else
-               pd->port_pio_base = dd->ipath_ports_extrabuf +
-                       dd->ipath_pbufsport * (pd->port_port - 1);
-       pd->port_piobufs = dd->ipath_piobufbase +
-               pd->port_pio_base * dd->ipath_palign;
-       ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
-               " first pio %u\n", pd->port_port, pd->port_piobufs,
-               pd->port_piocnt, pd->port_pio_base);
-       ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
-
-       /*
-        * Now allocate the rcvhdr Q and eager TIDs; skip the TID
-        * array for time being.  If pd->port_port > chip-supported,
-        * we'll someday need extra logic here to handle the overflow
-        * through port 0.
-        */
-       ret = ipath_create_rcvhdrq(dd, pd);
-       if (!ret)
-               ret = ipath_create_user_egr(pd);
-       if (ret)
-               goto done;
-
-       /*
-        * set the eager head register for this port to the current values
-        * of the tail pointers, since we don't know if they were
-        * updated on last use of the port.
-        */
-       head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
-       ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
-       pd->port_lastrcvhdrqtail = -1;
-       ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
-               pd->port_port, head32);
-       pd->port_tidcursor = 0; /* start at beginning after open */
-
-       /* initialize poll variables... */
-       pd->port_urgent = 0;
-       pd->port_urgent_poll = 0;
-       pd->port_hdrqfull_poll = pd->port_hdrqfull;
-
-       /*
-        * Now enable the port for receive.
-        * Some chips DMA the tail register to memory whenever it
-        * changes (and when the update bit transitions from 0 to 1);
-        * for those chips, we turn tail updates off and then back on.
-        * This will (very briefly) affect any other open ports, but the
-        * duration is very short, and therefore isn't an issue.  We
-        * explicitly set the in-memory tail copy to 0 beforehand, so we
-        * don't have to wait to be sure the DMA update has happened
-        * (chip resets head/tail to 0 on transition to enable).
-        */
-       set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-               &dd->ipath_rcvctrl);
-       if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-               if (pd->port_rcvhdrtail_kvaddr)
-                       ipath_clear_rcvhdrtail(pd);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                       dd->ipath_rcvctrl &
-                       ~(1ULL << dd->ipath_r_tailupd_shift));
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-       /* Notify any waiting slaves */
-       if (pd->port_subport_cnt) {
-               clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-               wake_up(&pd->port_wait);
-       }
-done:
-       return ret;
-}
-
-/**
- * unlock_expected_tids - unlock any expected TID entries the port still had in use
- * @pd: port
- *
- * We don't actually update the chip here, because we do a bulk update
- * below, using ipath_f_clear_tids.
- */
-static void unlock_expected_tids(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
-       int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-
-       ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
-                  pd->port_port);
-       for (i = port_tidbase; i < maxtid; i++) {
-               struct page *ps = dd->ipath_pageshadow[i];
-
-               if (!ps)
-                       continue;
-
-               dd->ipath_pageshadow[i] = NULL;
-               pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
-                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-               ipath_release_user_pages_on_close(&ps, 1);
-               cnt++;
-               ipath_stats.sps_pageunlocks++;
-       }
-       if (cnt)
-               ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
-                          pd->port_port, cnt);
-
-       if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
-               ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
-                          (unsigned long long) ipath_stats.sps_pagelocks,
-                          (unsigned long long)
-                          ipath_stats.sps_pageunlocks);
-}
-
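-/*
- * Tear down an open: drain user SDMA, drop slave state if other opens
- * remain; otherwise disable receive on the port, disarm its PIO
- * buffers, clear TIDs and partition keys, and free the port data.
- */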
-static int ipath_close(struct inode *in, struct file *fp)
-{
-       struct ipath_filedata *fd;
-       struct ipath_portdata *pd;
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       unsigned port;
-       struct pid *pid;
-
-       ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
-                  (long)in->i_rdev, fp->private_data);
-
-       mutex_lock(&ipath_mutex);
-
-       fd = fp->private_data;
-       fp->private_data = NULL;
-       pd = fd->pd;
-       if (!pd) {
-               mutex_unlock(&ipath_mutex);
-               goto bail;
-       }
-
-       dd = pd->port_dd;
-
-       /* drain user sdma queue */
-       ipath_user_sdma_queue_drain(dd, fd->pq);
-       ipath_user_sdma_queue_destroy(fd->pq);
-
-       if (--pd->port_cnt) {
-               /*
-                * XXX If the master closes the port before the slave(s),
-                * revoke the mmap for the eager receive queue so
-                * the slave(s) don't wait for receive data forever.
-                */
-               pd->active_slaves &= ~(1 << fd->subport);
-               put_pid(pd->port_subpid[fd->subport]);
-               pd->port_subpid[fd->subport] = NULL;
-               mutex_unlock(&ipath_mutex);
-               goto bail;
-       }
-       /* early; no interrupt users after this */
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       port = pd->port_port;
-       dd->ipath_pd[port] = NULL;
-       pid = pd->port_pid;
-       pd->port_pid = NULL;
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-       if (pd->port_rcvwait_to || pd->port_piowait_to
-           || pd->port_rcvnowait || pd->port_pionowait) {
-               ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
-                          "%u rcv, %u pio already\n",
-                          pd->port_port, pd->port_rcvwait_to,
-                          pd->port_piowait_to, pd->port_rcvnowait,
-                          pd->port_pionowait);
-               pd->port_rcvwait_to = pd->port_piowait_to =
-                       pd->port_rcvnowait = pd->port_pionowait = 0;
-       }
-       if (pd->port_flag) {
-               ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
-                         pd->port_port, pd->port_flag);
-               pd->port_flag = 0;
-       }
-
-       if (dd->ipath_kregbase) {
-               /* atomically clear receive enable port and intr avail. */
-               clear_bit(dd->ipath_r_portenable_shift + port,
-                         &dd->ipath_rcvctrl);
-               clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
-                         &dd->ipath_rcvctrl);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                       dd->ipath_rcvctrl);
-               /* and read back from chip to be sure that nothing
-                * else is in flight when we do the rest */
-               (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-               /* clean up the pkeys for this port user */
-               ipath_clean_part_key(pd, dd);
-               /*
-                * be paranoid, and never write 0's to these, just use an
-                * unused part of the port 0 tail page.  Of course,
-                * rcvhdraddr points to a large chunk of memory, so this
-                * could still trash things, but at least it won't trash
-                * page 0, and by disabling the port, it should stop "soon",
-                * even if a packet or two is already in flight after we
-                * disabled the port.
-                */
-               ipath_write_kreg_port(dd,
-                       dd->ipath_kregs->kr_rcvhdrtailaddr, port,
-                       dd->ipath_dummy_hdrq_phys);
-               ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-                       pd->port_port, dd->ipath_dummy_hdrq_phys);
-
-               ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
-               ipath_chg_pioavailkernel(dd, pd->port_pio_base,
-                       pd->port_piocnt, 1);
-
-               dd->ipath_f_clear_tids(dd, pd->port_port);
-
-               if (dd->ipath_pageshadow)
-                       unlock_expected_tids(pd);
-               ipath_stats.sps_ports--;
-               ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-                          pd->port_comm, pid_nr(pid),
-                          dd->ipath_unit, port);
-       }
-
-       put_pid(pid);
-       mutex_unlock(&ipath_mutex);
-       ipath_free_pddata(dd, pd); /* after releasing the mutex */
-
-bail:
-       kfree(fd);
-       return 0;
-}
-
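-/*
- * Copy unit/port/subport info back to user space; the subport fields
- * are omitted for old user libraries that don't understand them.
- */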
-static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
-                          struct ipath_port_info __user *uinfo)
-{
-       struct ipath_port_info info;
-       int nup;
-       int ret;
-       size_t sz;
-
-       (void) ipath_count_units(NULL, &nup, NULL);
-       info.num_active = nup;
-       info.unit = pd->port_dd->ipath_unit;
-       info.port = pd->port_port;
-       info.subport = subport;
-       /* Don't return new fields if old library opened the port. */
-       if (ipath_supports_subports(pd->userversion >> 16,
-                                   pd->userversion & 0xffff)) {
-               /* Number of user ports available for this device. */
-               info.num_ports = pd->port_dd->ipath_cfgports - 1;
-               info.num_subports = pd->port_subport_cnt;
-               sz = sizeof(info);
-       } else
-               sz = sizeof(info) - 2 * sizeof(u16);
-
-       if (copy_to_user(uinfo, &info, sz)) {
-               ret = -EFAULT;
-               goto bail;
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int ipath_get_slave_info(struct ipath_portdata *pd,
-                               void __user *slave_mask_addr)
-{
-       int ret = 0;
-
-       if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
-               ret = -EFAULT;
-       return ret;
-}
-
-static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
-                                  u32 __user *inflightp)
-{
-       const u32 val = ipath_user_sdma_inflight_counter(pq);
-
-       if (put_user(val, inflightp))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int ipath_sdma_get_complete(struct ipath_devdata *dd,
-                                  struct ipath_user_sdma_queue *pq,
-                                  u32 __user *completep)
-{
-       u32 val;
-       int err;
-
-       err = ipath_user_sdma_make_progress(dd, pq);
-       if (err < 0)
-               return err;
-
-       val = ipath_user_sdma_complete_counter(pq);
-       if (put_user(val, completep))
-               return -EFAULT;
-
-       return 0;
-}
-
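-/*
- * Command ABI: each write() carries one struct ipath_cmd.  The type
- * field is copied from user space first, then the type-specific
- * payload, then the command is dispatched; on success the number of
- * bytes consumed is returned.
- */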
-static ssize_t ipath_write(struct file *fp, const char __user *data,
-                          size_t count, loff_t *off)
-{
-       const struct ipath_cmd __user *ucmd;
-       struct ipath_portdata *pd;
-       const void __user *src;
-       size_t consumed, copy;
-       struct ipath_cmd cmd;
-       ssize_t ret = 0;
-       void *dest;
-
-       if (count < sizeof(cmd.type)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ucmd = (const struct ipath_cmd __user *) data;
-
-       if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       consumed = sizeof(cmd.type);
-
-       switch (cmd.type) {
-       case IPATH_CMD_ASSIGN_PORT:
-       case __IPATH_CMD_USER_INIT:
-       case IPATH_CMD_USER_INIT:
-               copy = sizeof(cmd.cmd.user_info);
-               dest = &cmd.cmd.user_info;
-               src = &ucmd->cmd.user_info;
-               break;
-       case IPATH_CMD_RECV_CTRL:
-               copy = sizeof(cmd.cmd.recv_ctrl);
-               dest = &cmd.cmd.recv_ctrl;
-               src = &ucmd->cmd.recv_ctrl;
-               break;
-       case IPATH_CMD_PORT_INFO:
-               copy = sizeof(cmd.cmd.port_info);
-               dest = &cmd.cmd.port_info;
-               src = &ucmd->cmd.port_info;
-               break;
-       case IPATH_CMD_TID_UPDATE:
-       case IPATH_CMD_TID_FREE:
-               copy = sizeof(cmd.cmd.tid_info);
-               dest = &cmd.cmd.tid_info;
-               src = &ucmd->cmd.tid_info;
-               break;
-       case IPATH_CMD_SET_PART_KEY:
-               copy = sizeof(cmd.cmd.part_key);
-               dest = &cmd.cmd.part_key;
-               src = &ucmd->cmd.part_key;
-               break;
-       case __IPATH_CMD_SLAVE_INFO:
-               copy = sizeof(cmd.cmd.slave_mask_addr);
-               dest = &cmd.cmd.slave_mask_addr;
-               src = &ucmd->cmd.slave_mask_addr;
-               break;
-       case IPATH_CMD_PIOAVAILUPD:     /* force an update of PIOAvail reg */
-               copy = 0;
-               src = NULL;
-               dest = NULL;
-               break;
-       case IPATH_CMD_POLL_TYPE:
-               copy = sizeof(cmd.cmd.poll_type);
-               dest = &cmd.cmd.poll_type;
-               src = &ucmd->cmd.poll_type;
-               break;
-       case IPATH_CMD_ARMLAUNCH_CTRL:
-               copy = sizeof(cmd.cmd.armlaunch_ctrl);
-               dest = &cmd.cmd.armlaunch_ctrl;
-               src = &ucmd->cmd.armlaunch_ctrl;
-               break;
-       case IPATH_CMD_SDMA_INFLIGHT:
-               copy = sizeof(cmd.cmd.sdma_inflight);
-               dest = &cmd.cmd.sdma_inflight;
-               src = &ucmd->cmd.sdma_inflight;
-               break;
-       case IPATH_CMD_SDMA_COMPLETE:
-               copy = sizeof(cmd.cmd.sdma_complete);
-               dest = &cmd.cmd.sdma_complete;
-               src = &ucmd->cmd.sdma_complete;
-               break;
-       default:
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (copy) {
-               if ((count - consumed) < copy) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               if (copy_from_user(dest, src, copy)) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-
-               consumed += copy;
-       }
-
-       pd = port_fp(fp);
-       if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
-               cmd.type != IPATH_CMD_ASSIGN_PORT) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       switch (cmd.type) {
-       case IPATH_CMD_ASSIGN_PORT:
-               ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               break;
-       case __IPATH_CMD_USER_INIT:
-               /* backwards compatibility, get port first */
-               ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               /* and fall through to current version. */
-       case IPATH_CMD_USER_INIT:
-               ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               ret = ipath_get_base_info(
-                       fp, (void __user *) (unsigned long)
-                       cmd.cmd.user_info.spu_base_info,
-                       cmd.cmd.user_info.spu_base_info_size);
-               break;
-       case IPATH_CMD_RECV_CTRL:
-               ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
-               break;
-       case IPATH_CMD_PORT_INFO:
-               ret = ipath_port_info(pd, subport_fp(fp),
-                                     (struct ipath_port_info __user *)
-                                     (unsigned long) cmd.cmd.port_info);
-               break;
-       case IPATH_CMD_TID_UPDATE:
-               ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
-               break;
-       case IPATH_CMD_TID_FREE:
-               ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
-               break;
-       case IPATH_CMD_SET_PART_KEY:
-               ret = ipath_set_part_key(pd, cmd.cmd.part_key);
-               break;
-       case __IPATH_CMD_SLAVE_INFO:
-               ret = ipath_get_slave_info(pd,
-                                          (void __user *) (unsigned long)
-                                          cmd.cmd.slave_mask_addr);
-               break;
-       case IPATH_CMD_PIOAVAILUPD:
-               ipath_force_pio_avail_update(pd->port_dd);
-               break;
-       case IPATH_CMD_POLL_TYPE:
-               pd->poll_type = cmd.cmd.poll_type;
-               break;
-       case IPATH_CMD_ARMLAUNCH_CTRL:
-               if (cmd.cmd.armlaunch_ctrl)
-                       ipath_enable_armlaunch(pd->port_dd);
-               else
-                       ipath_disable_armlaunch(pd->port_dd);
-               break;
-       case IPATH_CMD_SDMA_INFLIGHT:
-               ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
-                                             (u32 __user *) (unsigned long)
-                                             cmd.cmd.sdma_inflight);
-               break;
-       case IPATH_CMD_SDMA_COMPLETE:
-               ret = ipath_sdma_get_complete(pd->port_dd,
-                                             user_sdma_queue_fp(fp),
-                                             (u32 __user *) (unsigned long)
-                                             cmd.cmd.sdma_complete);
-               break;
-       }
-
-       if (ret >= 0)
-               ret = consumed;
-
-bail:
-       return ret;
-}
-
-static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
-       struct file *filp = iocb->ki_filp;
-       struct ipath_filedata *fp = filp->private_data;
-       struct ipath_portdata *pd = port_fp(filp);
-       struct ipath_user_sdma_queue *pq = fp->pq;
-
-       if (!iter_is_iovec(from) || !from->nr_segs)
-               return -EINVAL;
-
-       return ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs);
-}
-
-static struct class *ipath_class;
-
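-/*
- * Create one character device node: allocate and register the cdev
- * for the given minor, then create the matching class device.
- */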
-static int init_cdev(int minor, char *name, const struct file_operations *fops,
-                    struct cdev **cdevp, struct device **devp)
-{
-       const dev_t dev = MKDEV(IPATH_MAJOR, minor);
-       struct cdev *cdev = NULL;
-       struct device *device = NULL;
-       int ret;
-
-       cdev = cdev_alloc();
-       if (!cdev) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate cdev for minor %d, %s\n",
-                      minor, name);
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       cdev->owner = THIS_MODULE;
-       cdev->ops = fops;
-       kobject_set_name(&cdev->kobj, name);
-
-       ret = cdev_add(cdev, dev, 1);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not add cdev for minor %d, %s (err %d)\n",
-                      minor, name, -ret);
-               goto err_cdev;
-       }
-
-       device = device_create(ipath_class, NULL, dev, NULL, name);
-
-       if (IS_ERR(device)) {
-               ret = PTR_ERR(device);
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-                      "device for minor %d, %s (err %d)\n",
-                      minor, name, -ret);
-               goto err_cdev;
-       }
-
-       goto done;
-
-err_cdev:
-       cdev_del(cdev);
-       cdev = NULL;
-
-done:
-       if (ret >= 0) {
-               *cdevp = cdev;
-               *devp = device;
-       } else {
-               *cdevp = NULL;
-               *devp = NULL;
-       }
-
-       return ret;
-}
-
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-                   struct cdev **cdevp, struct device **devp)
-{
-       return init_cdev(minor, name, fops, cdevp, devp);
-}
-
-static void cleanup_cdev(struct cdev **cdevp,
-                        struct device **devp)
-{
-       struct device *dev = *devp;
-
-       if (dev) {
-               device_unregister(dev);
-               *devp = NULL;
-       }
-
-       if (*cdevp) {
-               cdev_del(*cdevp);
-               *cdevp = NULL;
-       }
-}
-
-void ipath_cdev_cleanup(struct cdev **cdevp,
-                       struct device **devp)
-{
-       cleanup_cdev(cdevp, devp);
-}
-
-static struct cdev *wildcard_cdev;
-static struct device *wildcard_dev;
-
-static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
-
-static int user_init(void)
-{
-       int ret;
-
-       ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
-                      "chrdev region (err %d)\n", -ret);
-               goto done;
-       }
-
-       ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
-
-       if (IS_ERR(ipath_class)) {
-               ret = PTR_ERR(ipath_class);
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-                      "device class (err %d)\n", -ret);
-               goto bail;
-       }
-
-       goto done;
-bail:
-       unregister_chrdev_region(dev, IPATH_NMINORS);
-done:
-       return ret;
-}
-
-static void user_cleanup(void)
-{
-       if (ipath_class) {
-               class_destroy(ipath_class);
-               ipath_class = NULL;
-       }
-
-       unregister_chrdev_region(dev, IPATH_NMINORS);
-}
-
-static atomic_t user_count = ATOMIC_INIT(0);
-static atomic_t user_setup = ATOMIC_INIT(0);
-
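-/*
- * Called per device: the first add also registers the chrdev region,
- * the device class, and the unit-wildcard "ipath" node; every add
- * creates the per-unit "ipathN" node.
- */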
-int ipath_user_add(struct ipath_devdata *dd)
-{
-       char name[10];
-       int ret;
-
-       if (atomic_inc_return(&user_count) == 1) {
-               ret = user_init();
-               if (ret < 0) {
-                       ipath_dev_err(dd, "Unable to set up user support: "
-                                     "error %d\n", -ret);
-                       goto bail;
-               }
-               ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
-                               &wildcard_dev);
-               if (ret < 0) {
-                       ipath_dev_err(dd, "Could not create wildcard "
-                                     "minor: error %d\n", -ret);
-                       goto bail_user;
-               }
-
-               atomic_set(&user_setup, 1);
-       }
-
-       snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
-
-       ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
-                       &dd->user_cdev, &dd->user_dev);
-       if (ret < 0)
-               ipath_dev_err(dd, "Could not create user minor %d, %s\n",
-                             dd->ipath_unit + 1, name);
-
-       goto bail;
-
-bail_user:
-       user_cleanup();
-bail:
-       return ret;
-}
-
-void ipath_user_remove(struct ipath_devdata *dd)
-{
-       cleanup_cdev(&dd->user_cdev, &dd->user_dev);
-
-       if (atomic_dec_return(&user_count) == 0) {
-               if (atomic_read(&user_setup) == 0)
-                       goto bail;
-
-               cleanup_cdev(&wildcard_cdev, &wildcard_dev);
-               user_cleanup();
-
-               atomic_set(&user_setup, 0);
-       }
-bail:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_fs.c b/drivers/staging/rdma/ipath/ipath_fs.c
deleted file mode 100644 (file)
index 796af68..0000000
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-#define IPATHFS_MAGIC 0x726a77
-
-static struct super_block *ipath_super;
-
-static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
-                        umode_t mode, const struct file_operations *fops,
-                        void *data)
-{
-       int error;
-       struct inode *inode = new_inode(dir->i_sb);
-
-       if (!inode) {
-               error = -EPERM;
-               goto bail;
-       }
-
-       inode->i_ino = get_next_ino();
-       inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       inode->i_private = data;
-       if (S_ISDIR(mode)) {
-               inode->i_op = &simple_dir_inode_operations;
-               inc_nlink(inode);
-               inc_nlink(dir);
-       }
-
-       inode->i_fop = fops;
-
-       d_instantiate(dentry, inode);
-       error = 0;
-
-bail:
-       return error;
-}
-
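-/* Create a named node under @parent, holding the parent's inode mutex */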
-static int create_file(const char *name, umode_t mode,
-                      struct dentry *parent, struct dentry **dentry,
-                      const struct file_operations *fops, void *data)
-{
-       int error;
-
-       mutex_lock(&d_inode(parent)->i_mutex);
-       *dentry = lookup_one_len(name, parent, strlen(name));
-       if (!IS_ERR(*dentry))
-               error = ipathfs_mknod(d_inode(parent), *dentry,
-                                     mode, fops, data);
-       else
-               error = PTR_ERR(*dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
-
-       return error;
-}
-
-static ssize_t atomic_stats_read(struct file *file, char __user *buf,
-                                size_t count, loff_t *ppos)
-{
-       return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
-                                      sizeof ipath_stats);
-}
-
-static const struct file_operations atomic_stats_ops = {
-       .read = atomic_stats_read,
-       .llseek = default_llseek,
-};
-
-static ssize_t atomic_counters_read(struct file *file, char __user *buf,
-                                   size_t count, loff_t *ppos)
-{
-       struct infinipath_counters counters;
-       struct ipath_devdata *dd;
-
-       dd = file_inode(file)->i_private;
-       dd->ipath_f_read_counters(dd, &counters);
-
-       return simple_read_from_buffer(buf, count, ppos, &counters,
-                                      sizeof counters);
-}
-
-static const struct file_operations atomic_counters_ops = {
-       .read = atomic_counters_read,
-       .llseek = default_llseek,
-};
-
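-/* Read the ipath_flash EEPROM image into user space via a bounce buffer */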
-static ssize_t flash_read(struct file *file, char __user *buf,
-                         size_t count, loff_t *ppos)
-{
-       struct ipath_devdata *dd;
-       ssize_t ret;
-       loff_t pos;
-       char *tmp;
-
-       pos = *ppos;
-
-       if (pos < 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (pos >= sizeof(struct ipath_flash)) {
-               ret = 0;
-               goto bail;
-       }
-
-       if (count > sizeof(struct ipath_flash) - pos)
-               count = sizeof(struct ipath_flash) - pos;
-
-       tmp = kmalloc(count, GFP_KERNEL);
-       if (!tmp) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       dd = file_inode(file)->i_private;
-       if (ipath_eeprom_read(dd, pos, tmp, count)) {
-               ipath_dev_err(dd, "failed to read from flash\n");
-               ret = -ENXIO;
-               goto bail_tmp;
-       }
-
-       if (copy_to_user(buf, tmp, count)) {
-               ret = -EFAULT;
-               goto bail_tmp;
-       }
-
-       *ppos = pos + count;
-       ret = count;
-
-bail_tmp:
-       kfree(tmp);
-
-bail:
-       return ret;
-}
-
-static ssize_t flash_write(struct file *file, const char __user *buf,
-                          size_t count, loff_t *ppos)
-{
-       struct ipath_devdata *dd;
-       ssize_t ret;
-       loff_t pos;
-       char *tmp;
-
-       pos = *ppos;
-
-       if (pos != 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (count != sizeof(struct ipath_flash)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       tmp = memdup_user(buf, count);
-       if (IS_ERR(tmp))
-               return PTR_ERR(tmp);
-
-       dd = file_inode(file)->i_private;
-       if (ipath_eeprom_write(dd, pos, tmp, count)) {
-               ret = -ENXIO;
-               ipath_dev_err(dd, "failed to write to flash\n");
-               goto bail_tmp;
-       }
-
-       *ppos = pos + count;
-       ret = count;
-
-bail_tmp:
-       kfree(tmp);
-
-bail:
-       return ret;
-}
-
-static const struct file_operations flash_ops = {
-       .read = flash_read,
-       .write = flash_write,
-       .llseek = default_llseek,
-};
-
-static int create_device_files(struct super_block *sb,
-                              struct ipath_devdata *dd)
-{
-       struct dentry *dir, *tmp;
-       char unit[10];
-       int ret;
-
-       snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-       ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
-                         &simple_dir_operations, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-       ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
-                         &atomic_counters_ops, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s/atomic_counters) "
-                      "failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-       ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
-                         &flash_ops, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s/flash) "
-                      "failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
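-/* Unhash and unlink the named node under @parent */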
-static int remove_file(struct dentry *parent, char *name)
-{
-       struct dentry *tmp;
-       int ret;
-
-       tmp = lookup_one_len(name, parent, strlen(name));
-
-       if (IS_ERR(tmp)) {
-               ret = PTR_ERR(tmp);
-               goto bail;
-       }
-
-       spin_lock(&tmp->d_lock);
-       if (simple_positive(tmp)) {
-               dget_dlock(tmp);
-               __d_drop(tmp);
-               spin_unlock(&tmp->d_lock);
-               simple_unlink(d_inode(parent), tmp);
-       } else
-               spin_unlock(&tmp->d_lock);
-
-       ret = 0;
-bail:
-       /*
-        * We don't expect clients to care about the return value, but
-        * it's there if they need it.
-        */
-       return ret;
-}
-
-static int remove_device_files(struct super_block *sb,
-                              struct ipath_devdata *dd)
-{
-       struct dentry *dir, *root;
-       char unit[10];
-       int ret;
-
-       root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
-       snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-       dir = lookup_one_len(unit, root, strlen(unit));
-
-       if (IS_ERR(dir)) {
-               ret = PTR_ERR(dir);
-               printk(KERN_ERR "Lookup of %s failed\n", unit);
-               goto bail;
-       }
-
-       remove_file(dir, "flash");
-       remove_file(dir, "atomic_counters");
-       d_delete(dir);
-       ret = simple_rmdir(d_inode(root), dir);
-
-bail:
-       mutex_unlock(&d_inode(root)->i_mutex);
-       dput(root);
-       return ret;
-}
-
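-/*
- * Populate the superblock: create the static atomic_stats file, then
- * a per-unit directory (atomic_counters, flash) for each device.
- */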
-static int ipathfs_fill_super(struct super_block *sb, void *data,
-                             int silent)
-{
-       struct ipath_devdata *dd, *tmp;
-       unsigned long flags;
-       int ret;
-
-       static struct tree_descr files[] = {
-               [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
-               {""},
-       };
-
-       ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
-       if (ret) {
-               printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-               spin_unlock_irqrestore(&ipath_devs_lock, flags);
-               ret = create_device_files(sb, dd);
-               if (ret)
-                       goto bail;
-               spin_lock_irqsave(&ipath_devs_lock, flags);
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-bail:
-       return ret;
-}
-
-static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
-                       int flags, const char *dev_name, void *data)
-{
-       struct dentry *ret;
-       ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
-       if (!IS_ERR(ret))
-               ipath_super = ret->d_sb;
-       return ret;
-}
-
-static void ipathfs_kill_super(struct super_block *s)
-{
-       kill_litter_super(s);
-       ipath_super = NULL;
-}
-
-int ipathfs_add_device(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (ipath_super == NULL) {
-               ret = 0;
-               goto bail;
-       }
-
-       ret = create_device_files(ipath_super, dd);
-
-bail:
-       return ret;
-}
-
-int ipathfs_remove_device(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (ipath_super == NULL) {
-               ret = 0;
-               goto bail;
-       }
-
-       ret = remove_device_files(ipath_super, dd);
-
-bail:
-       return ret;
-}
-
-static struct file_system_type ipathfs_fs_type = {
-       .owner =        THIS_MODULE,
-       .name =         "ipathfs",
-       .mount =        ipathfs_mount,
-       .kill_sb =      ipathfs_kill_super,
-};
-MODULE_ALIAS_FS("ipathfs");
-
-int __init ipath_init_ipathfs(void)
-{
-       return register_filesystem(&ipathfs_fs_type);
-}
-
-void __exit ipath_exit_ipathfs(void)
-{
-       unregister_filesystem(&ipathfs_fs_type);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_iba6110.c b/drivers/staging/rdma/ipath/ipath_iba6110.c
deleted file mode 100644 (file)
index 5f13572..0000000
+++ /dev/null
@@ -1,1939 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains all of the code that is specific to the InfiniPath
- * HT chip.
- */
-
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/htirq.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
-
-
-/*
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- *
- * The names are in InterCap form because they're taken straight from
- * the chip specification.  Since they're only used in this file, they
- * don't pollute the rest of the source.
- */
-
-struct _infinipath_do_not_use_kernel_regs {
-       unsigned long long Revision;
-       unsigned long long Control;
-       unsigned long long PageAlign;
-       unsigned long long PortCnt;
-       unsigned long long DebugPortSelect;
-       unsigned long long DebugPort;
-       unsigned long long SendRegBase;
-       unsigned long long UserRegBase;
-       unsigned long long CounterRegBase;
-       unsigned long long Scratch;
-       unsigned long long ReservedMisc1;
-       unsigned long long InterruptConfig;
-       unsigned long long IntBlocked;
-       unsigned long long IntMask;
-       unsigned long long IntStatus;
-       unsigned long long IntClear;
-       unsigned long long ErrorMask;
-       unsigned long long ErrorStatus;
-       unsigned long long ErrorClear;
-       unsigned long long HwErrMask;
-       unsigned long long HwErrStatus;
-       unsigned long long HwErrClear;
-       unsigned long long HwDiagCtrl;
-       unsigned long long MDIO;
-       unsigned long long IBCStatus;
-       unsigned long long IBCCtrl;
-       unsigned long long ExtStatus;
-       unsigned long long ExtCtrl;
-       unsigned long long GPIOOut;
-       unsigned long long GPIOMask;
-       unsigned long long GPIOStatus;
-       unsigned long long GPIOClear;
-       unsigned long long RcvCtrl;
-       unsigned long long RcvBTHQP;
-       unsigned long long RcvHdrSize;
-       unsigned long long RcvHdrCnt;
-       unsigned long long RcvHdrEntSize;
-       unsigned long long RcvTIDBase;
-       unsigned long long RcvTIDCnt;
-       unsigned long long RcvEgrBase;
-       unsigned long long RcvEgrCnt;
-       unsigned long long RcvBufBase;
-       unsigned long long RcvBufSize;
-       unsigned long long RxIntMemBase;
-       unsigned long long RxIntMemSize;
-       unsigned long long RcvPartitionKey;
-       unsigned long long ReservedRcv[10];
-       unsigned long long SendCtrl;
-       unsigned long long SendPIOBufBase;
-       unsigned long long SendPIOSize;
-       unsigned long long SendPIOBufCnt;
-       unsigned long long SendPIOAvailAddr;
-       unsigned long long TxIntMemBase;
-       unsigned long long TxIntMemSize;
-       unsigned long long ReservedSend[9];
-       unsigned long long SendBufferError;
-       unsigned long long SendBufferErrorCONT1;
-       unsigned long long SendBufferErrorCONT2;
-       unsigned long long SendBufferErrorCONT3;
-       unsigned long long ReservedSBE[4];
-       unsigned long long RcvHdrAddr0;
-       unsigned long long RcvHdrAddr1;
-       unsigned long long RcvHdrAddr2;
-       unsigned long long RcvHdrAddr3;
-       unsigned long long RcvHdrAddr4;
-       unsigned long long RcvHdrAddr5;
-       unsigned long long RcvHdrAddr6;
-       unsigned long long RcvHdrAddr7;
-       unsigned long long RcvHdrAddr8;
-       unsigned long long ReservedRHA[7];
-       unsigned long long RcvHdrTailAddr0;
-       unsigned long long RcvHdrTailAddr1;
-       unsigned long long RcvHdrTailAddr2;
-       unsigned long long RcvHdrTailAddr3;
-       unsigned long long RcvHdrTailAddr4;
-       unsigned long long RcvHdrTailAddr5;
-       unsigned long long RcvHdrTailAddr6;
-       unsigned long long RcvHdrTailAddr7;
-       unsigned long long RcvHdrTailAddr8;
-       unsigned long long ReservedRHTA[7];
-       unsigned long long Sync;        /* Software only */
-       unsigned long long Dump;        /* Software only */
-       unsigned long long SimVer;      /* Software only */
-       unsigned long long ReservedSW[5];
-       unsigned long long SerdesConfig0;
-       unsigned long long SerdesConfig1;
-       unsigned long long SerdesStatus;
-       unsigned long long XGXSConfig;
-       unsigned long long ReservedSW2[4];
-};
-
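-/* Counter registers, in chip layout; addressed via IPATH_CREG_OFFSET() */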
-struct _infinipath_do_not_use_counters {
-       __u64 LBIntCnt;
-       __u64 LBFlowStallCnt;
-       __u64 Reserved1;
-       __u64 TxUnsupVLErrCnt;
-       __u64 TxDataPktCnt;
-       __u64 TxFlowPktCnt;
-       __u64 TxDwordCnt;
-       __u64 TxLenErrCnt;
-       __u64 TxMaxMinLenErrCnt;
-       __u64 TxUnderrunCnt;
-       __u64 TxFlowStallCnt;
-       __u64 TxDroppedPktCnt;
-       __u64 RxDroppedPktCnt;
-       __u64 RxDataPktCnt;
-       __u64 RxFlowPktCnt;
-       __u64 RxDwordCnt;
-       __u64 RxLenErrCnt;
-       __u64 RxMaxMinLenErrCnt;
-       __u64 RxICRCErrCnt;
-       __u64 RxVCRCErrCnt;
-       __u64 RxFlowCtrlErrCnt;
-       __u64 RxBadFormatCnt;
-       __u64 RxLinkProblemCnt;
-       __u64 RxEBPCnt;
-       __u64 RxLPCRCErrCnt;
-       __u64 RxBufOvflCnt;
-       __u64 RxTIDFullErrCnt;
-       __u64 RxTIDValidErrCnt;
-       __u64 RxPKeyMismatchCnt;
-       __u64 RxP0HdrEgrOvflCnt;
-       __u64 RxP1HdrEgrOvflCnt;
-       __u64 RxP2HdrEgrOvflCnt;
-       __u64 RxP3HdrEgrOvflCnt;
-       __u64 RxP4HdrEgrOvflCnt;
-       __u64 RxP5HdrEgrOvflCnt;
-       __u64 RxP6HdrEgrOvflCnt;
-       __u64 RxP7HdrEgrOvflCnt;
-       __u64 RxP8HdrEgrOvflCnt;
-       __u64 Reserved6;
-       __u64 Reserved7;
-       __u64 IBStatusChangeCnt;
-       __u64 IBLinkErrRecoveryCnt;
-       __u64 IBLinkDownedCnt;
-       __u64 IBSymbolErrCnt;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof( \
-       struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
-       struct _infinipath_do_not_use_counters, field) / sizeof(u64))
-
-static const struct ipath_kregs ipath_ht_kregs = {
-       .kr_control = IPATH_KREG_OFFSET(Control),
-       .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
-       .kr_debugport = IPATH_KREG_OFFSET(DebugPort),
-       .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
-       .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
-       .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
-       .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
-       .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
-       .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
-       .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
-       .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
-       .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
-       .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
-       .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
-       .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
-       .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
-       .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
-       .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
-       .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
-       .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
-       .kr_intclear = IPATH_KREG_OFFSET(IntClear),
-       .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
-       .kr_intmask = IPATH_KREG_OFFSET(IntMask),
-       .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
-       .kr_mdio = IPATH_KREG_OFFSET(MDIO),
-       .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
-       .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
-       .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
-       .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
-       .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
-       .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
-       .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
-       .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
-       .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
-       .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
-       .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
-       .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
-       .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
-       .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
-       .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
-       .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
-       .kr_revision = IPATH_KREG_OFFSET(Revision),
-       .kr_scratch = IPATH_KREG_OFFSET(Scratch),
-       .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
-       .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
-       .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
-       .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
-       .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
-       .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
-       .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
-       .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
-       .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
-       .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
-       .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
-       .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
-       .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
-       .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
-       /*
-        * These should not be written directly via ipath_write_kreg64();
-        * use ipath_write_kreg64_port() instead.
-        */
-       .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
-       .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
-};
-
-static const struct ipath_cregs ipath_ht_cregs = {
-       .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
-       .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
-       .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
-       .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
-       .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
-       .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
-       .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
-       .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
-       .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
-       .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
-       .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
-       .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
-       /* calc from Reg_CounterRegBase + offset */
-       .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
-       .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
-       .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
-       .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
-       .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
-       .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
-       .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
-       .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
-       .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
-       .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
-       .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
-       .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
-       .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
-       .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
-       .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
-       .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
-       .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
-       .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
-       .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
-       .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
-       .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVURG_SHIFT 0
-#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVAVAIL_SHIFT 12
-
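-/*
- * Sketch (hypothetical helper, not in the original file): fields in
- * kr_intstatus are extracted with the usual shift-and-mask idiom,
- * e.g. the per-port receive-available bits:
- */
-static inline u32 ipath_ht_rcvavail_bits(u64 intstatus)
-{
-       /* bits 12..20 hold one RcvAvail bit for each of the 9 ports */
-       return (u32)((intstatus >> INFINIPATH_I_RCVAVAIL_SHIFT) &
-                    INFINIPATH_I_RCVAVAIL_MASK);
-}
-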
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus bits */
-#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
-#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR   0x0000000000800000ULL
-#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR   0x0000000001000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR   0x0000000002000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR   0x0000000004000000ULL
-#define INFINIPATH_HWE_HTCMISCERR4          0x0000000008000000ULL
-#define INFINIPATH_HWE_HTCMISCERR5          0x0000000010000000ULL
-#define INFINIPATH_HWE_HTCMISCERR6          0x0000000020000000ULL
-#define INFINIPATH_HWE_HTCMISCERR7          0x0000000040000000ULL
-#define INFINIPATH_HWE_HTCBUSTREQPARITYERR  0x0000000080000000ULL
-#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
-#define INFINIPATH_HWE_HTCBUSIREQPARITYERR  0x0000000200000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_FBSLIP        0x0200000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_RFSLIP        0x0400000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_FBSLIP        0x0800000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_RFSLIP        0x1000000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED      0x2000000000000000ULL
-
-#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
-#define IBA6110_IBCS_LINKSTATE_SHIFT 4
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_CORRECT     0x0000000000008000
-
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL        /* 40 bits valid */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
-
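-/*
- * Sketch (hypothetical helper, not in the original file): an RT (TID)
- * entry combines the fields above, a 40-bit physical address, a buffer
- * size in 32-bit words at bit 48, and the valid bit; this is the same
- * layout ipath_ht_put_tid() writes further down.
- */
-static inline u64 ipath_ht_make_rt_entry(u64 pa, u64 words)
-{
-       return (pa & INFINIPATH_RT_ADDR_MASK) |
-               ((words & INFINIPATH_RT_BUFSIZE_MASK) <<
-                INFINIPATH_RT_BUFSIZE_SHIFT) |
-               INFINIPATH_RT_VALID;
-}
-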
-#define INFINIPATH_R_INTRAVAIL_SHIFT 16
-#define INFINIPATH_R_TAILUPD_SHIFT 31
-
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET          0x7ULL
-
-/*
- * masks and bits that are different in different chips, or present only
- * in one
- */
-static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
-    INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
-static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
-    INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
-
-static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA \
-       (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL \
-       (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
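-/*
- * Annotation: these build the output-enable bits for the two GPIO
- * pins used as an I2C bus (SDA/SCL) to reach the board EEPROM; see
- * ipath_get_eeprom_info() in ipath_ht_early_init() below.
- * INFINIPATH_EXTC_GPIOOE_SHIFT comes from the common headers.
- */
-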
-/* keep the code below somewhat more readable; not used elsewhere */
-#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
-                               infinipath_hwe_htclnkabyte1crcerr)
-#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr |    \
-                               infinipath_hwe_htclnkbbyte1crcerr)
-#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
-                               infinipath_hwe_htclnkbbyte0crcerr)
-#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr |    \
-                               infinipath_hwe_htclnkbbyte1crcerr)
-
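-/*
- * Annotation: "link" groups both byte lanes of one HT link (0 is link
- * A, 1 is link B), while "lane" groups the same byte lane across both
- * links; hwerr_crcbits() below uses the two views to report which
- * link and which lane saw the CRC error.
- */
-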
-static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
-                         char *msg, size_t msgl)
-{
-       char bitsmsg[64];
-       ipath_err_t crcbits = hwerrs &
-               (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
-       /* don't check if 8bit HT */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-               crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
-       /* don't check if 8bit HT */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-               crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
-       /*
-        * Eventually we'll want to ignore link errors on a link that
-        * is not in use, if any.  For now, complain about both.
-        */
-       if (crcbits) {
-               u16 ctrl0, ctrl1;
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
-                        !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
-                        "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
-                                   ? "1 (B)" : "0+1 (A+B)"),
-                        !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
-                        : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
-                           "0+1"), (unsigned long long) crcbits);
-               strlcat(msg, bitsmsg, msgl);
-
-               /*
-                * print extra info for debugging: slave/primary
-                * config words 4 and 8 (link control 0 and 1)
-                */
-
-               if (pci_read_config_word(dd->pcidev,
-                                        dd->ipath_ht_slave_off + 0x4,
-                                        &ctrl0))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkctrl0 of slave/primary "
-                                "config block\n");
-               else if (!(ctrl0 & 1 << 6))
-                       /* not if EOC bit set */
-                       ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
-                                 ((ctrl0 >> 8) & 7) ? " CRC" : "",
-                                 ((ctrl0 >> 4) & 1) ? "linkfail" :
-                                 "");
-               if (pci_read_config_word(dd->pcidev,
-                                        dd->ipath_ht_slave_off + 0x8,
-                                        &ctrl1))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkctrl1 of slave/primary "
-                                "config block\n");
-               else if (!(ctrl1 & 1 << 6))
-                       /* not if EOC bit set */
-                       ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
-                                 ((ctrl1 >> 8) & 7) ? " CRC" : "",
-                                 ((ctrl1 >> 4) & 1) ? "linkfail" :
-                                 "");
-
-               /* disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~crcbits;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-               ipath_dbg("HT crc errs: %s\n", msg);
-       } else
-               ipath_dbg("ignoring HT crc errors 0x%llx, "
-                         "not in use\n", (unsigned long long)
-                         (hwerrs & (_IPATH_HTLINK0_CRCBITS |
-                                    _IPATH_HTLINK1_CRCBITS)));
-}
-
-/* 6110 specific hardware errors... */
-static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
-       INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
-       INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
-       INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
-       INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
-       INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
-       INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
-       INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
-       INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
-};
-
-#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
-                       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
-                       << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
-#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
-                         << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
-
-static void ipath_ht_txe_recover(struct ipath_devdata *dd)
-{
-       ++ipath_stats.sps_txeparity;
-       dev_info(&dd->pcidev->dev,
-               "Recovering from TXE PIO parity error\n");
-}
-
-
-/**
- * ipath_ht_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Most hardware errors are catastrophic, but for right now we'll
- * print them and continue.  We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
-                                    size_t msgl)
-{
-       ipath_err_t hwerrs;
-       u32 bits, ctrl;
-       int isfatal = 0;
-       char bitsmsg[64];
-       int log_idx;
-
-       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-
-       if (!hwerrs) {
-               ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
-               /*
-                * bailing is better than printing confusing messages.
-                * This seems to be related to clearing the CRC error,
-                * or the PLL error, during init.
-                */
-               goto bail;
-       } else if (hwerrs == -1LL) {
-               ipath_dev_err(dd, "Read of hardware error status failed "
-                             "(all bits set); ignoring\n");
-               goto bail;
-       }
-       ipath_stats.sps_hwerrs++;
-
-       /* Always clear the error status register, except MEMBISTFAIL,
-        * regardless of whether we continue or stop using the chip.
-        * We want that set so we know it failed, even across driver reload.
-        * We'll still ignore it in the hwerrmask.  We do this partly for
-        * diagnostics, but also for support */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
-       hwerrs &= dd->ipath_hwerrmask;
-
-       /* We log some errors to EEPROM, check if we have any of those. */
-       for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
-               if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
-                       ipath_inc_eeprom_err(dd, log_idx, 1);
-
-       /*
-        * make sure we get this much out, unless it's a parity error
-        * we may recover from, or an error we have already reported;
-        * always print when verbose debugging is enabled
-        */
-       if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
-               RXE_EAGER_PARITY)) ||
-               (ipath_debug & __IPATH_VERBDBG))
-               dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
-                        "(cleared)\n", (unsigned long long) hwerrs);
-       dd->ipath_lasthwerror |= hwerrs;
-
-       if (hwerrs & ~dd->ipath_hwe_bitsextant)
-               ipath_dev_err(dd, "hwerror interrupt with unknown errors "
-                             "%llx set\n", (unsigned long long)
-                             (hwerrs & ~dd->ipath_hwe_bitsextant));
-
-       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-       if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
-               /*
-                * parity errors in send memory are recoverable,
-                * just cancel the send (if indicated in sendbuffererror),
-                * count the occurrence, unfreeze (if no other handled
-                * hardware error bits are set), and continue. They can
-                * occur if a processor speculative read is done to the PIO
-                * buffer while we are sending a packet, for example.
-                */
-               if (hwerrs & TXE_PIO_PARITY) {
-                       ipath_ht_txe_recover(dd);
-                       hwerrs &= ~TXE_PIO_PARITY;
-               }
-
-               if (!hwerrs) {
-                       ipath_dbg("Clearing freezemode on ignored or "
-                                 "recovered hardware error\n");
-                       ipath_clear_freeze(dd);
-               }
-       }
-
-       *msg = '\0';
-
-       /*
-        * we may someday want to decode which bits belong to which
-        * functional area, for parity errors, etc.
-        */
-       if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
-                     << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
-                        bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-
-       ipath_format_hwerrors(hwerrs,
-                             ipath_6110_hwerror_msgs,
-                             ARRAY_SIZE(ipath_6110_hwerror_msgs),
-                             msg, msgl);
-
-       if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
-               hwerr_crcbits(dd, hwerrs, msg, msgl);
-
-       if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-               strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
-                       msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |       \
-                        INFINIPATH_HWE_COREPLL_RFSLIP |        \
-                        INFINIPATH_HWE_HTBPLL_FBSLIP |         \
-                        INFINIPATH_HWE_HTBPLL_RFSLIP |         \
-                        INFINIPATH_HWE_HTAPLL_FBSLIP |         \
-                        INFINIPATH_HWE_HTAPLL_RFSLIP)
-
-       if (hwerrs & _IPATH_PLL_FAIL) {
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[PLL failed (%llx), InfiniPath hardware unusable]",
-                        (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
-               strlcat(msg, bitsmsg, msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
-               /*
-                * If it occurs, it is left masked since the external
-                * interface is unused.
-                */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs) {
-               /*
-                * if any bits are set that we aren't ignoring, only
-                * make the complaint once, in case it's stuck or
-                * recurring and we get here multiple times.  Force the
-                * link down, so the switch knows, and so the LEDs are
-                * turned off.
-                */
-               if (dd->ipath_flags & IPATH_INITTED) {
-                       ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-                       ipath_setup_ht_setextled(dd,
-                               INFINIPATH_IBCS_L_STATE_DOWN,
-                               INFINIPATH_IBCS_LT_STATE_DISABLED);
-                       ipath_dev_err(dd, "Fatal Hardware Error (freeze "
-                                         "mode), no longer usable, SN %.16s\n",
-                                         dd->ipath_serial);
-                       isfatal = 1;
-               }
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-               /* mark as having had error */
-               *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-               /*
-                * mark as not usable, at a minimum until driver
-                * is reloaded, probably until reboot, since no
-                * other reset is possible.
-                */
-               dd->ipath_flags &= ~IPATH_INITTED;
-       } else {
-               *msg = 0; /* recovered from all of them */
-       }
-       if (*msg)
-               ipath_dev_err(dd, "%s hardware error\n", msg);
-       if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
-               /*
-                * for status file; if no trailing brace is copied,
-                * we'll know it was truncated.
-                */
-               snprintf(dd->ipath_freezemsg,
-                        dd->ipath_freezelen, "{%s}", msg);
-
-bail:;
-}
-
-/**
- * ipath_ht_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * fill in the board name, based on the board revision register
- */
-static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
-                             size_t namelen)
-{
-       char *n = NULL;
-       u8 boardrev = dd->ipath_boardrev;
-       int ret = 0;
-
-       switch (boardrev) {
-       case 5:
-               /*
-                * original production board; two production levels, with
-                * different serial number ranges.  See ipath_ht_early_init()
-                * for the case where we enable IPATH_GPIO_INTR for the later
-                * serial number range.  The original 112* serial number
-                * range is no longer supported.
-                */
-               n = "InfiniPath_QHT7040";
-               break;
-       case 7:
-               /* small form factor production board */
-               n = "InfiniPath_QHT7140";
-               break;
-       default:                /* don't know, just print the number */
-               ipath_dev_err(dd, "Don't yet know about board "
-                             "with ID %u\n", boardrev);
-               snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
-                        boardrev);
-               break;
-       }
-       if (n)
-               snprintf(name, namelen, "%s", n);
-
-       if (ret) {
-               ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
-               goto bail;
-       }
-       if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
-               dd->ipath_minrev > 4)) {
-               /*
-                * This version of the driver only supports Rev 3.2 - 3.4
-                */
-               ipath_dev_err(dd,
-                             "Unsupported InfiniPath hardware revision %u.%u!\n",
-                             dd->ipath_majrev, dd->ipath_minrev);
-               ret = 1;
-               goto bail;
-       }
-       /*
-        * pkt/word counters are 32 bit, and therefore wrap fast enough
-        * that we snapshot them from a timer, and maintain 64 bit shadow
-        * copies
-        */
-       dd->ipath_flags |= IPATH_32BITCOUNTERS;
-       dd->ipath_flags |= IPATH_GPIO_INTR;
-       if (dd->ipath_lbus_speed != 800)
-               ipath_dev_err(dd,
-                             "Incorrectly configured for HT @ %uMHz\n",
-                             dd->ipath_lbus_speed);
-
-       /*
-        * set here, not in ipath_init_*_funcs because we have to do
-        * it after we can read chip registers.
-        */
-       dd->ipath_ureg_align =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-
-bail:
-       return ret;
-}
-
-static void ipath_check_htlink(struct ipath_devdata *dd)
-{
-       u8 linkerr, link_off, i;
-
-       for (i = 0; i < 2; i++) {
-               link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
-               if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkerror%d of HT slave/primary block\n",
-                                i);
-               else if (linkerr & 0xf0) {
-                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-                                  "clearing\n", linkerr >> 4, i);
-                       /*
-                        * writing the linkerr bits that are set should
-                        * clear them
-                        */
-                       if (pci_write_config_byte(dd->pcidev, link_off,
-                                                 linkerr))
-                               ipath_dbg("Failed write to clear HT "
-                                         "linkerror%d\n", i);
-                       if (pci_read_config_byte(dd->pcidev, link_off,
-                                                &linkerr))
-                               dev_info(&dd->pcidev->dev,
-                                        "Couldn't reread linkerror%d of "
-                                        "HT slave/primary block\n", i);
-                       else if (linkerr & 0xf0)
-                               dev_info(&dd->pcidev->dev,
-                                        "HT linkerror%d bits 0x%x "
-                                        "couldn't be cleared\n",
-                                        i, linkerr >> 4);
-               }
-       }
-}
-
-static int ipath_setup_ht_reset(struct ipath_devdata *dd)
-{
-       ipath_dbg("No reset possible for this InfiniPath hardware\n");
-       return 0;
-}
-
-#define HT_INTR_DISC_CONFIG  0x80      /* HT interrupt and discovery cap */
-#define HT_INTR_REG_INDEX    2 /* intconfig requires indirect accesses */
-
-/*
- * A capability type of 0 in bits 13-15 of the command word means the
- * slave/primary block.  Clear any HT CRC errors.  We only bother to do
- * this at load time, because it's OK if it happened before we were
- * loaded (first time after boot/reset), but any time after that it's
- * fatal anyway.  Also, we must not check for upper byte errors if we
- * are in 8 bit mode, so figure out our width.  For now, at least, also
- * complain if it's 8 bit.
- */
-static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
-                            int pos, u8 cap_type)
-{
-       u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
-       u16 linkctrl = 0;
-       int i;
-
-       dd->ipath_ht_slave_off = pos;
-       /*
-        * master_host bit of the command word: if set, HT link B
-        * connects to the processor; otherwise link A does.
-        */
-       if ((cap_type >> 2) & 1)
-               link_a_b_off = 4;
-       else
-               link_a_b_off = 0;
-       ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
-                  link_a_b_off ? 1 : 0,
-                  link_a_b_off ? 'B' : 'A');
-
-       link_a_b_off += pos;
-
-       /*
-        * check both link control registers; clear both HT CRC sets if
-        * necessary.
-        */
-       for (i = 0; i < 2; i++) {
-               link_off = pos + i * 4 + 0x4;
-               if (pci_read_config_word(pdev, link_off, &linkctrl))
-                       ipath_dev_err(dd, "Couldn't read HT link control%d "
-                                     "register\n", i);
-               else if (linkctrl & (0xf << 8)) {
-                       ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
-                                  "bits %x\n", i, linkctrl & (0xf << 8));
-                       /*
-                        * now write them back to clear the error.
-                        */
-                       pci_write_config_word(pdev, link_off,
-                                             linkctrl & (0xf << 8));
-               }
-       }
-
-       /*
-        * As with HT CRC bits, same for protocol errors that might occur
-        * during boot.
-        */
-       for (i = 0; i < 2; i++) {
-               link_off = pos + i * 4 + 0xd;
-               if (pci_read_config_byte(pdev, link_off, &linkerr))
-                       dev_info(&pdev->dev, "Couldn't read linkerror%d "
-                                "of HT slave/primary block\n", i);
-               else if (linkerr & 0xf0) {
-                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-                                  "clearing\n", linkerr >> 4, i);
-                       /*
-                        * writing the linkerr bits that are set will clear
-                        * them
-                        */
-                       if (pci_write_config_byte
-                           (pdev, link_off, linkerr))
-                               ipath_dbg("Failed write to clear HT "
-                                         "linkerror%d\n", i);
-                       if (pci_read_config_byte(pdev, link_off, &linkerr))
-                               dev_info(&pdev->dev, "Couldn't reread "
-                                        "linkerror%d of HT slave/primary "
-                                        "block\n", i);
-                       else if (linkerr & 0xf0)
-                               dev_info(&pdev->dev, "HT linkerror%d bits "
-                                        "0x%x couldn't be cleared\n",
-                                        i, linkerr >> 4);
-               }
-       }
-
-       /*
-        * this is just for our link to the host, not for devices
-        * connected through the tunnel.
-        */
-
-       if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
-               ipath_dev_err(dd, "Couldn't read HT link width "
-                             "config register\n");
-       else {
-               u32 width;
-               switch (linkwidth & 7) {
-               case 5:
-                       width = 4;
-                       break;
-               case 4:
-                       width = 2;
-                       break;
-               case 3:
-                       width = 32;
-                       break;
-               case 1:
-                       width = 16;
-                       break;
-               case 0:
-               default:        /* if wrong, assume 8 bit */
-                       width = 8;
-                       break;
-               }
-
-               dd->ipath_lbus_width = width;
-
-               if (linkwidth != 0x11) {
-                       ipath_dev_err(dd, "Not configured for 16 bit HT "
-                                     "(%x)\n", linkwidth);
-                       if (!(linkwidth & 0xf)) {
-                               ipath_dbg("Will ignore HT lane1 errors\n");
-                               dd->ipath_flags |= IPATH_8BIT_IN_HT0;
-                       }
-               }
-       }
-
-       /*
-        * this is just for our link to the host, not for devices
-        * connected through the tunnel.
-        */
-       if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
-               ipath_dev_err(dd, "Couldn't read HT link frequency "
-                             "config register\n");
-       else {
-               u32 speed;
-               switch (linkwidth & 0xf) {
-               case 6:
-                       speed = 1000;
-                       break;
-               case 5:
-                       speed = 800;
-                       break;
-               case 4:
-                       speed = 600;
-                       break;
-               case 3:
-                       speed = 500;
-                       break;
-               case 2:
-                       speed = 400;
-                       break;
-               case 1:
-                       speed = 300;
-                       break;
-               default:
-                       /*
-                        * assume reserved and vendor-specific are 200...
-                        */
-               case 0:
-                       speed = 200;
-                       break;
-               }
-               dd->ipath_lbus_speed = speed;
-       }
-
-       snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
-               "HyperTransport,%uMHz,x%u\n",
-               dd->ipath_lbus_speed,
-               dd->ipath_lbus_width);
-}
-
-static int ipath_ht_intconfig(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (dd->ipath_intconfig) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
-                                dd->ipath_intconfig);  /* interrupt address */
-               ret = 0;
-       } else {
-               ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
-                             "interrupt address\n");
-               ret = -EINVAL;
-       }
-
-       return ret;
-}
-
-static void ipath_ht_irq_update(struct pci_dev *dev, int irq,
-                               struct ht_irq_msg *msg)
-{
-       struct ipath_devdata *dd = pci_get_drvdata(dev);
-       u64 prev_intconfig = dd->ipath_intconfig;
-
-       dd->ipath_intconfig = msg->address_lo;
-       dd->ipath_intconfig |= ((u64) msg->address_hi) << 32;
-
-       /*
-        * If the previous value of dd->ipath_intconfig is zero, we're
-        * getting configured for the first time, and must not program the
-        * intconfig register here (it will be programmed later, when the
-        * hardware is ready).  Otherwise, we should.
-        */
-       if (prev_intconfig)
-               ipath_ht_intconfig(dd);
-}
-
-/**
- * ipath_setup_ht_config - setup the interruptconfig register
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * setup the interruptconfig register from the HT config info.
- * Also clear CRC errors in HT linkcontrol, if necessary.
- * This is done only for the real hardware.  It is done before
- * chip address space is initted, so can't touch infinipath registers
- */
-static int ipath_setup_ht_config(struct ipath_devdata *dd,
-                                struct pci_dev *pdev)
-{
-       int pos, ret;
-
-       ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update);
-       if (ret < 0) {
-               ipath_dev_err(dd, "Couldn't create interrupt handler: "
-                             "err %d\n", ret);
-               goto bail;
-       }
-       dd->ipath_irq = ret;
-       ret = 0;
-
-       /*
-        * Handle clearing CRC errors in linkctrl register if necessary.  We
-        * do this early, before we ever enable errors or hardware errors,
-        * mostly to avoid causing the chip to enter freeze mode.
-        */
-       pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
-       if (!pos) {
-               ipath_dev_err(dd, "Couldn't find HyperTransport "
-                             "capability; no interrupts\n");
-               ret = -ENODEV;
-               goto bail;
-       }
-       do {
-               u8 cap_type;
-
-               /*
-                * The HT capability type byte is 3 bytes after the
-                * capability byte.
-                */
-               if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
-                       dev_info(&pdev->dev, "Couldn't read config "
-                                "command @ %d\n", pos);
-                       continue;
-               }
-               if (!(cap_type & 0xE0))
-                       slave_or_pri_blk(dd, pdev, pos, cap_type);
-       } while ((pos = pci_find_next_capability(pdev, pos,
-                                                PCI_CAP_ID_HT)));
-
-       dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_setup_ht_cleanup - clean up any chip-specific stuff
- * @dd: the infinipath device
- *
- * Called during driver unload.
- * This is currently a nop for the HT chip, not for all chips
- */
-static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
-{
-}
-
-/**
- * ipath_setup_ht_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * Set the state of the two external LEDs, to indicate physical and
- * logical state of IB link.   For this chip (at least with recommended
- * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
- * (logical state)
- *
- * Note:  We try to match the Mellanox HCA LED behavior as best
- * we can.  Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate.  That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
-                                    u64 lst, u64 ltst)
-{
-       u64 extctl;
-       unsigned long flags = 0;
-
-       /* the diags use the LED to indicate diag info, so we leave
-        * the external LED alone when the diags are running */
-       if (ipath_diag_inuse)
-               return;
-
-       /* Allow override of LED display, e.g. for locating the system in a rack */
-       if (dd->ipath_led_override) {
-               ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
-                       ? INFINIPATH_IBCS_LT_STATE_LINKUP
-                       : INFINIPATH_IBCS_LT_STATE_DISABLED;
-               lst = (dd->ipath_led_override & IPATH_LED_LOG)
-                       ? INFINIPATH_IBCS_L_STATE_ACTIVE
-                       : INFINIPATH_IBCS_L_STATE_DOWN;
-       }
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       /*
-        * start by setting both LED control bits to off, then turn
-        * on the appropriate bit(s).
-        */
-       if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
-               /*
-                * the major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
-                * is inverted, because it is normally used to indicate
-                * a hardware fault at reset, if there were errors.
-                */
-               extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
-                       | INFINIPATH_EXTC_LEDGBLERR_OFF;
-               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-                       extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
-               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-                       extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
-       } else {
-               extctl = dd->ipath_extctrl &
-                       ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
-                         INFINIPATH_EXTC_LED2PRIPORT_ON);
-               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-                       extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
-               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-                       extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
-       }
-       dd->ipath_extctrl = extctl;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-}
-
-static void ipath_init_ht_variables(struct ipath_devdata *dd)
-{
-       /*
-        * setup the register offsets, since they are different for each
-        * chip
-        */
-       dd->ipath_kregs = &ipath_ht_kregs;
-       dd->ipath_cregs = &ipath_ht_cregs;
-
-       dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-       dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-       dd->ipath_gpio_sda = IPATH_GPIO_SDA;
-       dd->ipath_gpio_scl = IPATH_GPIO_SCL;
-
-       /*
-        * Fill in data for field-values that change in newer chips.
-        * We dynamically specify only the mask for LINKTRAININGSTATE
-        * and only the shift for LINKSTATE, as they are the only ones
-        * that change.  Also precalculate the 3 link states of interest
-        * and the combined mask.
-        */
-       dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
-       dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
-       dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
-               dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
-       dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
-       dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
-       dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
-
-       /*
-        * Fill in data for ibcc field-values that change in newer chips.
-        * We dynamically specify only the mask for LINKINITCMD
-        * and only the shift for LINKCMD and MAXPKTLEN, as they are
-        * the only ones that change.
-        */
-       dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
-       dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
-       dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-
-       /* Fill in shifts for RcvCtrl. */
-       dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
-       dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
-       dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
-       dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
-
-       dd->ipath_i_bitsextant =
-               (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
-               (INFINIPATH_I_RCVAVAIL_MASK <<
-                INFINIPATH_I_RCVAVAIL_SHIFT) |
-               INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
-               INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-
-       dd->ipath_e_bitsextant =
-               INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
-               INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
-               INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
-               INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
-               INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
-               INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
-               INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-               INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
-               INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
-               INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
-               INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
-               INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
-               INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
-               INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
-               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
-               INFINIPATH_E_HARDWARE;
-
-       dd->ipath_hwe_bitsextant =
-               (INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
-               INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
-               INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
-               INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
-               INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
-               INFINIPATH_HWE_HTCMISCERR4 |
-               INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
-               INFINIPATH_HWE_HTCMISCERR7 |
-               INFINIPATH_HWE_HTCBUSTREQPARITYERR |
-               INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
-               INFINIPATH_HWE_HTCBUSIREQPARITYERR |
-               INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
-               INFINIPATH_HWE_MEMBISTFAILED |
-               INFINIPATH_HWE_COREPLL_FBSLIP |
-               INFINIPATH_HWE_COREPLL_RFSLIP |
-               INFINIPATH_HWE_HTBPLL_FBSLIP |
-               INFINIPATH_HWE_HTBPLL_RFSLIP |
-               INFINIPATH_HWE_HTAPLL_FBSLIP |
-               INFINIPATH_HWE_HTAPLL_RFSLIP |
-               INFINIPATH_HWE_SERDESPLLFAILED |
-               INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
-               INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-
-       dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-       dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-       dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
-       dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
-
-       /*
-        * EEPROM error log 0 is TXE parity errors, 1 is RXE parity,
-        * 2 is miscellaneous, and 3 is reserved for future use.
-        */
-       dd->ipath_eep_st_masks[0].hwerrs_to_log =
-               INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-               INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
-
-       dd->ipath_eep_st_masks[1].hwerrs_to_log =
-               INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-               INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
-
-       dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
-
-       dd->delay_mult = 2; /* SDR, 4X, can't change */
-
-       dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
-       dd->ipath_link_speed_supported = IPATH_IB_SDR;
-       dd->ipath_link_width_enabled = IB_WIDTH_4X;
-       dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
-       /* these can't change for this chip, so set once */
-       dd->ipath_link_width_active = dd->ipath_link_width_enabled;
-       dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
-}
-
-/**
- * ipath_ht_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those errors bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
-{
-       ipath_err_t val;
-       u64 extsval;
-
-       extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
-       if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
-               ipath_dev_err(dd, "MemBIST did not complete!\n");
-       if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
-               ipath_dbg("MemBIST corrected\n");
-
-       ipath_check_htlink(dd);
-
-       /* barring bugs, all hwerrors become interrupts; start with all enabled */
-       val = -1LL;
-       /* don't look at crc lane1 if 8 bit */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-               val &= ~infinipath_hwe_htclnkabyte1crcerr;
-       /* don't look at crc lane1 if 8 bit */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-               val &= ~infinipath_hwe_htclnkbbyte1crcerr;
-
-       /*
-        * disable RXDSYNCMEMPARITY because external serdes is unused,
-        * and therefore the logic will never be used or initialized,
-        * and uninitialized state will normally result in this error
-        * being asserted.  Similarly for the external serdes PLL
-        * lock signal.
-        */
-       val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
-                INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
-
-       /*
-        * Disable MISCERR4 because of an inversion in the HT core
-        * logic checking for errors that cause this bit to be set.
-        * This erratum can also cause the protocol error bit to be set
-        * in the HT config space linkerror register(s).
-        */
-       val &= ~INFINIPATH_HWE_HTCMISCERR4;
-
-       /*
-        * The PLL is ignored because the unused MDIO interface has a logic problem
-        */
-       if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
-               val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-       dd->ipath_hwerrmask = val;
-}
-
-/**
- * ipath_ht_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
-{
-       u64 val, config1;
-       int ret = 0, change = 0;
-
-       ipath_dbg("Trying to bringup serdes\n");
-
-       if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
-           INFINIPATH_HWE_SERDESPLLFAILED)
-       {
-               ipath_dbg("At start, serdes PLL failed bit set in "
-                         "hwerrstatus, clearing and continuing\n");
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                                INFINIPATH_HWE_SERDESPLLFAILED);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-       config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
-       ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs %llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       /* force reset on (INFINIPATH_SERDC0_RESET_MASK deliberately not set) */
-       val |= INFINIPATH_SERDC0_RESET_PLL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-       udelay(15);             /* need pll reset set at least for a bit */
-
-       if (val & INFINIPATH_SERDC0_RESET_PLL) {
-               u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
-               /* set lane resets, and tx idle, during pll reset */
-               val2 |= INFINIPATH_SERDC0_RESET_MASK |
-                       INFINIPATH_SERDC0_TXIDLE;
-               ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
-                          "%llx)\n", (unsigned long long) val2);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-                                val2);
-               /*
-                * be sure chip saw it
-                */
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               /*
-                * the PLL reset must be clear for at least 11 usec
-                * before the lane resets are cleared; give it a few more
-                */
-               udelay(15);
-               val = val2;     /* for check below */
-       }
-
-       if (val & (INFINIPATH_SERDC0_RESET_PLL |
-                  INFINIPATH_SERDC0_RESET_MASK |
-                  INFINIPATH_SERDC0_TXIDLE)) {
-               val &= ~(INFINIPATH_SERDC0_RESET_PLL |
-                        INFINIPATH_SERDC0_RESET_MASK |
-                        INFINIPATH_SERDC0_TXIDLE);
-               /* clear them */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-                                val);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-       if (val & INFINIPATH_XGXS_RESET) {
-               /* normally true after boot */
-               val &= ~INFINIPATH_XGXS_RESET;
-               change = 1;
-       }
-       if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
-            INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv) {
-               /* need to compensate for Tx inversion in partner */
-               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-                        INFINIPATH_XGXS_RX_POL_SHIFT);
-               val |= dd->ipath_rx_pol_inv <<
-                       INFINIPATH_XGXS_RX_POL_SHIFT;
-               change = 1;
-       }
-       if (change)
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       /* clear current and de-emphasis bits */
-       config1 &= ~0x0ffffffff00ULL;
-       /* set current to 20ma (encoded as 0, so the OR below is a no-op) */
-       config1 |= 0x00000000000ULL;
-       /* set de-emphasis to -5.68dB */
-       config1 |= 0x0cccc000000ULL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
-       ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs %llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       return ret;             /* for now, say we always succeeded */
-}
-
-/**
- * ipath_ht_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * Called when the driver is being unloaded.
- */
-static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
-{
-       u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       val |= INFINIPATH_SERDC0_TXIDLE;
-       ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
-                 (unsigned long long) val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-/**
- * ipath_ht_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_ht_put_tid(struct ipath_devdata *dd,
-                            u64 __iomem *tidptr, u32 type,
-                            unsigned long pa)
-{
-       if (!dd->ipath_kregbase)
-               return;
-
-       if (pa != dd->ipath_tidinvalid) {
-               if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
-                       dev_info(&dd->pcidev->dev,
-                                "physaddr %lx has more than "
-                                "40 bits, using only 40!!!\n", pa);
-                       pa &= INFINIPATH_RT_ADDR_MASK;
-               }
-               if (type == RCVHQ_RCV_TYPE_EAGER)
-                       pa |= dd->ipath_tidtemplate;
-               else {
-                       /* in words (fixed, full page).  */
-                       u64 lenvalid = PAGE_SIZE >> 2;
-                       lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-                       pa |= lenvalid | INFINIPATH_RT_VALID;
-               }
-       }
-
-       writeq(pa, tidptr);
-}
-
-
-/**
- * ipath_ht_clear_tids - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * Used from ipath_close(), and at chip initialization.
- */
-static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
-       u64 __iomem *tidbase;
-       int i;
-
-       if (!dd->ipath_kregbase)
-               return;
-
-       ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
-       /*
-        * need to invalidate all of the expected TID entries for this
-        * port, so we don't have valid entries that might somehow get
-        * used (early in next use of this port, or through some bug)
-        */
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  port * dd->ipath_rcvtidcnt *
-                                  sizeof(*tidbase));
-       for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
-                                dd->ipath_tidinvalid);
-
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvegrbase +
-                                  port * dd->ipath_rcvegrcnt *
-                                  sizeof(*tidbase));
-
-       for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
-                                dd->ipath_tidinvalid);
-}
-
-/**
- * ipath_ht_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We set up values that we use a lot, to avoid recalculating them each time
- */
-static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
-{
-       dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
-       dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-       dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
-
-       /*
-        * work around chip errata bug 7358, by marking invalid tids
-        * as having max length
-        */
-       dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
-               INFINIPATH_RT_BUFSIZE_SHIFT;
-}
-
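-/*
- * Annotation: with the constants above, ipath_tidinvalid works out to
- * 0x3FFF000000000000ULL: maximum buffer length, valid bit clear, and
- * a zero address, which is what the errata workaround relies on.
- */
-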
-static int ipath_ht_early_init(struct ipath_devdata *dd)
-{
-       u32 __iomem *piobuf;
-       u32 pioincr, val32;
-       int i;
-
-       /*
-        * one cache line; long IB headers will spill over into received
-        * buffer
-        */
-       dd->ipath_rcvhdrentsize = 16;
-       dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
-
-       /*
-        * For HT, we allocate a somewhat overly large eager buffer,
-        * such that we can guarantee that we can receive the largest
-        * packet that we can send out.  To truly support a 4KB MTU,
-        * we need to bump this to a large value.  To date, other than
-        * testing, we have never encountered an HCA that can really
-        * send 4KB MTU packets, so we do not handle that (we'll get
- * error interrupts if we ever see one).
-        */
-       dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
-
-       /*
-        * the min() check here is currently a nop, but it may not
-        * always be, depending on just how we do ipath_rcvegrbufsize
-        */
-       dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
-                                dd->ipath_rcvegrbufsize);
-       dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-       ipath_ht_tidtemplate(dd);
-
-       /*
-        * zero all the TID entries at startup.  We do this for sanity,
-        * in case of a previous driver crash of some kind, and also
-        * because the chip powers up with these memories in an unknown
-        * state.  Use portcnt, not cfgports, since this is for the
-        * full chip, not for current (possibly different) configuration
-        * value.
-        * Chip Errata bug 6447
-        */
-       for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
-               ipath_ht_clear_tids(dd, val32);
-
-       /*
-        * write the pbc of each buffer, to be sure it's initialized, then
-        * cancel all the buffers, and also abort any packets that might
-        * have been in flight for some reason (the latter is for driver
-        * unload/reload, but isn't a bad idea at first init).  PIO send
-        * isn't enabled at this point, so there is no danger of sending
-        * these out on the wire.
-        * Chip Errata bug 6610
-        */
-       piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
-                                 dd->ipath_piobufbase);
-       pioincr = dd->ipath_palign / sizeof(*piobuf);
-       for (i = 0; i < dd->ipath_piobcnt2k; i++) {
-               /*
-                * reasonable word count, just to init pbc
-                */
-               writel(16, piobuf);
-               piobuf += pioincr;
-       }
-
-       ipath_get_eeprom_info(dd);
-       if (dd->ipath_boardrev == 5) {
-               /*
-                * Later production QHT7040 has same changes as QHT7140, so
-                * can use GPIO interrupts.  They have serial #'s starting
-                * with 128, rather than 112.
-                */
-               if (dd->ipath_serial[0] == '1' &&
-                   dd->ipath_serial[1] == '2' &&
-                   dd->ipath_serial[2] == '8')
-                       dd->ipath_flags |= IPATH_GPIO_INTR;
-               else {
-                       ipath_dev_err(dd, "Unsupported InfiniPath board "
-                               "(serial number %.16s)!\n",
-                               dd->ipath_serial);
-                       return 1;
-               }
-       }
-
-       if (dd->ipath_minrev >= 4) {
-               /* Rev4+ reports extra errors via internal GPIO pins */
-               dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
-               dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-       }
-
-       return 0;
-}
-
-
-/**
- * ipath_ht_get_base_info - set chip-specific flags for user code
- * @pd: the port data
- * @kbase: ipath_base_info pointer
- *
- * We set the HT flag because the bandwidth difference between
- * HyperTransport and PCIe can affect some user packet algorithms.
- */
-static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
-       struct ipath_base_info *kinfo = kbase;
-
-       kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
-               IPATH_RUNTIME_PIO_REGSWAPPED;
-
-       if (pd->port_dd->ipath_minrev < 4)
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
-
-       return 0;
-}
-
-static void ipath_ht_free_irq(struct ipath_devdata *dd)
-{
-       free_irq(dd->ipath_irq, dd);
-       ht_destroy_irq(dd->ipath_irq);
-       dd->ipath_irq = 0;
-       dd->ipath_intconfig = 0;
-}
-
-static struct ipath_message_header *
-ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
-{
-       return (struct ipath_message_header *)
-               &rhf_addr[sizeof(u64) / sizeof(u32)];
-}
-
-static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
-{
-       dd->ipath_portcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
-       dd->ipath_p0_rcvegrcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-}
-
-static void ipath_ht_read_counters(struct ipath_devdata *dd,
-                                  struct infinipath_counters *cntrs)
-{
-       cntrs->LBIntCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
-       cntrs->LBFlowStallCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
-       cntrs->TxSDmaDescCnt = 0;
-       cntrs->TxUnsupVLErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
-       cntrs->TxDataPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
-       cntrs->TxFlowPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
-       cntrs->TxDwordCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
-       cntrs->TxLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
-       cntrs->TxMaxMinLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
-       cntrs->TxUnderrunCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
-       cntrs->TxFlowStallCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
-       cntrs->TxDroppedPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
-       cntrs->RxDroppedPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
-       cntrs->RxDataPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
-       cntrs->RxFlowPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
-       cntrs->RxDwordCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
-       cntrs->RxLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
-       cntrs->RxMaxMinLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
-       cntrs->RxICRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
-       cntrs->RxVCRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
-       cntrs->RxFlowCtrlErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
-       cntrs->RxBadFormatCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
-       cntrs->RxLinkProblemCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
-       cntrs->RxEBPCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
-       cntrs->RxLPCRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
-       cntrs->RxBufOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
-       cntrs->RxTIDFullErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
-       cntrs->RxTIDValidErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
-       cntrs->RxPKeyMismatchCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
-       cntrs->RxP0HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
-       cntrs->RxP1HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
-       cntrs->RxP2HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
-       cntrs->RxP3HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
-       cntrs->RxP4HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
-       cntrs->RxP5HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
-       cntrs->RxP6HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
-       cntrs->RxP7HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
-       cntrs->RxP8HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
-       cntrs->RxP9HdrEgrOvflCnt = 0;
-       cntrs->RxP10HdrEgrOvflCnt = 0;
-       cntrs->RxP11HdrEgrOvflCnt = 0;
-       cntrs->RxP12HdrEgrOvflCnt = 0;
-       cntrs->RxP13HdrEgrOvflCnt = 0;
-       cntrs->RxP14HdrEgrOvflCnt = 0;
-       cntrs->RxP15HdrEgrOvflCnt = 0;
-       cntrs->RxP16HdrEgrOvflCnt = 0;
-       cntrs->IBStatusChangeCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
-       cntrs->IBLinkErrRecoveryCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
-       cntrs->IBLinkDownedCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
-       cntrs->IBSymbolErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
-       cntrs->RxVL15DroppedPktCnt = 0;
-       cntrs->RxOtherLocalPhyErrCnt = 0;
-       cntrs->PcieRetryBufDiagQwordCnt = 0;
-       cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
-       cntrs->LocalLinkIntegrityErrCnt =
-               (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-               dd->ipath_lli_errs : dd->ipath_lli_errors;
-       cntrs->RxVlErrCnt = 0;
-       cntrs->RxDlidFltrCnt = 0;
-}
-
-
-/* no interrupt fallback for these chips */
-static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
-{
-       return 0;
-}
-
-
-/*
- * reset the XGXS (between serdes and IBC).  Slightly less intrusive
- * than resetting the IBC or external link state, and useful in some
- * cases to cause some retraining.  To do this right, we reset IBC
- * as well.
- */
-static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
-{
-       u64 val, prev_val;
-
-       prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-       val = prev_val | INFINIPATH_XGXS_RESET;
-       prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-       ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-}
-
-
-static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
-{
-       int ret;
-
-       switch (which) {
-       case IPATH_IB_CFG_LWID:
-               ret = dd->ipath_link_width_active;
-               break;
-       case IPATH_IB_CFG_SPD:
-               ret = dd->ipath_link_speed_active;
-               break;
-       case IPATH_IB_CFG_LWID_ENB:
-               ret = dd->ipath_link_width_enabled;
-               break;
-       case IPATH_IB_CFG_SPD_ENB:
-               ret = dd->ipath_link_speed_enabled;
-               break;
-       default:
-               ret =  -ENOTSUPP;
-               break;
-       }
-       return ret;
-}
-
-
-/* we assume range checking is already done, if needed */
-static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
-{
-       int ret = 0;
-
-       if (which == IPATH_IB_CFG_LWID_ENB)
-               dd->ipath_link_width_enabled = val;
-       else if (which == IPATH_IB_CFG_SPD_ENB)
-               dd->ipath_link_speed_enabled = val;
-       else
-               ret = -ENOTSUPP;
-       return ret;
-}
-
-
-static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
-{
-}
-
-
-static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
-{
-       ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
-               ipath_ib_linktrstate(dd, ibcs));
-       return 0;
-}
-
-
-/**
- * ipath_init_iba6110_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
-{
-       dd->ipath_f_intrsetup = ipath_ht_intconfig;
-       dd->ipath_f_bus = ipath_setup_ht_config;
-       dd->ipath_f_reset = ipath_setup_ht_reset;
-       dd->ipath_f_get_boardname = ipath_ht_boardname;
-       dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
-       dd->ipath_f_early_init = ipath_ht_early_init;
-       dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
-       dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
-       dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
-       dd->ipath_f_clear_tids = ipath_ht_clear_tids;
-       dd->ipath_f_put_tid = ipath_ht_put_tid;
-       dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
-       dd->ipath_f_setextled = ipath_setup_ht_setextled;
-       dd->ipath_f_get_base_info = ipath_ht_get_base_info;
-       dd->ipath_f_free_irq = ipath_ht_free_irq;
-       dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
-       dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
-       dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
-       dd->ipath_f_config_ports = ipath_ht_config_ports;
-       dd->ipath_f_read_counters = ipath_ht_read_counters;
-       dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
-       dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
-       dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
-       dd->ipath_f_config_jint = ipath_ht_config_jint;
-       dd->ipath_f_ib_updown = ipath_ht_ib_updown;
-
-       /*
-        * initialize chip-specific variables
-        */
-       ipath_init_ht_variables(dd);
-}
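ipath_init_iba6110_funcs() is the classic per-device ops-table pattern: the generic driver calls through the dd->ipath_f_* pointers and never needs to know which chip it is driving. Reduced to a sketch (hypothetical names, same shape):

#include <stdio.h>

struct dev;

/* A cut-down stand-in for the ipath_f_* pointers in ipath_devdata. */
struct dev {
	int  (*early_init)(struct dev *dd);
	void (*clear_tids)(struct dev *dd, unsigned port);
};

static int ht_early_init(struct dev *dd)
{
	puts("HT early init");
	return 0;
}

static void ht_clear_tids(struct dev *dd, unsigned port)
{
	printf("HT: clear TIDs for port %u\n", port);
}

/* Plays the role of ipath_init_iba6110_funcs(). */
static void init_ht_funcs(struct dev *dd)
{
	dd->early_init = ht_early_init;
	dd->clear_tids = ht_clear_tids;
}

int main(void)
{
	struct dev dd;

	init_ht_funcs(&dd);   /* chip-specific setup */
	dd.early_init(&dd);   /* generic code calls are chip-agnostic */
	dd.clear_tids(&dd, 0);
	return 0;
}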
diff --git a/drivers/staging/rdma/ipath/ipath_init_chip.c b/drivers/staging/rdma/ipath/ipath_init_chip.c
deleted file mode 100644 (file)
index a5eea19..0000000
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/moduleparam.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-/*
- * minimum number of buffers we want to have per port, after the driver's reserved buffers
- */
-#define IPATH_MIN_USER_PORT_BUFCNT 7
-
-/*
- * Number of ports we are configured to use (to allow for more pio
- * buffers per port, etc.)  Zero means use chip value.
- */
-static ushort ipath_cfgports;
-
-module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
-MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
-
-/*
- * Number of buffers reserved for the driver (verbs and layered drivers).
- * Initialized based on number of PIO buffers if not set via module interface.
- * The problem with this is that it's global, but we'll use different
- * numbers for different chip types.
- */
-static ushort ipath_kpiobufs;
-
-static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
-
-module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
-                 &ipath_kpiobufs, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
-
-/**
- * create_port0_egr - allocate the eager TID buffers
- * @dd: the infinipath device
- *
- * This code is now quite different for user and kernel, because
- * the kernel uses skbs for accelerated network performance.
- * This is the kernel (port0) version.
- *
- * Allocate the eager TID buffers and program them into infinipath.
- * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like verbs
- * packets, or pass the buffers up to the ipath layered driver and
- * thence the network layer, replacing them as we do so (see
- * ipath_rcv_layer()).
- */
-static int create_port0_egr(struct ipath_devdata *dd)
-{
-       unsigned e, egrcnt;
-       struct ipath_skbinfo *skbinfo;
-       int ret;
-
-       egrcnt = dd->ipath_p0_rcvegrcnt;
-
-       skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
-       if (skbinfo == NULL) {
-               ipath_dev_err(dd, "allocation error for eager TID "
-                             "skb array\n");
-               ret = -ENOMEM;
-               goto bail;
-       }
-       for (e = 0; e < egrcnt; e++) {
-               /*
-                * This is a bit tricky in that we allocate extra
-                * space for 2 bytes of the 14 byte ethernet header.
-                * These two bytes are passed in the ipath header so
-                * the rest of the data is word aligned.  We allocate
-                * 4 bytes so that the data buffer stays word aligned.
-                * See ipath_kreceive() for more details.
-                */
-               skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
-               if (!skbinfo[e].skb) {
-                       ipath_dev_err(dd, "SKB allocation error for "
-                                     "eager TID %u\n", e);
-                       while (e != 0)
-                               dev_kfree_skb(skbinfo[--e].skb);
-                       vfree(skbinfo);
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-       }
-       /*
-        * After loop above, so we can test non-NULL to see if ready
-        * to use at receive, etc.
-        */
-       dd->ipath_port0_skbinfo = skbinfo;
-
-       for (e = 0; e < egrcnt; e++) {
-               dd->ipath_port0_skbinfo[e].phys =
-                 ipath_map_single(dd->pcidev,
-                                  dd->ipath_port0_skbinfo[e].skb->data,
-                                  dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
-               dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
-                                   ((char __iomem *) dd->ipath_kregbase +
-                                    dd->ipath_rcvegrbase),
-                                   RCVHQ_RCV_TYPE_EAGER,
-                                   dd->ipath_port0_skbinfo[e].phys);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
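The alignment trick documented inside create_port0_egr() is easy to verify: a 14-byte ethernet header starting on a word boundary leaves the payload at offset 14, which is 2 mod 4; carrying 2 header bytes in the ipath header leaves 12 bytes in the buffer, so the payload lands word-aligned. As plain arithmetic:

#include <stdio.h>

int main(void)
{
	const int eth_hdr_len = 14;  /* full ethernet header */
	const int carried     = 2;   /* bytes carried in the ipath header */

	printf("payload offset, full header in buffer: %d mod 4\n",
	       eth_hdr_len % 4);
	printf("payload offset, 2 bytes carried:       %d mod 4\n",
	       (eth_hdr_len - carried) % 4);
	return 0;
}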
-
-static int bringup_link(struct ipath_devdata *dd)
-{
-       u64 val, ibc;
-       int ret = 0;
-
-       /* hold IBC in reset */
-       dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-
-       /*
-        * set initial max size pkt IBC will send, including ICRC; it's the
-        * PIO buffer size in dwords, plus 1; also see ipath_set_mtu()
-        */
-       val = (dd->ipath_ibmaxlen >> 2) + 1;
-       ibc = val << dd->ibcc_mpl_shift;
-
-       /* flowcontrolwatermark is in units of KBytes */
-       ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
-       /*
-        * How often flowctrl sent.  More or less in usecs; balance against
-        * watermark value, so that in theory senders always get a flow
-        * control update in time to not let the IB link go idle.
-        */
-       ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
-       /* max error tolerance */
-       ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-       /* use "real" buffer space for */
-       ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
-       /* IB credit flow control. */
-       ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-       /* initially come up waiting for TS1, without sending anything. */
-       dd->ipath_ibcctrl = ibc;
-       /*
-        * Want to start out with both LINKCMD and LINKINITCMD in NOP
-        * (0 and 0).  Don't put linkinitcmd in ipath_ibcctrl, want that
-        * to stay a NOP. Flag that we are disabled, for the (unlikely)
-        * case that some recovery path is trying to bring the link up
-        * before we are ready.
-        */
-       ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-               INFINIPATH_IBCC_LINKINITCMD_SHIFT;
-       dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-       ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
-                  (unsigned long long) ibc);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
-
-       /* be sure chip saw it */
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-       ret = dd->ipath_f_bringup_serdes(dd);
-
-       if (ret)
-               dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
-                        "not usable\n");
-       else {
-               /* enable IBC */
-               dd->ipath_control |= INFINIPATH_C_LINKENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                                dd->ipath_control);
-       }
-
-       return ret;
-}
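bringup_link() assembles the IBC control register one field at a time, each value shifted into position with an INFINIPATH_IBCC_*_SHIFT constant. The same pattern in isolation, with hypothetical field positions standing in for the real shift constants:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical field positions, for illustration only. */
#define WATERMARK_SHIFT 8   /* flow-control watermark, KByte units */
#define PERIOD_SHIFT    16  /* flow-control period, ~usec units */

static uint64_t pack_ibc(uint64_t maxlen_words, uint64_t watermark,
			 uint64_t period)
{
	uint64_t ibc = maxlen_words;          /* assume field at bit 0 */

	ibc |= watermark << WATERMARK_SHIFT;
	ibc |= period << PERIOD_SHIFT;
	return ibc;
}

int main(void)
{
	/* roughly what bringup_link() does with its 0x5 and 0x3 values;
	 * 513 == (2048 >> 2) + 1, i.e. a 2K PIO buffer in dwords plus 1 */
	printf("ibc = %#llx\n",
	       (unsigned long long)pack_ibc(513, 0x5, 0x3));
	return 0;
}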
-
-static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
-{
-       struct ipath_portdata *pd;
-
-       pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-       if (pd) {
-               pd->port_dd = dd;
-               pd->port_cnt = 1;
-               /* The port 0 pkey table is used by the layer interface. */
-               pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
-               pd->port_seq_cnt = 1;
-       }
-       return pd;
-}
-
-static int init_chip_first(struct ipath_devdata *dd)
-{
-       struct ipath_portdata *pd;
-       int ret = 0;
-       u64 val;
-
-       spin_lock_init(&dd->ipath_kernel_tid_lock);
-       spin_lock_init(&dd->ipath_user_tid_lock);
-       spin_lock_init(&dd->ipath_sendctrl_lock);
-       spin_lock_init(&dd->ipath_uctxt_lock);
-       spin_lock_init(&dd->ipath_sdma_lock);
-       spin_lock_init(&dd->ipath_gpio_lock);
-       spin_lock_init(&dd->ipath_eep_st_lock);
-       spin_lock_init(&dd->ipath_sdepb_lock);
-       mutex_init(&dd->ipath_eep_lock);
-
-       /*
-        * skip cfgports stuff because we are not allocating memory,
-        * and we don't want problems if the portcnt changed due to
-        * cfgports.  We do still check and report a difference if they
-        * are not the same (which should be impossible).
-        */
-       dd->ipath_f_config_ports(dd, ipath_cfgports);
-       if (!ipath_cfgports)
-               dd->ipath_cfgports = dd->ipath_portcnt;
-       else if (ipath_cfgports <= dd->ipath_portcnt) {
-               dd->ipath_cfgports = ipath_cfgports;
-               ipath_dbg("Configured to use %u ports out of %u in chip\n",
-                         dd->ipath_cfgports, ipath_read_kreg32(dd,
-                         dd->ipath_kregs->kr_portcnt));
-       } else {
-               dd->ipath_cfgports = dd->ipath_portcnt;
-               ipath_dbg("Tried to configured to use %u ports; chip "
-                         "only supports %u\n", ipath_cfgports,
-                         ipath_read_kreg32(dd,
-                                 dd->ipath_kregs->kr_portcnt));
-       }
-       /*
-        * Allocate full portcnt array, rather than just cfgports, because
-        * cleanup iterates across all possible ports.
-        */
-       dd->ipath_pd = kcalloc(dd->ipath_portcnt, sizeof(*dd->ipath_pd),
-                              GFP_KERNEL);
-
-       if (!dd->ipath_pd) {
-               ipath_dev_err(dd, "Unable to allocate portdata array, "
-                             "failing\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       pd = create_portdata0(dd);
-       if (!pd) {
-               ipath_dev_err(dd, "Unable to allocate portdata for port "
-                             "0, failing\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-       dd->ipath_pd[0] = pd;
-
-       dd->ipath_rcvtidcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-       dd->ipath_rcvtidbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-       dd->ipath_rcvegrcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-       dd->ipath_rcvegrbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-       dd->ipath_palign =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-       dd->ipath_piobufbase =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
-       dd->ipath_piosize2k = val & ~0U;
-       dd->ipath_piosize4k = val >> 32;
-       if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
-               ipath_mtu4096 = 0; /* 4KB not supported by this chip */
-       dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
-       dd->ipath_piobcnt2k = val & ~0U;
-       dd->ipath_piobcnt4k = val >> 32;
-       dd->ipath_pio2kbase =
-               (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-                                (dd->ipath_piobufbase & 0xffffffff));
-       if (dd->ipath_piobcnt4k) {
-               dd->ipath_pio4kbase = (u32 __iomem *)
-                       (((char __iomem *) dd->ipath_kregbase) +
-                        (dd->ipath_piobufbase >> 32));
-               /*
-                * 4K buffers take 2 pages; we use roundup just to be
-                * paranoid; we calculate it once here, rather than on
-                * every buffer allocation
-                */
-               dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
-                                         dd->ipath_palign);
-               ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
-                         "(%x aligned)\n",
-                         dd->ipath_piobcnt2k, dd->ipath_piosize2k,
-                         dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
-                         dd->ipath_piosize4k, dd->ipath_pio4kbase,
-                         dd->ipath_4kalign);
-       } else {
-               ipath_dbg("%u 2k piobufs @ %p\n",
-                         dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
-       }
-done:
-       return ret;
-}
-
-/**
- * init_chip_reset - re-initialize after a reset, or enable
- * @dd: the infinipath device
- *
- * sanity check at least some of the values after reset, and
- * ensure no receive or transmit (explicitly, in case reset
- * failed)
- */
-static int init_chip_reset(struct ipath_devdata *dd)
-{
-       u32 rtmp;
-       int i;
-       unsigned long flags;
-
-       /*
-        * ensure chip does no sends or receives, tail updates, or
-        * pioavail updates while we re-initialize
-        */
-       dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
-       for (i = 0; i < dd->ipath_portcnt; i++) {
-               clear_bit(dd->ipath_r_portenable_shift + i,
-                         &dd->ipath_rcvctrl);
-               clear_bit(dd->ipath_r_intravail_shift + i,
-                         &dd->ipath_rcvctrl);
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-               dd->ipath_rcvctrl);
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl = 0U; /* no sdma, etc */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-       if (rtmp != dd->ipath_rcvtidcnt)
-               dev_info(&dd->pcidev->dev, "tidcnt was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvtidcnt, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-       if (rtmp != dd->ipath_rcvtidbase)
-               dev_info(&dd->pcidev->dev, "tidbase was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvtidbase, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-       if (rtmp != dd->ipath_rcvegrcnt)
-               dev_info(&dd->pcidev->dev, "egrcnt was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvegrcnt, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-       if (rtmp != dd->ipath_rcvegrbase)
-               dev_info(&dd->pcidev->dev, "egrbase was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvegrbase, rtmp);
-
-       return 0;
-}
-
-static int init_pioavailregs(struct ipath_devdata *dd)
-{
-       int ret;
-
-       dd->ipath_pioavailregs_dma = dma_alloc_coherent(
-               &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
-               GFP_KERNEL);
-       if (!dd->ipath_pioavailregs_dma) {
-               ipath_dev_err(dd, "failed to allocate PIOavail reg area "
-                             "in memory\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       /*
-        * we really want L2 cache aligned, but for current CPUs of
-        * interest, they are the same.
-        */
-       dd->ipath_statusp = (u64 *)
-               ((char *)dd->ipath_pioavailregs_dma +
-                ((2 * L1_CACHE_BYTES +
-                  dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
-       /* copy the current value now that it's really allocated */
-       *dd->ipath_statusp = dd->_ipath_status;
-       /*
-        * setup buffer to hold freeze msg, accessible to apps,
-        * following statusp
-        */
-       dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
-       /* and its length */
-       dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
-
-       ret = 0;
-
-done:
-       return ret;
-}
-
-/**
- * init_shadow_tids - allocate the shadow TID array
- * @dd: the infinipath device
- *
- * allocate the shadow TID array, so we can ipath_munlock previous
- * entries.  It may make more sense to move the pageshadow to the
- * port data structure, so we only allocate memory for ports actually
- * in use, since we are at 8k per port now.
- */
-static void init_shadow_tids(struct ipath_devdata *dd)
-{
-       struct page **pages;
-       dma_addr_t *addrs;
-
-       pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-                       sizeof(struct page *));
-       if (!pages) {
-               ipath_dev_err(dd, "failed to allocate shadow page * "
-                             "array, no expected sends!\n");
-               dd->ipath_pageshadow = NULL;
-               return;
-       }
-
-       addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-                       sizeof(dma_addr_t));
-       if (!addrs) {
-               ipath_dev_err(dd, "failed to allocate shadow dma handle "
-                             "array, no expected sends!\n");
-               vfree(pages);
-               dd->ipath_pageshadow = NULL;
-               return;
-       }
-
-       dd->ipath_pageshadow = pages;
-       dd->ipath_physshadow = addrs;
-}
-
-static void enable_chip(struct ipath_devdata *dd, int reinit)
-{
-       u32 val;
-       u64 rcvmask;
-       unsigned long flags;
-       int i;
-
-       if (!reinit)
-               init_waitqueue_head(&ipath_state_wait);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       /* Enable PIO send, and update of PIOavail regs to memory. */
-       dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
-               INFINIPATH_S_PIOBUFAVAILUPD;
-
-       /*
-        * Set the PIO avail update threshold to host memory
-        * on chips that support it.
-        */
-       if (dd->ipath_pioupd_thresh)
-               dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-                       << INFINIPATH_S_UPDTHRESH_SHIFT;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /*
-        * Enable kernel ports' receive and receive interrupt.
-        * Other ports done as user opens and inits them.
-        */
-       rcvmask = 1ULL;
-       dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
-               (rcvmask << dd->ipath_r_intravail_shift);
-       if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
-               dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       /*
-        * now ready for use.  this should be cleared whenever we
-        * detect a reset, or initiate one.
-        */
-       dd->ipath_flags |= IPATH_INITTED;
-
-       /*
-        * Init our shadow copies of head from tail values,
-        * and write head values to match.
-        */
-       val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
-       ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
-
-       /* Initialize so we interrupt on next packet received */
-       ipath_write_ureg(dd, ur_rcvhdrhead,
-                        dd->ipath_rhdrhead_intr_off |
-                        dd->ipath_pd[0]->port_head, 0);
-
-       /*
-        * by now pioavail updates to memory should have occurred, so
-        * copy them into our working/shadow registers; this is in
-        * case something went wrong with abort, but mostly to get the
-        * initial values of the generation bit correct.
-        */
-       for (i = 0; i < dd->ipath_pioavregs; i++) {
-               __le64 pioavail;
-
-               /*
-                * Chip Errata bug 6641; even and odd qwords>3 are swapped.
-                */
-               if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-                       pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
-               else
-                       pioavail = dd->ipath_pioavailregs_dma[i];
-               /*
-                * don't need to worry about ipath_pioavailkernel here
-                * because we will call ipath_chg_pioavailkernel() later
-                * in initialization, to busy out buffers as needed
-                */
-               dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
-       }
-       /* can get counters, stats, etc. */
-       dd->ipath_flags |= IPATH_PRESENT;
-}
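The errata-6641 workaround in enable_chip() reads shadow register i from DMA slot i ^ 1 once i > 3 (and IPATH_SWAP_PIOBUFS is set), which swaps each even/odd pair: 4<->5, 6<->7, and so on. The index mapping on its own:

#include <stdio.h>

int main(void)
{
	for (int i = 0; i < 8; i++) {
		/* same index arithmetic as the loop in enable_chip() */
		int src = (i > 3) ? (i ^ 1) : i;

		printf("shadow[%d] <= dma[%d]\n", i, src);
	}
	return 0;
}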
-
-static int init_housekeeping(struct ipath_devdata *dd, int reinit)
-{
-       char boardn[40];
-       int ret = 0;
-
-       /*
-        * have to clear shadow copies of registers at init that are
-        * not otherwise set here, or all kinds of bizarre things
-        * happen with driver on chip reset
-        */
-       dd->ipath_rcvhdrsize = 0;
-
-       /*
-        * Don't clear ipath_flags as 8bit mode was set before
-        * entering this func. However, we do set the linkstate to
-        * unknown, so we can watch for a transition.
-        * PRESENT is set because we want register reads to work,
-        * and the kernel infrastructure saw it in config space;
-        * We clear it if we have failures.
-        */
-       dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT;
-       dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
-                            IPATH_LINKDOWN | IPATH_LINKINIT);
-
-       ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
-       dd->ipath_revision =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
-
-       /*
-        * set up fundamental info we need to use the chip; we assume
-        * if the revision reg and these regs are OK, we don't need to
-        * special case the rest
-        */
-       dd->ipath_sregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
-       dd->ipath_cregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
-       dd->ipath_uregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
-       ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
-                  "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
-                  dd->ipath_uregbase, dd->ipath_cregbase);
-       if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
-           || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
-           || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
-           || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
-               ipath_dev_err(dd, "Register read failures from chip, "
-                             "giving up initialization\n");
-               dd->ipath_flags &= ~IPATH_PRESENT;
-               ret = -ENODEV;
-               goto done;
-       }
-
-
-       /* clear diagctrl register, in case diags were running and crashed */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
-
-       /* clear the initial reset flag, in case first driver load */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-                        INFINIPATH_E_RESET);
-
-       ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
-                  (unsigned long long) dd->ipath_revision,
-                  dd->ipath_pcirev);
-
-       if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
-            INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
-               ipath_dev_err(dd, "Driver only handles version %d, "
-                             "chip swversion is %d (%llx), failng\n",
-                             IPATH_CHIP_SWVERSION,
-                             (int)(dd->ipath_revision >>
-                                   INFINIPATH_R_SOFTWARE_SHIFT) &
-                             INFINIPATH_R_SOFTWARE_MASK,
-                             (unsigned long long) dd->ipath_revision);
-               ret = -ENOSYS;
-               goto done;
-       }
-       dd->ipath_majrev = (u8) ((dd->ipath_revision >>
-                                 INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
-                                INFINIPATH_R_CHIPREVMAJOR_MASK);
-       dd->ipath_minrev = (u8) ((dd->ipath_revision >>
-                                 INFINIPATH_R_CHIPREVMINOR_SHIFT) &
-                                INFINIPATH_R_CHIPREVMINOR_MASK);
-       dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
-                                   INFINIPATH_R_BOARDID_SHIFT) &
-                                  INFINIPATH_R_BOARDID_MASK);
-
-       ret = dd->ipath_f_get_boardname(dd, boardn, sizeof(boardn));
-
-       snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
-                "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
-                "SW Compat %u\n",
-                IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
-                (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
-                INFINIPATH_R_ARCH_MASK,
-                dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
-                (unsigned)(dd->ipath_revision >>
-                           INFINIPATH_R_SOFTWARE_SHIFT) &
-                INFINIPATH_R_SOFTWARE_MASK);
-
-       ipath_dbg("%s", dd->ipath_boardversion);
-
-       if (ret)
-               goto done;
-
-       if (reinit)
-               ret = init_chip_reset(dd);
-       else
-               ret = init_chip_first(dd);
-
-done:
-       return ret;
-}
-
-static void verify_interrupt(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-
-       if (!dd)
-               return; /* being torn down */
-
-       /*
-        * If we don't have any interrupts, let the user know and
-        * don't bother checking again.
-        */
-       if (dd->ipath_int_counter == 0) {
-               if (!dd->ipath_f_intr_fallback(dd))
-                       dev_err(&dd->pcidev->dev, "No interrupts detected, "
-                               "not usable.\n");
-               else /* re-arm the timer to see if fallback works */
-                       mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
-       } else
-               ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
-                       dd->ipath_int_counter);
-}
-
-/**
- * ipath_init_chip - do the actual initialization sequence on the chip
- * @dd: the infinipath device
- * @reinit: reinitializing, so don't allocate new memory
- *
- * Do the actual initialization sequence on the chip.  This is done
- * both from the init routine called from the PCI infrastructure, and
- * when we reset the chip, or detect that it was reset internally,
- * or it's administratively re-enabled.
- *
- * Memory allocation here and in called routines is only done in
- * the first case (reinit == 0).  We have to be careful, because even
- * without memory allocation, we need to re-write all the chip registers,
- * TIDs, etc. after the reset or enable has completed.
- */
-int ipath_init_chip(struct ipath_devdata *dd, int reinit)
-{
-       int ret = 0;
-       u32 kpiobufs, defkbufs;
-       u32 piobufs, uports;
-       u64 val;
-       struct ipath_portdata *pd;
-       gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-
-       ret = init_housekeeping(dd, reinit);
-       if (ret)
-               goto done;
-
-       /*
-        * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
-        * but then it no longer nicely fits a power of two, and since
-        * we now use routines that backend onto __get_free_pages, the
-        * rest would be wasted.
-        */
-       dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
-                        dd->ipath_rcvhdrcnt);
-
-       /*
-        * Set up the shadow copies of the piobufavail registers,
-        * which we compare against the chip registers for now, and
-        * the in memory DMA'ed copies of the registers.  This has to
-        * be done early, before we calculate lastport, etc.
-        */
-       piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       /*
-        * calc number of pioavail registers, and save it; we have 2
-        * bits per buffer.
-        */
-       dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
-               / (sizeof(u64) * BITS_PER_BYTE / 2);
-       uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
-       if (piobufs > 144)
-               defkbufs = 32 + dd->ipath_pioreserved;
-       else
-               defkbufs = 16 + dd->ipath_pioreserved;
-
-       if (ipath_kpiobufs && (ipath_kpiobufs +
-               (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
-               int i = (int) piobufs -
-                       (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
-               if (i < 1)
-                       i = 1;
-               dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
-                        "%d for kernel leaves too few for %d user ports "
-                        "(%d each); using %u\n", ipath_kpiobufs,
-                        piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
-               /*
-                * shouldn't change ipath_kpiobufs, because it could be
-                * different for different devices...
-                */
-               kpiobufs = i;
-       } else if (ipath_kpiobufs)
-               kpiobufs = ipath_kpiobufs;
-       else
-               kpiobufs = defkbufs;
-       dd->ipath_lastport_piobuf = piobufs - kpiobufs;
-       dd->ipath_pbufsport =
-               uports ? dd->ipath_lastport_piobuf / uports : 0;
-       /* if not an even divisor, some user ports get extra buffers */
-       dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
-               (dd->ipath_pbufsport * uports);
-       if (dd->ipath_ports_extrabuf)
-               ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
-                       "ports <= %u\n", dd->ipath_pbufsport,
-                       dd->ipath_ports_extrabuf);
-       dd->ipath_lastpioindex = 0;
-       dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
-       /* ipath_pioavailshadow initialized earlier */
-       ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
-                  "each for %u user ports\n", kpiobufs,
-                  piobufs, dd->ipath_pbufsport, uports);
-       ret = dd->ipath_f_early_init(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Early initialization failure\n");
-               goto done;
-       }
-
-       /*
-        * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
-        * done after early_init.
-        */
-       dd->ipath_hdrqlast =
-               dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
-                        dd->ipath_rcvhdrentsize);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-                        dd->ipath_rcvhdrsize);
-
-       if (!reinit) {
-               ret = init_pioavailregs(dd);
-               init_shadow_tids(dd);
-               if (ret)
-                       goto done;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
-                        dd->ipath_pioavailregs_phys);
-
-       /*
-        * this is to detect s/w errors, which the h/w works around by
-        * ignoring the low 6 bits of address, if it wasn't aligned.
-        */
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
-       if (val != dd->ipath_pioavailregs_phys) {
-               ipath_dev_err(dd, "Catastrophic software error, "
-                             "SendPIOAvailAddr written as %lx, "
-                             "read back as %llx\n",
-                             (unsigned long) dd->ipath_pioavailregs_phys,
-                             (unsigned long long) val);
-               ret = -EINVAL;
-               goto done;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
-
-       /*
-        * make sure we are not in freeze and PIO send is enabled, so
-        * writes to pbc happen
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-       /*
-        * bring up the link before clearing errors, since we expect
-        * serdes PLL errors during this the first time after reset
-        */
-       if (bringup_link(dd)) {
-               dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
-               ret = -ENETDOWN;
-               goto done;
-       }
-
-       /*
-        * clear any "expected" hwerrs from reset and/or initialization
-        * clear any that aren't enabled (at least this once), and then
-        * set the enable mask
-        */
-       dd->ipath_f_init_hwerrors(dd);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                        dd->ipath_hwerrmask);
-
-       /* clear all */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-       /* enable errors that are masked, at least this first time. */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                        ~dd->ipath_maskederrs);
-       dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
-       dd->ipath_errormask =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-       /* clear any interrupts up to this point (ints still not enabled) */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-       dd->ipath_f_tidtemplate(dd);
-
-       /*
-        * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
-        * re-init, the simplest way to handle this is to free
-        * existing, and re-allocate.
-        * Need to re-create rest of port 0 portdata as well.
-        */
-       pd = dd->ipath_pd[0];
-       if (reinit) {
-               struct ipath_portdata *npd;
-
-               /*
-                * Alloc and init new ipath_portdata for port0,
-                * Then free old pd. Could lead to fragmentation, but also
-                * makes later support for hot-swap easier.
-                */
-               npd = create_portdata0(dd);
-               if (npd) {
-                       ipath_free_pddata(dd, pd);
-                       dd->ipath_pd[0] = npd;
-                       pd = npd;
-               } else {
-                       ipath_dev_err(dd, "Unable to allocate portdata"
-                                     " for port 0, failing\n");
-                       ret = -ENOMEM;
-                       goto done;
-               }
-       }
-       ret = ipath_create_rcvhdrq(dd, pd);
-       if (!ret)
-               ret = create_port0_egr(dd);
-       if (ret) {
-               ipath_dev_err(dd, "failed to allocate kernel port's "
-                             "rcvhdrq and/or egr bufs\n");
-               goto done;
-       } else {
-               enable_chip(dd, reinit);
-       }
-
-       /* after enable_chip, so pioavailshadow setup */
-       ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
-
-       /*
-        * Cancel any possible active sends from early driver load.
-        * Follows early_init because some chips have to initialize
-        * PIO buffers in early_init to avoid false parity errors.
-        * After enable and ipath_chg_pioavailkernel so we can safely
-        * enable pioavail updates and PIOENABLE; packets are now
-        * ready to go out.
-        */
-       ipath_cancel_sends(dd, 1);
-
-       if (!reinit) {
-               /*
-                * Used when we close a port, for DMA already in flight
-                * at close.
-                */
-               dd->ipath_dummy_hdrq = dma_alloc_coherent(
-                       &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
-                       &dd->ipath_dummy_hdrq_phys,
-                       gfp_flags);
-               if (!dd->ipath_dummy_hdrq) {
-                       dev_info(&dd->pcidev->dev,
-                               "Couldn't allocate 0x%lx bytes for dummy hdrq\n",
-                               dd->ipath_pd[0]->port_rcvhdrq_size);
-                       /* fallback to just 0'ing */
-                       dd->ipath_dummy_hdrq_phys = 0UL;
-               }
-       }
-
-       /*
-        * cause retrigger of pending interrupts ignored during init,
-        * even if we had errors
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-
-       if (!dd->ipath_stats_timer_active) {
-               /*
-                * First init, or after an admin disable/enable: set up the
-                * stats retrieval timer, even if we had errors in the last
-                * portion of setup.
-                */
-               setup_timer(&dd->ipath_stats_timer, ipath_get_faststats,
-                               (unsigned long)dd);
-               /* every 5 seconds; */
-               dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
-               /* takes ~16 seconds to overflow at full IB 4x bandwidth */
-               add_timer(&dd->ipath_stats_timer);
-               dd->ipath_stats_timer_active = 1;
-       }
-
-       /* Set up SendDMA if chip supports it */
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               ret = setup_sdma(dd);
-
-       /* Set up HoL state */
-       setup_timer(&dd->ipath_hol_timer, ipath_hol_event, (unsigned long)dd);
-
-       dd->ipath_hol_state = IPATH_HOL_UP;
-
-done:
-       if (!ret) {
-               *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
-               if (!dd->ipath_f_intrsetup(dd)) {
-                       /* now we can enable all interrupts from the chip */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-                                        -1LL);
-                       /* force re-interrupt of any pending interrupts. */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
-                                        0ULL);
-                       /* chip is usable; mark it as initialized */
-                       *dd->ipath_statusp |= IPATH_STATUS_INITTED;
-
-                       /*
-                        * setup to verify we get an interrupt, and fallback
-                        * to an alternate if necessary and possible
-                        */
-                       if (!reinit) {
-                               setup_timer(&dd->ipath_intrchk_timer,
-                                               verify_interrupt,
-                                               (unsigned long)dd);
-                       }
-                       dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
-                       add_timer(&dd->ipath_intrchk_timer);
-               } else
-                       ipath_dev_err(dd, "No interrupts enabled, couldn't "
-                                     "setup interrupt address\n");
-
-               if (dd->ipath_cfgports > ipath_stats.sps_nports)
-                       /*
-                        * sps_nports is a global, so we set it to
-                        * the highest number of ports of any of the
-                        * chips we find; we never decrement it, at
-                        * least for now.  Since this might have changed
-                        * over disable/enable or prior to reset, always
-                        * do the check and potentially adjust.
-                        */
-                       ipath_stats.sps_nports = dd->ipath_cfgports;
-       } else
-               ipath_dbg("Failed (%d) to initialize chip\n", ret);
-
-       /* if ret is non-zero, we probably should do some cleanup
-          here... */
-       return ret;
-}
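The pioavail sizing early in ipath_init_chip() follows from the 2 status bits per buffer: one 64-bit register covers 32 buffers, so the driver needs ceil(piobufs / 32) registers, which is what the ALIGN() expression computes. Checked stand-alone:

#include <stdio.h>

int main(void)
{
	const int bits_per_buf = 2;
	const int bufs_per_reg = 64 / bits_per_buf;   /* 32 */

	/* same result as ALIGN(piobufs, 32) / 32 in the driver */
	for (int piobufs = 64; piobufs <= 160; piobufs += 32)
		printf("%3d piobufs -> %d pioavail regs\n", piobufs,
		       (piobufs + bufs_per_reg - 1) / bufs_per_reg);
	return 0;
}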
-
-static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
-{
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       unsigned short val;
-       int ret;
-
-       ret = ipath_parse_ushort(str, &val);
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       if (ret < 0)
-               goto bail;
-
-       if (val == 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-               if (dd->ipath_kregbase)
-                       continue;
-               if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-                          (dd->ipath_cfgports *
-                           IPATH_MIN_USER_PORT_BUFCNT)))
-               {
-                       ipath_dev_err(
-                               dd,
-                               "Allocating %d PIO bufs for kernel leaves "
-                               "too few for %d user ports (%d each)\n",
-                               val, dd->ipath_cfgports - 1,
-                               IPATH_MIN_USER_PORT_BUFCNT);
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               dd->ipath_lastport_piobuf =
-                       dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
-       }
-
-       ipath_kpiobufs = val;
-       ret = 0;
-bail:
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       return ret;
-}
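
A setter with this kernel_param signature only takes effect once it is wired up as a module parameter. A minimal sketch of that wiring, assuming the parameter is named kpiobufs and is backed by the ipath_kpiobufs variable used above (the driver's real registration line is outside this hunk):

    /* Hypothetical registration; the actual call sits elsewhere in the file. */
    module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_uint,
                      &ipath_kpiobufs, S_IWUSR | S_IRUGO);
    MODULE_PARM_DESC(kpiobufs, "Number of PIO buffers reserved for kernel use");
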
diff --git a/drivers/staging/rdma/ipath/ipath_intr.c b/drivers/staging/rdma/ipath/ipath_intr.c
deleted file mode 100644 (file)
index 0403fa2..0000000
+++ /dev/null
@@ -1,1271 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-
-/*
- * Called when we might have an error that is specific to a particular
- * PIO buffer, and may need to cancel that buffer, so it can be re-used.
- */
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
-{
-       u32 piobcnt;
-       unsigned long sbuf[4];
-       /*
-        * It's possible that sendbuffererror could have bits set; we might
-        * have already done this as a result of hardware error handling.
-        */
-       piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       /* read these before writing errorclear */
-       sbuf[0] = ipath_read_kreg64(
-               dd, dd->ipath_kregs->kr_sendbuffererror);
-       sbuf[1] = ipath_read_kreg64(
-               dd, dd->ipath_kregs->kr_sendbuffererror + 1);
-       if (piobcnt > 128)
-               sbuf[2] = ipath_read_kreg64(
-                       dd, dd->ipath_kregs->kr_sendbuffererror + 2);
-       if (piobcnt > 192)
-               sbuf[3] = ipath_read_kreg64(
-                       dd, dd->ipath_kregs->kr_sendbuffererror + 3);
-       else
-               sbuf[3] = 0;
-
-       if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
-               int i;
-               if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
-                       time_after(dd->ipath_lastcancel, jiffies)) {
-                       __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
-                                         "SendbufErrs %lx %lx", sbuf[0],
-                                         sbuf[1]);
-                       if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
-                               printk(" %lx %lx ", sbuf[2], sbuf[3]);
-                       printk("\n");
-               }
-
-               for (i = 0; i < piobcnt; i++)
-                       if (test_bit(i, sbuf))
-                               ipath_disarm_piobufs(dd, i, 1);
-               /* ignore armlaunch errs for a bit */
-               dd->ipath_lastcancel = jiffies+3;
-       }
-}
-
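The loop above probes the sbuf[] bitmap one bit at a time with test_bit(). A sketch of the same walk using the kernel's for_each_set_bit() helper, assuming sbuf[] holds piobcnt valid bits as in ipath_disarm_senderrbufs():

    /* Equivalent disarm loop; skips over all-clear words entirely. */
    unsigned int i;

    for_each_set_bit(i, sbuf, piobcnt)
            ipath_disarm_piobufs(dd, i, 1);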
-
-/* These are all rcv-related errors which we want to count for stats */
-#define E_SUM_PKTERRS \
-       (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
-        INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
-        INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
-        INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-        INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
-        INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
-
-/* These are all send-related errors which we want to count for stats */
-#define E_SUM_ERRS \
-       (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
-        INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
-        INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-        INFINIPATH_E_INVALIDADDR)
-
-/*
- * This is similar to E_SUM_ERRS, but we can't ignore armlaunch here, and
- * we don't ignore errors that are unrelated to freeze and cancelling
- * buffers.  Armlaunch can't be ignored because more could arrive while
- * we are still cleaning up, and those need to be cancelled as they happen.
- */
-#define E_SPKT_ERRS_IGNORE \
-        (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
-        INFINIPATH_E_SPKTLEN)
-
-/*
- * These are errors that can occur when the link changes state while
- * a packet is being sent or received.  This doesn't cover things
- * like EBP or VCRC that can be the result of the sender having the
- * link change state, so we receive a "known bad" packet.
- */
-#define E_SUM_LINK_PKTERRS \
-       (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-        INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-        INFINIPATH_E_RUNEXPCHAR)
-
-static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       u64 ignore_this_time = 0;
-
-       ipath_disarm_senderrbufs(dd);
-       if ((errs & E_SUM_LINK_PKTERRS) &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               /*
-                * This can happen when SMA is trying to bring the link
-                * up, but the IB link changes state at the "wrong" time.
-                * The IB logic then complains that the packet isn't
-                * valid.  We don't want to confuse people, so we just
-                * don't print them, except at debug
-                */
-               ipath_dbg("Ignoring packet errors %llx, because link not "
-                         "ACTIVE\n", (unsigned long long) errs);
-               ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-       }
-
-       return ignore_this_time;
-}
-
-/* generic hw error messages... */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
-       { \
-               .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
-                         INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
-               .msg = "TXE " #a " Memory Parity"            \
-       }
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
-       { \
-               .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
-                         INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
-               .msg = "RXE " #a " Memory Parity"            \
-       }
-
-static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
-       INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
-       INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
-
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
-
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
-};
-
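For reference, the token-pasting (##a) and stringizing (#a) in the helper macros expand each table entry mechanically; INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF), for example, becomes roughly:

    {
            .mask = (INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF <<
                     INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT),
            .msg = "TXE PIOBUF Memory Parity"
    },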
-/**
- * ipath_format_hwmsg - format a single hwerror message
- * @msg: message buffer
- * @msgl: length of message buffer
- * @hwmsg: message to add to message buffer
- */
-static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
-{
-       strlcat(msg, "[", msgl);
-       strlcat(msg, hwmsg, msgl);
-       strlcat(msg, "]", msgl);
-}
-
-/**
- * ipath_format_hwerrors - format hardware error messages for display
- * @hwerrs: hardware errors bit vector
- * @hwerrmsgs: hardware error descriptions
- * @nhwerrmsgs: number of hwerrmsgs
- * @msg: message buffer
- * @msgl: message buffer length
- */
-void ipath_format_hwerrors(u64 hwerrs,
-                          const struct ipath_hwerror_msgs *hwerrmsgs,
-                          size_t nhwerrmsgs,
-                          char *msg, size_t msgl)
-{
-       int i;
-       const int glen = ARRAY_SIZE(ipath_generic_hwerror_msgs);
-
-       for (i=0; i<glen; i++) {
-               if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
-                       ipath_format_hwmsg(msg, msgl,
-                                          ipath_generic_hwerror_msgs[i].msg);
-               }
-       }
-
-       for (i=0; i<nhwerrmsgs; i++) {
-               if (hwerrs & hwerrmsgs[i].mask) {
-                       ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
-               }
-       }
-}
-
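A usage sketch for the two helpers above: the caller passes a zero-initialized buffer (strlcat() needs NUL termination) plus its chip-specific message table; chip_msgs is a hypothetical name for that table:

    char msg[128] = "";

    ipath_format_hwerrors(hwerrs, chip_msgs, ARRAY_SIZE(chip_msgs),
                          msg, sizeof(msg));
    ipath_dev_err(dd, "Hardware error: %s\n", msg);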
-/* return the strings for the most common link states */
-static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       char *ret;
-       u32 state;
-
-       state = ipath_ib_state(dd, ibcs);
-       if (state == dd->ib_init)
-               ret = "Init";
-       else if (state == dd->ib_arm)
-               ret = "Arm";
-       else if (state == dd->ib_active)
-               ret = "Active";
-       else
-               ret = "Down";
-       return ret;
-}
-
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
-{
-       struct ib_event event;
-
-       event.device = &dd->verbs_dev->ibdev;
-       event.element.port_num = 1;
-       event.event = ev;
-       ib_dispatch_event(&event);
-}
-
-static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
-                                    ipath_err_t errs)
-{
-       u32 ltstate, lstate, ibstate, lastlstate;
-       u32 init = dd->ib_init;
-       u32 arm = dd->ib_arm;
-       u32 active = dd->ib_active;
-       const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-
-       lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
-       ibstate = ipath_ib_state(dd, ibcs);
-       /* linkstate at last interrupt */
-       lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-       ltstate = ipath_ib_linktrstate(dd, ibcs); /* link training state */
-
-       /*
-        * Since going into a recovery state causes the link state to go
-        * down and since recovery is transitory, it is better if we never
-        * "see" the link training state go into recovery (i.e., ignore
-        * this transition for link state special handling purposes),
-        * without even updating ipath_lastibcstat.
-        */
-       if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
-           (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
-           (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
-               goto done;
-
-       /*
-        * if linkstate transitions into INIT from any of the various down
-        * states, or if it transitions from any of the up (INIT or better)
-        * states into any of the down states (except link recovery), then
-        * call the chip-specific code to take appropriate actions.
-        */
-       if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
-               lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
-               /* transitioned to UP */
-               if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
-                       /* link came up, so we must no longer be disabled */
-                       dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-                       ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
-                       goto skip_ibchange; /* chip-code handled */
-               }
-       } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
-               (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
-               ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
-               ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
-               int handled;
-               handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
-               dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
-               if (handled) {
-                       ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
-                       goto skip_ibchange; /* chip-code handled */
-               }
-       }
-
-       /*
-        * Significant enough to always print and get into logs, if it was
-        * unexpected.  If it was a requested state change, we'll have
-        * already cleared the flags, so we won't print this warning
-        */
-       if ((ibstate != arm && ibstate != active) &&
-           (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
-               dev_info(&dd->pcidev->dev, "Link state changed from %s "
-                        "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
-                        "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
-       }
-
-       if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-           ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-               u32 lastlts;
-               lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-               /*
-                * Ignore cycling back and forth from Polling.Active to
-                * Polling.Quiet while waiting for the other end of the link
-                * to come up, except to try and decide if we are connected
-                * to a live IB device or not.  We will cycle back and
-                * forth between them if no cable is plugged in, the other
-                * device is powered off or disabled, etc.
-                */
-               if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-                   lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-                       if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
-                            (++dd->ipath_ibpollcnt == 40)) {
-                               dd->ipath_flags |= IPATH_NOCABLE;
-                               *dd->ipath_statusp |=
-                                       IPATH_STATUS_IB_NOCABLE;
-                               ipath_cdbg(LINKVERB, "Set NOCABLE\n");
-                       }
-                       ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
-                               ipath_ibcstatus_str[ltstate], ibstate);
-                       goto skip_ibchange;
-               }
-       }
-
-       dd->ipath_ibpollcnt = 0; /* not poll*, now */
-       ipath_stats.sps_iblink++;
-
-       if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
-               u64 linkrecov;
-               linkrecov = ipath_snap_cntr(dd,
-                       dd->ipath_cregs->cr_iblinkerrrecovcnt);
-               if (linkrecov != dd->ipath_lastlinkrecov) {
-                       ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
-                               (unsigned long long) ibcs,
-                               ib_linkstate(dd, ibcs),
-                               ipath_ibcstatus_str[ltstate],
-                               (unsigned long long) linkrecov);
-                       /* and no more until active again */
-                       dd->ipath_lastlinkrecov = 0;
-                       ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-                       goto skip_ibchange;
-               }
-       }
-
-       if (ibstate == init || ibstate == arm || ibstate == active) {
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
-               if (ibstate == init || ibstate == arm) {
-                       *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-                       if (dd->ipath_flags & IPATH_LINKACTIVE)
-                               signal_ib_event(dd, IB_EVENT_PORT_ERR);
-               }
-               if (ibstate == arm) {
-                       dd->ipath_flags |= IPATH_LINKARMED;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK |
-                               IPATH_LINKINIT | IPATH_LINKDOWN |
-                               IPATH_LINKACTIVE | IPATH_NOCABLE);
-                       ipath_hol_down(dd);
-               } else  if (ibstate == init) {
-                       /*
-                        * set INIT and DOWN.  Down is checked by
-                        * most of the other code, but INIT is
-                        * useful to know in a few places.
-                        */
-                       dd->ipath_flags |= IPATH_LINKINIT |
-                               IPATH_LINKDOWN;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK |
-                               IPATH_LINKARMED | IPATH_LINKACTIVE |
-                               IPATH_NOCABLE);
-                       ipath_hol_down(dd);
-               } else {  /* active */
-                       dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
-                               dd->ipath_cregs->cr_iblinkerrrecovcnt);
-                       *dd->ipath_statusp |=
-                               IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
-                       dd->ipath_flags |= IPATH_LINKACTIVE;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                               | IPATH_LINKDOWN | IPATH_LINKARMED |
-                               IPATH_NOCABLE);
-                       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-                               ipath_restart_sdma(dd);
-                       signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
-                       /* LED active not handled in chip _f_updown */
-                       dd->ipath_f_setextled(dd, lstate, ltstate);
-                       ipath_hol_up(dd);
-               }
-
-               /*
-                * print after we've already done the work, so as not to
-                * delay the state changes and notifications, for debugging
-                */
-               if (lstate == lastlstate)
-                       ipath_cdbg(LINKVERB, "Unchanged from last: %s "
-                               "(%x)\n", ib_linkstate(dd, ibcs), ibstate);
-               else
-                       ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
-                                 dd->ipath_unit, ib_linkstate(dd, ibcs),
-                                 ipath_ibcstatus_str[ltstate],  ibstate);
-       } else { /* down */
-               if (dd->ipath_flags & IPATH_LINKACTIVE)
-                       signal_ib_event(dd, IB_EVENT_PORT_ERR);
-               dd->ipath_flags |= IPATH_LINKDOWN;
-               dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                                    | IPATH_LINKACTIVE |
-                                    IPATH_LINKARMED);
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-               dd->ipath_lli_counter = 0;
-
-               if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
-                       ipath_cdbg(VERBOSE, "Unit %u link state down "
-                                  "(state 0x%x), from %s\n",
-                                  dd->ipath_unit, lstate,
-                                  ib_linkstate(dd, dd->ipath_lastibcstat));
-               else
-                       ipath_cdbg(LINKVERB, "Unit %u link state changed "
-                                  "to %s (0x%x) from down (%x)\n",
-                                  dd->ipath_unit,
-                                  ipath_ibcstatus_str[ltstate],
-                                  ibstate, lastlstate);
-       }
-
-skip_ibchange:
-       dd->ipath_lastibcstat = ibcs;
-done:
-       return;
-}
-
-static void handle_supp_msgs(struct ipath_devdata *dd,
-                            unsigned supp_msgs, char *msg, u32 msgsz)
-{
-       /*
-        * Print the message unless it's ibc status change only, which
-        * happens so often we never want to count it.
-        */
-       if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-               int iserr;
-               ipath_err_t mask;
-               iserr = ipath_decode_err(dd, msg, msgsz,
-                                        dd->ipath_lasterror &
-                                        ~INFINIPATH_E_IBSTATUSCHANGED);
-
-               mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                       INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
-
-               /* if we're in debug, then don't mask SDMADISABLED msgs */
-               if (ipath_debug & __IPATH_DBG)
-                       mask &= ~INFINIPATH_E_SDMADISABLED;
-
-               if (dd->ipath_lasterror & ~mask)
-                       ipath_dev_err(dd, "Suppressed %u messages for "
-                                     "fast-repeating errors (%s) (%llx)\n",
-                                     supp_msgs, msg,
-                                     (unsigned long long)
-                                     dd->ipath_lasterror);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal" for some
-                        * types of processes (mostly benchmarks) that send
-                        * huge numbers of messages while not processing
-                        * them, so we only complain about these at debug
-                        * level.
-                        */
-                       if (iserr)
-                               ipath_dbg("Suppressed %u messages for %s\n",
-                                         supp_msgs, msg);
-                       else
-                               ipath_cdbg(ERRPKT,
-                                       "Suppressed %u messages for %s\n",
-                                         supp_msgs, msg);
-               }
-       }
-}
-
-static unsigned handle_frequent_errors(struct ipath_devdata *dd,
-                                      ipath_err_t errs, char *msg,
-                                      u32 msgsz, int *noprint)
-{
-       unsigned long nc;
-       static unsigned long nextmsg_time;
-       static unsigned nmsgs, supp_msgs;
-
-       /*
-        * Throttle back "fast" messages to no more than 10 per 5 seconds.
-        * This isn't perfect, but it's a reasonable heuristic. If we get
-        * more than 10, give a 6x longer delay.
-        */
-       nc = jiffies;
-       if (nmsgs > 10) {
-               if (time_before(nc, nextmsg_time)) {
-                       *noprint = 1;
-                       if (!supp_msgs++)
-                               nextmsg_time = nc + HZ * 3;
-               } else if (supp_msgs) {
-                       handle_supp_msgs(dd, supp_msgs, msg, msgsz);
-                       supp_msgs = 0;
-                       nmsgs = 0;
-               }
-       } else if (!nmsgs++ || time_after(nc, nextmsg_time)) {
-               nextmsg_time = nc + HZ / 2;
-       }
-
-       return supp_msgs;
-}
-
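The hand-rolled throttle above exists so the driver can later report exactly how many messages it suppressed. If that count were not needed, the stock rate limiter would express a similar "burst of 10 per 5 seconds" policy in one line (a sketch of the alternative, not what this driver does):

    if (printk_ratelimit())
            ipath_dev_err(dd, "error %llx\n", (unsigned long long) errs);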
-static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       unsigned long flags;
-       int expected;
-
-       if (ipath_debug & __IPATH_DBG) {
-               char msg[128];
-               ipath_decode_err(dd, msg, sizeof msg, errs &
-                       INFINIPATH_E_SDMAERRS);
-               ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
-       }
-       if (ipath_debug & __IPATH_VERBDBG) {
-               unsigned long tl, hd, status, lengen;
-               tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-               hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-               status = ipath_read_kreg64(dd,
-                       dd->ipath_kregs->kr_senddmastatus);
-               lengen = ipath_read_kreg64(dd,
-                       dd->ipath_kregs->kr_senddmalengen);
-               ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
-                       "lengen 0x%lx\n", tl, hd, status, lengen);
-       }
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-       expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!expected)
-               ipath_cancel_sends(dd, 1);
-}
-
-static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
-{
-       unsigned long flags;
-       int expected;
-
-       if ((istat & INFINIPATH_I_SDMAINT) &&
-           !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               ipath_sdma_intr(dd);
-
-       if (istat & INFINIPATH_I_SDMADISABLED) {
-               expected = test_bit(IPATH_SDMA_ABORTING,
-                       &dd->ipath_sdma_status);
-               ipath_dbg("%s SDmaDisabled intr\n",
-                       expected ? "expected" : "unexpected");
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-               if (!expected)
-                       ipath_cancel_sends(dd, 1);
-               if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-       }
-}
-
-static int handle_hdrq_full(struct ipath_devdata *dd)
-{
-       int chkerrpkts = 0;
-       u32 hd, tl;
-       u32 i;
-
-       ipath_stats.sps_hdrqfull++;
-       for (i = 0; i < dd->ipath_cfgports; i++) {
-               struct ipath_portdata *pd = dd->ipath_pd[i];
-
-               if (i == 0) {
-                       /*
-                        * For kernel receive queues, we just want to know
-                        * if there are packets in the queue that we can
-                        * process.
-                        */
-                       if (pd->port_head != ipath_get_hdrqtail(pd))
-                               chkerrpkts |= 1 << i;
-                       continue;
-               }
-
-               /* Skip if user context is not open */
-               if (!pd || !pd->port_cnt)
-                       continue;
-
-               /* Don't report the same point multiple times. */
-               if (dd->ipath_flags & IPATH_NODMA_RTAIL)
-                       tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
-               else
-                       tl = ipath_get_rcvhdrtail(pd);
-               if (tl == pd->port_lastrcvhdrqtail)
-                       continue;
-
-               hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
-               if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
-                       pd->port_lastrcvhdrqtail = tl;
-                       pd->port_hdrqfull++;
-                       /* flush hdrqfull so that poll() sees it */
-                       wmb();
-                       wake_up_interruptible(&pd->port_wait);
-               }
-       }
-
-       return chkerrpkts;
-}
-
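The full-queue test in handle_hdrq_full() can be read as a standalone predicate over a ring whose valid indices run 0..hdrqlast; a sketch restating the same condition:

    /* Full when head is one entry ahead of tail, including the wrap
     * from the last valid index back to 0. */
    static inline int hdrq_is_full(u32 hd, u32 tl, u32 hdrqlast)
    {
            return hd == tl + 1 || (hd == 0 && tl == hdrqlast);
    }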
-static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       char msg[128];
-       u64 ignore_this_time = 0;
-       u64 iserr = 0;
-       int chkerrpkts = 0, noprint = 0;
-       unsigned supp_msgs;
-       int log_idx;
-
-       /*
-        * don't report errors that are masked, either at init
-        * (not set in ipath_errormask), or temporarily (set in
-        * ipath_maskederrs)
-        */
-       errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
-
-       supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
-               &noprint);
-
-       /* do these first, they are most important */
-       if (errs & INFINIPATH_E_HARDWARE) {
-               /* reuse same msg buf */
-               dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
-       } else {
-               u64 mask;
-               for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
-                       mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
-                       if (errs & mask)
-                               ipath_inc_eeprom_err(dd, log_idx, 1);
-               }
-       }
-
-       if (errs & INFINIPATH_E_SDMAERRS)
-               handle_sdma_errors(dd, errs);
-
-       if (!noprint && (errs & ~dd->ipath_e_bitsextant))
-               ipath_dev_err(dd, "error interrupt with unknown errors "
-                             "%llx set\n", (unsigned long long)
-                             (errs & ~dd->ipath_e_bitsextant));
-
-       if (errs & E_SUM_ERRS)
-               ignore_this_time = handle_e_sum_errs(dd, errs);
-       else if ((errs & E_SUM_LINK_PKTERRS) &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               /*
-                * This can happen when SMA is trying to bring the link
-                * up, but the IB link changes state at the "wrong" time.
-                * The IB logic then complains that the packet isn't
-                * valid.  We don't want to confuse people, so we just
-                * don't print them, except at debug
-                */
-               ipath_dbg("Ignoring packet errors %llx, because link not "
-                         "ACTIVE\n", (unsigned long long) errs);
-               ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-       }
-
-       if (supp_msgs == 250000) {
-               int s_iserr;
-               /*
-                * It's not entirely reasonable to assume that the errors set
-                * in the last clear period are all responsible for the
-                * problem, but the alternative is to assume that only the
-                * ones on this particular interrupt are, which isn't great
-                * either
-                */
-               dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
-
-               dd->ipath_errormask &= ~dd->ipath_maskederrs;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                                dd->ipath_errormask);
-               s_iserr = ipath_decode_err(dd, msg, sizeof msg,
-                                          dd->ipath_maskederrs);
-
-               if (dd->ipath_maskederrs &
-                   ~(INFINIPATH_E_RRCVEGRFULL |
-                     INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
-                       ipath_dev_err(dd, "Temporarily disabling "
-                           "error(s) %llx reporting; too frequent (%s)\n",
-                               (unsigned long long) dd->ipath_maskederrs,
-                               msg);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal",
-                        * for some types of processes (mostly benchmarks)
-                        * that send huge numbers of messages, while not
-                        * processing them.  So only complain about
-                        * these at debug level.
-                        */
-                       if (s_iserr)
-                               ipath_dbg("Temporarily disabling reporting "
-                                   "too frequent queue full errors (%s)\n",
-                                   msg);
-                       else
-                               ipath_cdbg(ERRPKT,
-                                   "Temporarily disabling reporting too"
-                                   " frequent packet errors (%s)\n",
-                                   msg);
-               }
-
-               /*
-                * Re-enable the masked errors after around 3 minutes, in
-                * ipath_get_faststats().  If we have a series of fast
-                * repeating but different errors, the interval will keep
-                * stretching out, but that's OK, as that situation is
-                * pretty catastrophic anyway.
-                */
-               dd->ipath_unmasktime = jiffies + HZ * 180;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
-       if (ignore_this_time)
-               errs &= ~ignore_this_time;
-       if (errs & ~dd->ipath_lasterror) {
-               errs &= ~dd->ipath_lasterror;
-               /* never suppress duplicate hwerrors or ibstatuschange */
-               dd->ipath_lasterror |= errs &
-                       ~(INFINIPATH_E_HARDWARE |
-                         INFINIPATH_E_IBSTATUSCHANGED);
-       }
-
-       if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
-               dd->ipath_spectriggerhit++;
-               ipath_dbg("%lu special trigger hits\n",
-                       dd->ipath_spectriggerhit);
-       }
-
-       /* likely due to cancel, so suppress message unless verbose */
-       if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
-               time_after(dd->ipath_lastcancel, jiffies)) {
-               /* armlaunch takes precedence; it often causes both. */
-               ipath_cdbg(VERBOSE,
-                       "Suppressed %s error (%llx) after sendbuf cancel\n",
-                       (errs &  INFINIPATH_E_SPIOARMLAUNCH) ?
-                       "armlaunch" : "sendpktlen", (unsigned long long)errs);
-               errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
-       }
-
-       if (!errs)
-               return 0;
-
-       if (!noprint) {
-               ipath_err_t mask;
-               /*
-                * The ones we mask off are handled specially below
-                * or above.  Also mask SDMADISABLED by default as it
-                * is too chatty.
-                */
-               mask = INFINIPATH_E_IBSTATUSCHANGED |
-                       INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                       INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
-
-               /* if we're in debug, then don't mask SDMADISABLED msgs */
-               if (ipath_debug & __IPATH_DBG)
-                       mask &= ~INFINIPATH_E_SDMADISABLED;
-
-               ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
-       } else
-               /* so we don't need if (!noprint) at strlcat's below */
-               *msg = 0;
-
-       if (errs & E_SUM_PKTERRS) {
-               ipath_stats.sps_pkterrs++;
-               chkerrpkts = 1;
-       }
-       if (errs & E_SUM_ERRS)
-               ipath_stats.sps_errs++;
-
-       if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
-               ipath_stats.sps_crcerrs++;
-               chkerrpkts = 1;
-       }
-       iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
-
-
-       /*
-        * We don't want to print these two as they happen, or we can make
-        * the situation even worse, because it takes so long to print
-        * messages to serial consoles.  Kernel ports get printed from
-        * fast_stats, no more than every 5 seconds; user ports get printed
-        * on close.
-        */
-       if (errs & INFINIPATH_E_RRCVHDRFULL)
-               chkerrpkts |= handle_hdrq_full(dd);
-       if (errs & INFINIPATH_E_RRCVEGRFULL) {
-               struct ipath_portdata *pd = dd->ipath_pd[0];
-
-               /*
-                * since this is of less importance and not likely to
-                * happen without also getting hdrfull, only count
-                * occurrences; don't check each port (or even the kernel
-                * vs user)
-                */
-               ipath_stats.sps_etidfull++;
-               if (pd->port_head != ipath_get_hdrqtail(pd))
-                       chkerrpkts |= 1;
-       }
-
-       /*
-        * do this before IBSTATUSCHANGED, in case both bits set in a single
-        * interrupt; we want the STATUSCHANGE to "win", so that we update
-        * our internal copy of the state machine correctly
-        */
-       if (errs & INFINIPATH_E_RIBLOSTLINK) {
-               /*
-                * force through block below
-                */
-               errs |= INFINIPATH_E_IBSTATUSCHANGED;
-               ipath_stats.sps_iblink++;
-               dd->ipath_flags |= IPATH_LINKDOWN;
-               dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                                    | IPATH_LINKARMED | IPATH_LINKACTIVE);
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-
-               ipath_dbg("Lost link, link now down (%s)\n",
-                       ipath_ibcstatus_str[ipath_read_kreg64(dd,
-                       dd->ipath_kregs->kr_ibcstatus) & 0xf]);
-       }
-       if (errs & INFINIPATH_E_IBSTATUSCHANGED)
-               handle_e_ibstatuschanged(dd, errs);
-
-       if (errs & INFINIPATH_E_RESET) {
-               if (!noprint)
-                       ipath_dev_err(dd, "Got reset, requires re-init "
-                                     "(unload and reload driver)\n");
-               dd->ipath_flags &= ~IPATH_INITTED;      /* needs re-init */
-               /* mark as having had error */
-               *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
-       }
-
-       if (!noprint && *msg) {
-               if (iserr)
-                       ipath_dev_err(dd, "%s error\n", msg);
-       }
-       if (dd->ipath_state_wanted & dd->ipath_flags) {
-               ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
-                          "waking\n", dd->ipath_state_wanted,
-                          dd->ipath_flags);
-               wake_up_interruptible(&ipath_state_wait);
-       }
-
-       return chkerrpkts;
-}
-
-/*
- * Try to clean up as much as possible for anything that might have gone
- * wrong while in freeze mode, such as pio buffers being written by user
- * processes (causing armlaunch), send errors due to going into freeze mode,
- * etc., and try to avoid causing extra interrupts while doing so.
- * Forcibly update the in-memory pioavail register copies after cleanup
- * because the chip won't do it while in freeze mode (the register values
- * themselves are kept correct).
- * Make sure that we don't lose any important interrupts by using the chip
- * feature that says that writing 0 to a bit in *clear that is set in
- * *status will cause an interrupt to be generated again (if allowed by
- * the *mask value).
- */
-void ipath_clear_freeze(struct ipath_devdata *dd)
-{
-       /* disable error interrupts, to avoid confusion */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
-
-       /* also disable interrupts; errormask is sometimes overwritten */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-       ipath_cancel_sends(dd, 1);
-
-       /* clear the freeze, and be sure chip saw it */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-       /* force in-memory update now we are out of freeze */
-       ipath_force_pio_avail_update(dd);
-
-       /*
-        * force new interrupt if any hwerr, error or interrupt bits are
-        * still set, and clear "safe" send packet errors related to freeze
-        * and cancelling sends.  Re-enable error interrupts before possible
-        * force of re-interrupt on pending interrupts.
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-               E_SPKT_ERRS_IGNORE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-}
-
-
-/* this is separate to allow for better optimization of ipath_intr() */
-
-static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
-{
-       /*
-        * Interrupts sometimes happen during driver init and unload; we
-        * don't want to process any interrupts at that point
-        */
-
-       /* this is just a bandaid, not a fix, if something goes badly
-        * wrong */
-       if (++*unexpectp > 100) {
-               if (++*unexpectp > 105) {
-                       /*
-                        * ok, we must be taking somebody else's interrupts,
-                        * due to a messed up mptable and/or PIRQ table, so
-                        * unregister the interrupt.  We've seen this during
-                        * linuxbios development work, and it may happen
-                        * again in the future.
-                        */
-                       if (dd->pcidev && dd->ipath_irq) {
-                               ipath_dev_err(dd, "Now %u unexpected "
-                                             "interrupts, unregistering "
-                                             "interrupt handler\n",
-                                             *unexpectp);
-                               ipath_dbg("free_irq of irq %d\n",
-                                         dd->ipath_irq);
-                               dd->ipath_f_free_irq(dd);
-                       }
-               }
-               if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
-                       ipath_dev_err(dd, "%u unexpected interrupts, "
-                                     "disabling interrupts completely\n",
-                                     *unexpectp);
-                       /*
-                        * disable all interrupts, something is very wrong
-                        */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-                                        0ULL);
-               }
-       } else if (*unexpectp > 1)
-               ipath_dbg("Interrupt when not ready, should not happen, "
-                         "ignoring\n");
-}
-
-static noinline void ipath_bad_regread(struct ipath_devdata *dd)
-{
-       static int allbits;
-
-       /* separate routine, for better optimization of ipath_intr() */
-
-       /*
-        * We print the message and disable interrupts, in the hope of
-        * having a better chance of debugging the problem.
-        */
-       ipath_dev_err(dd,
-                     "Read of interrupt status failed (all bits set)\n");
-       if (allbits++) {
-               /* disable all interrupts, something is very wrong */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-               if (allbits == 2) {
-                       ipath_dev_err(dd, "Still bad interrupt status, "
-                                     "unregistering interrupt\n");
-                       dd->ipath_f_free_irq(dd);
-               } else if (allbits > 2) {
-                       if ((allbits % 10000) == 0)
-                               printk(".");
-               } else
-                       ipath_dev_err(dd, "Disabling interrupts, "
-                                     "multiple errors\n");
-       }
-}
-
-static void handle_layer_pioavail(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int ret;
-
-       ret = ipath_ib_piobufavail(dd->verbs_dev);
-       if (ret <= 0)
-               return;
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                        dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-/*
- * Handle receive interrupts for user ports; this means a user
- * process was waiting for a packet to arrive, and didn't want
- * to poll
- */
-static void handle_urcv(struct ipath_devdata *dd, u64 istat)
-{
-       u64 portr;
-       int i;
-       int rcvdint = 0;
-
-       /*
-        * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
-        * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
-        * would both like timely updates of the bits so that
-        * we don't pass them by unnecessarily.  the rmb()
-        * here ensures that we see them promptly -- the
-        * corresponding wmb()'s are in ipath_poll_urgent()
-        * and ipath_poll_next()...
-        */
-       rmb();
-       portr = ((istat >> dd->ipath_i_rcvavail_shift) &
-                dd->ipath_i_rcvavail_mask) |
-               ((istat >> dd->ipath_i_rcvurg_shift) &
-                dd->ipath_i_rcvurg_mask);
-       for (i = 1; i < dd->ipath_cfgports; i++) {
-               struct ipath_portdata *pd = dd->ipath_pd[i];
-
-               if (portr & (1 << i) && pd && pd->port_cnt) {
-                       if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
-                                              &pd->port_flag)) {
-                               clear_bit(i + dd->ipath_r_intravail_shift,
-                                         &dd->ipath_rcvctrl);
-                               wake_up_interruptible(&pd->port_wait);
-                               rcvdint = 1;
-                       } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
-                                                     &pd->port_flag)) {
-                               pd->port_urgent++;
-                               wake_up_interruptible(&pd->port_wait);
-                       }
-               }
-       }
-       if (rcvdint) {
-               /*
-                * We only want to take one interrupt, so turn off the rcv
-                * interrupt for all the ports for which we set rcv_waiting
-                * (but never for the kernel port).
-                */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                                dd->ipath_rcvctrl);
-       }
-}
-
-irqreturn_t ipath_intr(int irq, void *data)
-{
-       struct ipath_devdata *dd = data;
-       u64 istat, chk0rcv = 0;
-       ipath_err_t estat = 0;
-       irqreturn_t ret;
-       static unsigned unexpected = 0;
-       u64 kportrbits;
-
-       ipath_stats.sps_ints++;
-
-       if (dd->ipath_int_counter != (u32) -1)
-               dd->ipath_int_counter++;
-
-       if (!(dd->ipath_flags & IPATH_PRESENT)) {
-               /*
-                * This return value is not great, but we do not want the
-                * interrupt core code to remove our interrupt handler
-                * because we don't appear to be handling an interrupt
-                * during a chip reset.
-                */
-               return IRQ_HANDLED;
-       }
-
-       /*
-        * this needs to be flags&initted, not statusp, so we keep
-        * taking interrupts even after link goes down, etc.
-        * Also, we *must* clear the interrupt at some point, or we won't
-        * take it again, which can be really bad for errors, etc.
-        */
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               ipath_bad_intr(dd, &unexpected);
-               ret = IRQ_NONE;
-               goto bail;
-       }
-
-       istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
-
-       if (unlikely(!istat)) {
-               ipath_stats.sps_nullintr++;
-               ret = IRQ_NONE; /* not our interrupt, or already handled */
-               goto bail;
-       }
-       if (unlikely(istat == -1)) {
-               ipath_bad_regread(dd);
-               /* don't know if it was our interrupt or not */
-               ret = IRQ_NONE;
-               goto bail;
-       }
-
-       if (unexpected)
-               unexpected = 0;
-
-       if (unlikely(istat & ~dd->ipath_i_bitsextant))
-               ipath_dev_err(dd,
-                             "interrupt with unknown interrupts %Lx set\n",
-                             (unsigned long long)
-                             istat & ~dd->ipath_i_bitsextant);
-       else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
-               ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
-                       (unsigned long long) istat);
-
-       if (istat & INFINIPATH_I_ERROR) {
-               ipath_stats.sps_errints++;
-               estat = ipath_read_kreg64(dd,
-                                         dd->ipath_kregs->kr_errorstatus);
-               if (!estat)
-                       dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
-                                "but no error bits set!\n",
-                                (unsigned long long) istat);
-               else if (estat == -1LL)
-                       /*
-                        * should we try clearing all, or hope next read
-                        * works?
-                        */
-                       ipath_dev_err(dd, "Read of error status failed "
-                                     "(all bits set); ignoring\n");
-               else
-                       chk0rcv |= handle_errors(dd, estat);
-       }
-
-       if (istat & INFINIPATH_I_GPIO) {
-               /*
-                * GPIO interrupts fall in two broad classes:
-                * GPIO_2 indicates (on some HT4xx boards) that a packet
-                *        has arrived for Port 0. Checking for this
-                *        is controlled by flag IPATH_GPIO_INTR.
-                * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
-                *        errors that we need to count. Checking for this
-                *        is controlled by flag IPATH_GPIO_ERRINTRS.
-                */
-               u32 gpiostatus;
-               u32 to_clear = 0;
-
-               gpiostatus = ipath_read_kreg32(
-                       dd, dd->ipath_kregs->kr_gpio_status);
-               /* First the error-counter case. */
-               if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
-                   (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
-                       /* want to clear the bits we see asserted. */
-                       to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
-
-                       /*
-                        * Count appropriately, clear bits out of our copy,
-                        * as they have been "handled".
-                        */
-                       if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
-                               ipath_dbg("FlowCtl on UnsupVL\n");
-                               dd->ipath_rxfc_unsupvl_errs++;
-                       }
-                       if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
-                               ipath_dbg("Overrun Threshold exceeded\n");
-                               dd->ipath_overrun_thresh_errs++;
-                       }
-                       if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
-                               ipath_dbg("Local Link Integrity error\n");
-                               dd->ipath_lli_errs++;
-                       }
-                       gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
-               }
-               /* Now the Port0 Receive case */
-               if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
-                   (dd->ipath_flags & IPATH_GPIO_INTR)) {
-                       /*
-                        * GPIO status bit 2 is set, and we expected it.
-                        * Clear it and note the Port0 receive by setting
-                        * chk0rcv; this probably only happens if a Port0
-                        * pkt arrives at _just_ the wrong time.
-                        */
-                       to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
-                       gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
-                       chk0rcv = 1;
-               }
-               if (gpiostatus) {
-                       /*
-                        * Some unexpected bits remain. If they could have
-                        * caused the interrupt, complain and clear.
-                        * To avoid repetition of this condition, also clear
-                        * the mask. It is almost certainly due to error.
-                        */
-                       const u32 mask = (u32) dd->ipath_gpio_mask;
-
-                       if (mask & gpiostatus) {
-                               ipath_dbg("Unexpected GPIO IRQ bits %x\n",
-                                 gpiostatus & mask);
-                               to_clear |= (gpiostatus & mask);
-                               dd->ipath_gpio_mask &= ~(gpiostatus & mask);
-                               ipath_write_kreg(dd,
-                                       dd->ipath_kregs->kr_gpio_mask,
-                                       dd->ipath_gpio_mask);
-                       }
-               }
-               if (to_clear) {
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-                                       (u64) to_clear);
-               }
-       }
-
-       /*
-        * Clear the interrupt bits we found set, unless they are receive
-        * related, in which case we already cleared them above, and don't
-        * want to clear them again, because we might lose an interrupt.
-        * Clear it early, so we "know" know the chip will have seen this by
-        * the time we process the queue, and will re-interrupt if necessary.
-        * The processor itself won't take the interrupt again until we return.
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
-
-       /*
-        * Handle kernel receive queues before checking for pio buffers
-        * available since receives can overflow; piobuf waiters can afford
-        * a few extra cycles, since they were waiting anyway, and users
-        * waiting for receive are at the bottom.
-        */
-       kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
-               (1ULL << dd->ipath_i_rcvurg_shift);
-       if (chk0rcv || (istat & kportrbits)) {
-               istat &= ~kportrbits;
-               ipath_kreceive(dd->ipath_pd[0]);
-       }
-
-       if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
-                    (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
-               handle_urcv(dd, istat);
-
-       if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
-               handle_sdma_intr(dd, istat);
-
-       if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-               /* always process; sdma verbs uses PIO for acks and VL15  */
-               handle_layer_pioavail(dd);
-       }
-
-       ret = IRQ_HANDLED;
-
-bail:
-       return ret;
-}
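
For orientation, a handler with ipath_intr()'s signature is registered with request_irq(); a minimal sketch, with the IRQF_SHARED flag as an assumption (the driver's real registration is chip-specific and lives elsewhere):

    int ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
                          "infinipath", dd);
    if (ret)
            ipath_dev_err(dd, "request_irq(%d) failed: %d\n",
                          dd->ipath_irq, ret);
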
diff --git a/drivers/staging/rdma/ipath/ipath_kernel.h b/drivers/staging/rdma/ipath/ipath_kernel.h
deleted file mode 100644 (file)
index 66c934a..0000000
+++ /dev/null
@@ -1,1374 +0,0 @@
-#ifndef _IPATH_KERNEL_H
-#define _IPATH_KERNEL_H
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This header file is the base header file for infinipath kernel code
- * ipath_user.h serves a similar purpose for user code.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <asm/io.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_common.h"
-#include "ipath_debug.h"
-#include "ipath_registers.h"
-
-/* only s/w major version of InfiniPath we can handle */
-#define IPATH_CHIP_VERS_MAJ 2U
-
-/* don't care about this except printing */
-#define IPATH_CHIP_VERS_MIN 0U
-
-/* temporary, maybe always */
-extern struct infinipath_stats ipath_stats;
-
-#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
-/*
- * First-cut criterion for "device is active" is
- * two thousand dwords combined Tx, Rx traffic per
- * 5-second interval. SMA packets are 64 dwords,
- * and occur "a few per second", presumably each way.
- */
-#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
-/*
- * Struct used to indicate which errors are logged in each of the
- * error-counters that are logged to EEPROM. A counter is incremented
- * _once_ (saturating at 255) for each event with any bits set in
- * the error or hwerror register masks below.
- */
-#define IPATH_EEP_LOG_CNT (4)
-struct ipath_eep_log_mask {
-       u64 errs_to_log;
-       u64 hwerrs_to_log;
-};
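How these masks are consumed is sketched below, under stated assumptions: the helper name is hypothetical, and only ipath_eep_log_mask, IPATH_EEP_LOG_CNT, and the ipath_eep_st_new_errs[] shadow (declared further down in this header) come from the source.

    /* Sketch: saturate-at-255 counter update driven by the per-counter masks. */
    static void record_eep_errs(struct ipath_devdata *dd, u64 errs) /* hypothetical */
    {
            int i;

            for (i = 0; i < IPATH_EEP_LOG_CNT; i++)
                    if ((errs & dd->ipath_eep_st_masks[i].errs_to_log) &&
                        dd->ipath_eep_st_new_errs[i] < 255)
                            dd->ipath_eep_st_new_errs[i]++;
    }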
-
-struct ipath_portdata {
-       void **port_rcvegrbuf;
-       dma_addr_t *port_rcvegrbuf_phys;
-       /* rcvhdrq base, needs mmap before useful */
-       void *port_rcvhdrq;
-       /* kernel virtual address where hdrqtail is updated */
-       void *port_rcvhdrtail_kvaddr;
-       /*
-        * temp buffer for expected send setup, allocated at open, instead
-        * of each setup call
-        */
-       void *port_tid_pg_list;
-       /* when waiting for rcv or pioavail */
-       wait_queue_head_t port_wait;
-       /*
-        * rcvegr bufs base, physical; must fit in 44 bits
-        * so that a 32-bit program's mmap64 of it works
-        */
-       dma_addr_t port_rcvegr_phys;
-       /* mmap of hdrq, must fit in 44 bits */
-       dma_addr_t port_rcvhdrq_phys;
-       dma_addr_t port_rcvhdrqtailaddr_phys;
-       /*
-        * number of opens (including slave subports) on this instance
-        * (ignoring forks, dup, etc. for now)
-        */
-       int port_cnt;
-       /*
-        * how much space to leave at start of eager TID entries for
-        * protocol use, on each TID
-        */
-       /* this port's number, saved instead of recalculating it */
-       unsigned port_port;
-       /* non-zero if port is being shared. */
-       u16 port_subport_cnt;
-       /* non-zero if port is being shared. */
-       u16 port_subport_id;
-       /* number of pio bufs for this port (all procs, if shared) */
-       u32 port_piocnt;
-       /* first pio buffer for this port */
-       u32 port_pio_base;
-       /* chip offset of PIO buffers for this port */
-       u32 port_piobufs;
-       /* how many alloc_pages() chunks in port_rcvegrbuf_pages */
-       u32 port_rcvegrbuf_chunks;
-       /* how many egrbufs per chunk */
-       u32 port_rcvegrbufs_perchunk;
-       /* order for port_rcvegrbuf_pages */
-       size_t port_rcvegrbuf_size;
-       /* rcvhdrq size (for freeing) */
-       size_t port_rcvhdrq_size;
-       /* next expected TID to check when looking for free */
-       u32 port_tidcursor;
-       /* port state flags (see the IPATH_PORT_* bit offsets below) */
-       unsigned long port_flag;
-       /* what happened */
-       unsigned long int_flag;
-       /* WAIT_RCV that timed out, no interrupt */
-       u32 port_rcvwait_to;
-       /* WAIT_PIO that timed out, no interrupt */
-       u32 port_piowait_to;
-       /* WAIT_RCV already happened, no wait */
-       u32 port_rcvnowait;
-       /* WAIT_PIO already happened, no wait */
-       u32 port_pionowait;
-       /* total number of rcvhdrqfull errors */
-       u32 port_hdrqfull;
-       /*
-        * Used to suppress multiple instances of same
-        * port staying stuck at same point.
-        */
-       u32 port_lastrcvhdrqtail;
-       /* saved total number of rcvhdrqfull errors for poll edge trigger */
-       u32 port_hdrqfull_poll;
-       /* total number of polled urgent packets */
-       u32 port_urgent;
-       /* saved total number of polled urgent packets for poll edge trigger */
-       u32 port_urgent_poll;
-       /* pid of process using this port */
-       struct pid *port_pid;
-       struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
-       /* same size as task_struct .comm[] */
-       char port_comm[TASK_COMM_LEN];
-       /* pkeys set by this use of this port */
-       u16 port_pkeys[4];
-       /* so file ops can get at unit */
-       struct ipath_devdata *port_dd;
-       /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
-       void *subport_uregbase;
-       /* An array of pages for the eager receive buffers * N */
-       void *subport_rcvegrbuf;
-       /* An array of pages for the eager header queue entries * N */
-       void *subport_rcvhdr_base;
-       /* The version of the library which opened this port */
-       u32 userversion;
-       /* Bitmask of active slaves */
-       u32 active_slaves;
-       /* Type of packets or conditions we want to poll for */
-       u16 poll_type;
-       /* port rcvhdrq head offset */
-       u32 port_head;
-       /* receive packet sequence counter */
-       u32 port_seq_cnt;
-};
-
-struct sk_buff;
-struct ipath_sge_state;
-struct ipath_verbs_txreq;
-
-/*
- * control information for layered drivers
- */
-struct _ipath_layer {
-       void *l_arg;
-};
-
-struct ipath_skbinfo {
-       struct sk_buff *skb;
-       dma_addr_t phys;
-};
-
-struct ipath_sdma_txreq {
-       int                 flags;
-       int                 sg_count;
-       union {
-               struct scatterlist *sg;
-               void *map_addr;
-       };
-       void              (*callback)(void *, int);
-       void               *callback_cookie;
-       int                 callback_status;
-       u16                 start_idx;  /* sdma private */
-       u16                 next_descq_idx;  /* sdma private */
-       struct list_head    list;       /* sdma private */
-};
-
-struct ipath_sdma_desc {
-       __le64 qw[2];
-};
-
-#define IPATH_SDMA_TXREQ_F_USELARGEBUF  0x1
-#define IPATH_SDMA_TXREQ_F_HEADTOHOST   0x2
-#define IPATH_SDMA_TXREQ_F_INTREQ       0x4
-#define IPATH_SDMA_TXREQ_F_FREEBUF      0x8
-#define IPATH_SDMA_TXREQ_F_FREEDESC     0x10
-#define IPATH_SDMA_TXREQ_F_VL15         0x20
-
-#define IPATH_SDMA_TXREQ_S_OK        0
-#define IPATH_SDMA_TXREQ_S_SENDERROR 1
-#define IPATH_SDMA_TXREQ_S_ABORTED   2
-#define IPATH_SDMA_TXREQ_S_SHUTDOWN  3
-
-#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG    (1ull << 63)
-#define IPATH_SDMA_STATUS_ABORT_IN_PROG                        (1ull << 62)
-#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE         (1ull << 61)
-#define IPATH_SDMA_STATUS_SCB_EMPTY                    (1ull << 30)
-
-/* max dwords in small buffer packet */
-#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
-
-/*
- * Possible IB config parameters for ipath_f_get/set_ib_cfg()
- */
-#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
-#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
-#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
-#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
-#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
-#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
-#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
-#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
-#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
-#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
-#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Link Latency (IB 1.2) */
-
-
-struct ipath_devdata {
-       struct list_head ipath_list;
-
-       struct ipath_kregs const *ipath_kregs;
-       struct ipath_cregs const *ipath_cregs;
-
-       /* mem-mapped pointer to base of chip regs */
-       u64 __iomem *ipath_kregbase;
-       /* end of mem-mapped chip space; range checking */
-       u64 __iomem *ipath_kregend;
-       /* physical address of chip for io_remap, etc. */
-       unsigned long ipath_physaddr;
-       /* base of memory allocated for ipath_kregbase, for freeing */
-       u64 *ipath_kregalloc;
-       /* ipath_cfgports pointers */
-       struct ipath_portdata **ipath_pd;
-       /* sk_buffs used by port 0 eager receive queue */
-       struct ipath_skbinfo *ipath_port0_skbinfo;
-       /* kvirt address of 1st 2k pio buffer */
-       void __iomem *ipath_pio2kbase;
-       /* kvirt address of 1st 4k pio buffer */
-       void __iomem *ipath_pio4kbase;
-       /*
-        * points to area where PIOavail registers will be DMA'ed.
-        * Has to be on a page of its own, because the page will be
-        * mapped into user program space.  This copy is *ONLY* ever
-        * written by DMA, not by the driver!  Need a copy per device
-        * when we get to multiple devices
-        */
-       volatile __le64 *ipath_pioavailregs_dma;
-       /* physical address where updates occur */
-       dma_addr_t ipath_pioavailregs_phys;
-       struct _ipath_layer ipath_layer;
-       /* setup intr */
-       int (*ipath_f_intrsetup)(struct ipath_devdata *);
-       /* fallback to alternate interrupt type if possible */
-       int (*ipath_f_intr_fallback)(struct ipath_devdata *);
-       /* setup on-chip bus config */
-       int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
-       /* hard reset chip */
-       int (*ipath_f_reset)(struct ipath_devdata *);
-       int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
-                                    size_t);
-       void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
-       void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
-                                       size_t);
-       void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
-       int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
-       int (*ipath_f_early_init)(struct ipath_devdata *);
-       void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
-       void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
-                               u32, unsigned long);
-       void (*ipath_f_tidtemplate)(struct ipath_devdata *);
-       void (*ipath_f_cleanup)(struct ipath_devdata *);
-       void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
-       /* fill out chip-specific fields */
-       int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-       /* free irq */
-       void (*ipath_f_free_irq)(struct ipath_devdata *);
-       struct ipath_message_header *(*ipath_f_get_msgheader)
-                                       (struct ipath_devdata *, __le32 *);
-       void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
-       int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
-       int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
-       void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
-       void (*ipath_f_read_counters)(struct ipath_devdata *,
-                                       struct infinipath_counters *);
-       void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
-       /* per chip actions needed for IB Link up/down changes */
-       int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
-
-       unsigned ipath_lastegr_idx;
-       struct ipath_ibdev *verbs_dev;
-       struct timer_list verbs_timer;
-       /* total dwords sent (summed from counter) */
-       u64 ipath_sword;
-       /* total dwords rcvd (summed from counter) */
-       u64 ipath_rword;
-       /* total packets sent (summed from counter) */
-       u64 ipath_spkts;
-       /* total packets rcvd (summed from counter) */
-       u64 ipath_rpkts;
-       /* ipath_statusp initially points to this. */
-       u64 _ipath_status;
-       /* GUID for this interface, in network order */
-       __be64 ipath_guid;
-       /*
-        * aggregate of error bits reported since last cleared, for
-        * limiting of error reporting
-        */
-       ipath_err_t ipath_lasterror;
-       /*
-        * aggregate of error bits reported since last cleared, for
-        * limiting of hwerror reporting
-        */
-       ipath_err_t ipath_lasthwerror;
-       /* errors masked because they occur too fast */
-       ipath_err_t ipath_maskederrs;
-       u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
-       /* these 5 fields are used to establish deltas for IB Symbol
-        * errors and linkrecovery errors. They can be reported on
-        * some chips during link negotiation prior to INIT, and with
-        * DDR when faking DDR negotiations with non-IBTA switches.
-        * The chip counters are adjusted at driver unload if there is
-        * a non-zero delta.
-        */
-       u64 ibdeltainprog;
-       u64 ibsymdelta;
-       u64 ibsymsnap;
-       u64 iblnkerrdelta;
-       u64 iblnkerrsnap;
-
-       /* time in jiffies at which to re-enable maskederrs */
-       unsigned long ipath_unmasktime;
-       /* count of egrfull errors, combined for all ports */
-       u64 ipath_last_tidfull;
-       /* for ipath_qcheck() */
-       u64 ipath_lastport0rcv_cnt;
-       /* template for writing TIDs  */
-       u64 ipath_tidtemplate;
-       /* value to write to free TIDs */
-       u64 ipath_tidinvalid;
-       /* IBA6120 rcv interrupt setup */
-       u64 ipath_rhdrhead_intr_off;
-
-       /* size of memory at ipath_kregbase */
-       u32 ipath_kregsize;
-       /* number of registers used for pioavail */
-       u32 ipath_pioavregs;
-       /* IPATH_POLL, etc. */
-       u32 ipath_flags;
-       /* ipath_flags driver is waiting for */
-       u32 ipath_state_wanted;
-       /* last buffer for user use; the first buffer for kernel use
-        * is at this index. */
-       u32 ipath_lastport_piobuf;
-       /* is a stats timer active */
-       u32 ipath_stats_timer_active;
-       /* number of interrupts for this device -- saturates... */
-       u32 ipath_int_counter;
-       /* dwords sent read from counter */
-       u32 ipath_lastsword;
-       /* dwords received read from counter */
-       u32 ipath_lastrword;
-       /* sent packets read from counter */
-       u32 ipath_lastspkts;
-       /* received packets read from counter */
-       u32 ipath_lastrpkts;
-       /* pio bufs allocated per port */
-       u32 ipath_pbufsport;
-       /* if remainder on bufs/port, ports < extrabuf get 1 extra */
-       u32 ipath_ports_extrabuf;
-       u32 ipath_pioupd_thresh; /* update threshold, some chips */
-       /*
-        * number of ports configured as max; zero is set to number chip
-        * supports, less gives more pio bufs/port, etc.
-        */
-       u32 ipath_cfgports;
-       /* count of port 0 hdrqfull errors */
-       u32 ipath_p0_hdrqfull;
-       /* port 0 number of receive eager buffers */
-       u32 ipath_p0_rcvegrcnt;
-
-       /*
-        * index of last piobuffer we used.  Speeds up searching, by
-        * starting at this point.  It doesn't matter if multiple CPUs use
-        * and update it; the last write is the only one that matters.  Whenever it
-        * wraps, we update shadow copies.  Need a copy per device when we
-        * get to multiple devices
-        */
-       u32 ipath_lastpioindex;
-       u32 ipath_lastpioindexl;
-       /* max length of freezemsg */
-       u32 ipath_freezelen;
-       /*
-        * consecutive times we wanted a PIO buffer but were unable to
-        * get one
-        */
-       u32 ipath_consec_nopiobuf;
-       /*
-        * hint that we should update ipath_pioavailshadow before
-        * looking for a PIO buffer
-        */
-       u32 ipath_upd_pio_shadow;
-       /* so we can rewrite it after a chip reset */
-       u32 ipath_pcibar0;
-       /* so we can rewrite it after a chip reset */
-       u32 ipath_pcibar1;
-       u32 ipath_x1_fix_tries;
-       u32 ipath_autoneg_tries;
-       u32 serdes_first_init_done;
-
-       struct ipath_relock {
-               atomic_t ipath_relock_timer_active;
-               struct timer_list ipath_relock_timer;
-               unsigned int ipath_relock_interval; /* in jiffies */
-       } ipath_relock_singleton;
-
-       /* interrupt number */
-       int ipath_irq;
-       /* HT/PCI Vendor ID (here for NodeInfo) */
-       u16 ipath_vendorid;
-       /* HT/PCI Device ID (here for NodeInfo) */
-       u16 ipath_deviceid;
-       /* offset in HT config space of slave/primary interface block */
-       u8 ipath_ht_slave_off;
-       /* for write combining settings */
-       int wc_cookie;
-       /* ref count for each pkey */
-       atomic_t ipath_pkeyrefs[4];
-       /* shadow copy of struct page *'s for exp tid pages */
-       struct page **ipath_pageshadow;
-       /* shadow copy of dma handles for exp tid pages */
-       dma_addr_t *ipath_physshadow;
-       u64 __iomem *ipath_egrtidbase;
-       /* lock to workaround chip bug 9437 and others */
-       spinlock_t ipath_kernel_tid_lock;
-       spinlock_t ipath_user_tid_lock;
-       spinlock_t ipath_sendctrl_lock;
-       /* around ipath_pd and (user ports) port_cnt use (intr vs free) */
-       spinlock_t ipath_uctxt_lock;
-
-       /*
-        * IPATH_STATUS_*,
-        * this address is mapped readonly into user processes so they can
-        * get status cheaply, whenever they want.
-        */
-       u64 *ipath_statusp;
-       /* freeze msg if hw error put chip in freeze */
-       char *ipath_freezemsg;
-       /* pci access data structure */
-       struct pci_dev *pcidev;
-       struct cdev *user_cdev;
-       struct cdev *diag_cdev;
-       struct device *user_dev;
-       struct device *diag_dev;
-       /* timer used to prevent stats overflow, error throttling, etc. */
-       struct timer_list ipath_stats_timer;
-       /* timer to verify interrupts work, and fallback if possible */
-       struct timer_list ipath_intrchk_timer;
-       void *ipath_dummy_hdrq; /* used after port close */
-       dma_addr_t ipath_dummy_hdrq_phys;
-
-       /* SendDMA related entries */
-       spinlock_t            ipath_sdma_lock;
-       unsigned long         ipath_sdma_status;
-       unsigned long         ipath_sdma_abort_jiffies;
-       unsigned long         ipath_sdma_abort_intr_timeout;
-       unsigned long         ipath_sdma_buf_jiffies;
-       struct ipath_sdma_desc *ipath_sdma_descq;
-       u64                   ipath_sdma_descq_added;
-       u64                   ipath_sdma_descq_removed;
-       int                   ipath_sdma_desc_nreserved;
-       u16                   ipath_sdma_descq_cnt;
-       u16                   ipath_sdma_descq_tail;
-       u16                   ipath_sdma_descq_head;
-       u16                   ipath_sdma_next_intr;
-       u16                   ipath_sdma_reset_wait;
-       u8                    ipath_sdma_generation;
-       struct tasklet_struct ipath_sdma_abort_task;
-       struct tasklet_struct ipath_sdma_notify_task;
-       struct list_head      ipath_sdma_activelist;
-       struct list_head      ipath_sdma_notifylist;
-       atomic_t              ipath_sdma_vl15_count;
-       struct timer_list     ipath_sdma_vl15_timer;
-
-       dma_addr_t       ipath_sdma_descq_phys;
-       volatile __le64 *ipath_sdma_head_dma;
-       dma_addr_t       ipath_sdma_head_phys;
-
-       unsigned long ipath_ureg_align; /* user register alignment */
-
-       struct delayed_work ipath_autoneg_work;
-       wait_queue_head_t ipath_autoneg_wait;
-
-       /* HoL blocking / user app forward-progress state */
-       unsigned          ipath_hol_state;
-       unsigned          ipath_hol_next;
-       struct timer_list ipath_hol_timer;
-
-       /*
-        * Shadow copies of registers; size indicates read access size.
-        * Most of them are readonly, but some are write-only registers,
-        * where we manipulate the bits in the shadow copy, and then write
-        * the shadow copy to infinipath.
-        *
-        * We deliberately make most of these 32 bits, since they have
-        * restricted range.  For any that we read, we want to generate 32
-        * bit accesses, since Opteron will generate 2 separate 32 bit HT
-        * transactions for a 64 bit read, and we want to avoid unnecessary
-        * HT transactions.
-        */
-
-       /* This is the 64 bit group */
-
-       /*
-        * shadow of pioavail, check to be sure it's large enough at
-        * init time.
-        */
-       unsigned long ipath_pioavailshadow[8];
-       /* bitmap of send buffers available for the kernel to use with PIO. */
-       unsigned long ipath_pioavailkernel[8];
-       /* shadow of kr_gpio_out, for rmw ops */
-       u64 ipath_gpio_out;
-       /* shadow the gpio mask register */
-       u64 ipath_gpio_mask;
-       /* shadow the gpio output enable, etc... */
-       u64 ipath_extctrl;
-       /* kr_revision shadow */
-       u64 ipath_revision;
-       /*
-        * shadow of ibcctrl, for interrupt handling of link changes,
-        * etc.
-        */
-       u64 ipath_ibcctrl;
-       /*
-        * last ibcstatus, to suppress "duplicate" status change messages,
-        * mostly from 2 to 3
-        */
-       u64 ipath_lastibcstat;
-       /* hwerrmask shadow */
-       ipath_err_t ipath_hwerrmask;
-       ipath_err_t ipath_errormask; /* errormask shadow */
-       /* interrupt config reg shadow */
-       u64 ipath_intconfig;
-       /* kr_sendpiobufbase value */
-       u64 ipath_piobufbase;
-       /* kr_ibcddrctrl shadow */
-       u64 ipath_ibcddrctrl;
-
-       /* these are the "32 bit" regs */
-
-       /*
-        * number of GUIDs in the flash for this interface; may need some
-        * rethinking for setting on other ifaces
-        */
-       u32 ipath_nguid;
-       /*
-        * the following two are 32-bit bitmasks, but {test,clear,set}_bit
-        * all expect bit fields to be "unsigned long"
-        */
-       /* shadow kr_rcvctrl */
-       unsigned long ipath_rcvctrl;
-       /* shadow kr_sendctrl */
-       unsigned long ipath_sendctrl;
-       /* to not count armlaunch after cancel */
-       unsigned long ipath_lastcancel;
-       /* count cases where special trigger was needed (double write) */
-       unsigned long ipath_spectriggerhit;
-
-       /* value we put in kr_rcvhdrcnt */
-       u32 ipath_rcvhdrcnt;
-       /* value we put in kr_rcvhdrsize */
-       u32 ipath_rcvhdrsize;
-       /* value we put in kr_rcvhdrentsize */
-       u32 ipath_rcvhdrentsize;
-       /* offset of last entry in rcvhdrq */
-       u32 ipath_hdrqlast;
-       /* kr_portcnt value */
-       u32 ipath_portcnt;
-       /* kr_pagealign value */
-       u32 ipath_palign;
-       /* number of "2KB" PIO buffers */
-       u32 ipath_piobcnt2k;
-       /* size in bytes of "2KB" PIO buffers */
-       u32 ipath_piosize2k;
-       /* number of "4KB" PIO buffers */
-       u32 ipath_piobcnt4k;
-       /* size in bytes of "4KB" PIO buffers */
-       u32 ipath_piosize4k;
-       u32 ipath_pioreserved; /* reserved for special in-kernel use */
-       /* kr_rcvegrbase value */
-       u32 ipath_rcvegrbase;
-       /* kr_rcvegrcnt value */
-       u32 ipath_rcvegrcnt;
-       /* kr_rcvtidbase value */
-       u32 ipath_rcvtidbase;
-       /* kr_rcvtidcnt value */
-       u32 ipath_rcvtidcnt;
-       /* kr_sendregbase */
-       u32 ipath_sregbase;
-       /* kr_userregbase */
-       u32 ipath_uregbase;
-       /* kr_counterregbase */
-       u32 ipath_cregbase;
-       /* shadow the control register contents */
-       u32 ipath_control;
-       /* PCI revision register (HTC rev on FPGA) */
-       u32 ipath_pcirev;
-
-       /* chip address space used by 4k pio buffers */
-       u32 ipath_4kalign;
-       /* The MTU programmed for this unit */
-       u32 ipath_ibmtu;
-       /*
-        * The max size IB packet, including IB headers, that we can send.
-        * Starts same as ipath_piosize, but is affected when ibmtu is
-        * changed, or by size of eager buffers
-        */
-       u32 ipath_ibmaxlen;
-       /*
-        * ibmaxlen at init time, limited by chip and by receive buffer
-        * size.  Not changed after init.
-        */
-       u32 ipath_init_ibmaxlen;
-       /* size of each rcvegrbuffer */
-       u32 ipath_rcvegrbufsize;
-       /* localbus width (1, 2, 4, 8, 16, 32) from config space */
-       u32 ipath_lbus_width;
-       /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
-       u32 ipath_lbus_speed;
-       /*
-        * number of sequential ibcstatus changes for polling active/quiet
-        * (i.e., link not coming up).
-        */
-       u32 ipath_ibpollcnt;
-       /* low and high portions of MSI capability/vector */
-       u32 ipath_msi_lo;
-       /* saved after PCIe init for restore after reset */
-       u32 ipath_msi_hi;
-       /* MSI data (vector) saved for restore */
-       u16 ipath_msi_data;
-       /* MLID programmed for this instance */
-       u16 ipath_mlid;
-       /* LID programmed for this instance */
-       u16 ipath_lid;
-       /* list of pkeys programmed; 0 if not set */
-       u16 ipath_pkeys[4];
-       /*
-        * ASCII serial number, from flash, large enough for original
-        * all digit strings, and longer QLogic serial number format
-        */
-       u8 ipath_serial[16];
-       /* human readable board version */
-       u8 ipath_boardversion[96];
-       u8 ipath_lbus_info[32]; /* human readable localbus info */
-       /* chip major rev, from ipath_revision */
-       u8 ipath_majrev;
-       /* chip minor rev, from ipath_revision */
-       u8 ipath_minrev;
-       /* board rev, from ipath_revision */
-       u8 ipath_boardrev;
-       /* saved for restore after reset */
-       u8 ipath_pci_cacheline;
-       /* LID mask control */
-       u8 ipath_lmc;
-       /* link width supported */
-       u8 ipath_link_width_supported;
-       /* link speed supported */
-       u8 ipath_link_speed_supported;
-       u8 ipath_link_width_enabled;
-       u8 ipath_link_speed_enabled;
-       u8 ipath_link_width_active;
-       u8 ipath_link_speed_active;
-       /* Rx Polarity inversion (compensate for ~tx on partner) */
-       u8 ipath_rx_pol_inv;
-
-       u8 ipath_r_portenable_shift;
-       u8 ipath_r_intravail_shift;
-       u8 ipath_r_tailupd_shift;
-       u8 ipath_r_portcfg_shift;
-
-       /* unit # of this chip, if present */
-       int ipath_unit;
-
-       /* local link integrity counter */
-       u32 ipath_lli_counter;
-       /* local link integrity errors */
-       u32 ipath_lli_errors;
-       /*
-        * Above counts only cases where _successive_ LocalLinkIntegrity
-        * errors were seen in the receive headers of kern-packets.
-        * Below are the three (monotonically increasing) counters
-        * maintained via GPIO interrupts on iba6120-rev2.
-        */
-       u32 ipath_rxfc_unsupvl_errs;
-       u32 ipath_overrun_thresh_errs;
-       u32 ipath_lli_errs;
-
-       /*
-        * Not all devices managed by a driver instance are the same
-        * type, so these fields must be per-device.
-        */
-       u64 ipath_i_bitsextant;
-       ipath_err_t ipath_e_bitsextant;
-       ipath_err_t ipath_hwe_bitsextant;
-
-       /*
-        * Below should be computable from number of ports,
-        * since they are never modified.
-        */
-       u64 ipath_i_rcvavail_mask;
-       u64 ipath_i_rcvurg_mask;
-       u16 ipath_i_rcvurg_shift;
-       u16 ipath_i_rcvavail_shift;
-
-       /*
-        * Register bits for selecting i2c direction and values, used for
-        * I2C serial flash.
-        */
-       u8 ipath_gpio_sda_num;
-       u8 ipath_gpio_scl_num;
-       u8 ipath_i2c_chain_type;
-       u64 ipath_gpio_sda;
-       u64 ipath_gpio_scl;
-
-       /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
-       spinlock_t ipath_gpio_lock;
-
-       /*
-        * IB link and linktraining states and masks that vary per chip in
-        * some way.  Set at init, to avoid each IB status change interrupt
-        */
-       u8 ibcs_ls_shift;
-       u8 ibcs_lts_mask;
-       u32 ibcs_mask;
-       u32 ib_init;
-       u32 ib_arm;
-       u32 ib_active;
-
-       u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
-
-       /*
-        * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontrol
-        * reg. Changes for IBA7220
-        */
-       u8 ibcc_lic_mask; /* LinkInitCmd */
-       u8 ibcc_lc_shift; /* LinkCmd */
-       u8 ibcc_mpl_shift; /* Maxpktlen */
-
-       u8 delay_mult;
-
-       /* used to override LED behavior */
-       u8 ipath_led_override;  /* Substituted for normal value, if non-zero */
-       u16 ipath_led_override_timeoff; /* delta to next timer event */
-       u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
-       u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
-       atomic_t ipath_led_override_timer_active;
-       /* Used to flash LEDs in override mode */
-       struct timer_list ipath_led_override_timer;
-
-       /* Support (including locks) for EEPROM logging of errors and time */
-       /* control access to actual counters, timer */
-       spinlock_t ipath_eep_st_lock;
-       /* control high-level access to EEPROM */
-       struct mutex ipath_eep_lock;
-       /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
-       uint64_t ipath_traffic_wds;
-       /* active time is kept in seconds, but logged in hours */
-       atomic_t ipath_active_time;
-       /* Below are nominal shadow of EEPROM, new since last EEPROM update */
-       uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
-       uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
-       uint16_t ipath_eep_hrs;
-       /*
-        * masks for which bits of errs, hwerrs that cause
-        * each of the counters to increment.
-        */
-       struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
-
-       /* interrupt mitigation reload register info */
-       u16 ipath_jint_idle_ticks;      /* idle clock ticks */
-       u16 ipath_jint_max_packets;     /* max packets across all ports */
-
-       /*
-        * lock for access to SerDes, and flags to sequence preset
-        * versus steady-state. 7220-only at the moment.
-        */
-       spinlock_t ipath_sdepb_lock;
-       u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
-};
-
-/* ipath_hol_state values (stopping/starting user proc, send flushing) */
-#define IPATH_HOL_UP       0
-#define IPATH_HOL_DOWN     1
-/* ipath_hol_next toggle values, used when hol_state is IPATH_HOL_DOWN */
-#define IPATH_HOL_DOWNSTOP 0
-#define IPATH_HOL_DOWNCONT 1
-
-/* bit positions for sdma_status */
-#define IPATH_SDMA_ABORTING  0
-#define IPATH_SDMA_DISARMED  1
-#define IPATH_SDMA_DISABLED  2
-#define IPATH_SDMA_LAYERBUF  3
-#define IPATH_SDMA_RUNNING  30
-#define IPATH_SDMA_SHUTDOWN 31
-
-/* bit combinations that correspond to abort states */
-#define IPATH_SDMA_ABORT_NONE 0
-#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
-#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED))
-#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-
-#define IPATH_SDMA_BUF_NONE 0
-#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
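A hedged sketch of how these combinations are meant to be tested; callers would hold ipath_sdma_lock, and the helper name is illustrative only.

    /* Sketch: true once DISARMED and DISABLED have both joined ABORTING. */
    static int sdma_abort_complete(const struct ipath_devdata *dd) /* hypothetical */
    {
            return (dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK) ==
                    IPATH_SDMA_ABORT_ABORTED;
    }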
-
-/* Private data for file operations */
-struct ipath_filedata {
-       struct ipath_portdata *pd;
-       unsigned subport;
-       unsigned tidcursor;
-       struct ipath_user_sdma_queue *pq;
-};
-extern struct list_head ipath_dev_list;
-extern spinlock_t ipath_devs_lock;
-extern struct ipath_devdata *ipath_lookup(int unit);
-
-int ipath_init_chip(struct ipath_devdata *, int);
-int ipath_enable_wc(struct ipath_devdata *dd);
-void ipath_disable_wc(struct ipath_devdata *dd);
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
-void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_clear_freeze(struct ipath_devdata *);
-
-struct file_operations;
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-                   struct cdev **cdevp, struct device **devp);
-void ipath_cdev_cleanup(struct cdev **cdevp,
-                       struct device **devp);
-
-int ipath_diag_add(struct ipath_devdata *);
-void ipath_diag_remove(struct ipath_devdata *);
-
-extern wait_queue_head_t ipath_state_wait;
-
-int ipath_user_add(struct ipath_devdata *dd);
-void ipath_user_remove(struct ipath_devdata *dd);
-
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
-
-extern int ipath_diag_inuse;
-
-irqreturn_t ipath_intr(int irq, void *devid);
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-                    ipath_err_t err);
-#if __IPATH_INFO || __IPATH_DBG
-extern const char *ipath_ibcstatus_str[];
-#endif
-
-/* clean up any per-chip chip-specific stuff */
-void ipath_chip_cleanup(struct ipath_devdata *);
-/* clean up any chip type-specific stuff */
-void ipath_chip_done(void);
-
-void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
-                         unsigned cnt);
-void ipath_cancel_sends(struct ipath_devdata *, int);
-
-int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
-void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
-
-int ipath_parse_ushort(const char *str, unsigned short *valp);
-
-void ipath_kreceive(struct ipath_portdata *);
-int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
-int ipath_reset_device(int);
-void ipath_get_faststats(unsigned long);
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-int ipath_set_linkstate(struct ipath_devdata *, u8);
-int ipath_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
-void ipath_enable_armlaunch(struct ipath_devdata *);
-void ipath_disable_armlaunch(struct ipath_devdata *);
-void ipath_hol_down(struct ipath_devdata *);
-void ipath_hol_up(struct ipath_devdata *);
-void ipath_hol_event(unsigned long);
-void ipath_toggle_rclkrls(struct ipath_devdata *);
-void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
-void ipath_set_relock_poll(struct ipath_devdata *, int);
-void ipath_shutdown_relock_poll(struct ipath_devdata *);
-
-/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
-#define subport_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->subport
-#define tidcursor_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->tidcursor
-#define user_sdma_queue_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->pq
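For context, a minimal sketch of these accessors in use; the handler below is illustrative and not part of the driver.

    /* Sketch: pull per-open state out of a struct file in a file op. */
    static int example_check_port(struct file *fp) /* hypothetical */
    {
            struct ipath_portdata *pd = port_fp(fp);

            return (pd && pd->port_cnt) ? (int)subport_fp(fp) : -EINVAL;
    }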
-
-/*
- * values for ipath_flags
- */
-               /* chip can report link latency (IB 1.2) */
-#define IPATH_HAS_LINK_LATENCY 0x1
-               /* The chip is up and initted */
-#define IPATH_INITTED       0x2
-               /* set if any user code has set kr_rcvhdrsize */
-#define IPATH_RCVHDRSZ_SET  0x4
-               /* The chip is present and valid for accesses */
-#define IPATH_PRESENT       0x8
-               /* HT link0 is only 8 bits wide, ignore upper byte crc
-                * errors, etc. */
-#define IPATH_8BIT_IN_HT0   0x10
-               /* HT link1 is only 8 bits wide, ignore upper byte crc
-                * errors, etc. */
-#define IPATH_8BIT_IN_HT1   0x20
-               /* The link is down */
-#define IPATH_LINKDOWN      0x40
-               /* The link level is up (0x11) */
-#define IPATH_LINKINIT      0x80
-               /* The link is in the armed (0x21) state */
-#define IPATH_LINKARMED     0x100
-               /* The link is in the active (0x31) state */
-#define IPATH_LINKACTIVE    0x200
-               /* link current state is unknown */
-#define IPATH_LINKUNK       0x400
-               /* Write combining flush needed for PIO */
-#define IPATH_PIO_FLUSH_WC  0x1000
-               /* chip does not DMA the receive tail pointer to memory */
-#define IPATH_NODMA_RTAIL   0x2000
-               /* no IB cable, or no device on IB cable */
-#define IPATH_NOCABLE       0x4000
-               /* Supports port zero per packet receive interrupts via
-                * GPIO */
-#define IPATH_GPIO_INTR     0x8000
-               /* uses the coded 4byte TID, not 8 byte */
-#define IPATH_4BYTE_TID     0x10000
-               /* packet/word counters are 32 bit, else those 4 counters
-                * are 64bit */
-#define IPATH_32BITCOUNTERS 0x20000
-               /* Interrupt register is 64 bits */
-#define IPATH_INTREG_64     0x40000
-               /* can miss port0 rx interrupts */
-#define IPATH_DISABLED      0x80000 /* administratively disabled */
-               /* Use GPIO interrupts for new counters */
-#define IPATH_GPIO_ERRINTRS 0x100000
-#define IPATH_SWAP_PIOBUFS  0x200000
-               /* Supports Send DMA */
-#define IPATH_HAS_SEND_DMA  0x400000
-               /* Supports Send Count (not just word count) in PBC */
-#define IPATH_HAS_PBC_CNT   0x800000
-               /* Suppress heartbeat, even if turning off loopback */
-#define IPATH_NO_HRTBT      0x1000000
-#define IPATH_HAS_THRESH_UPDATE 0x4000000
-#define IPATH_HAS_MULT_IB_SPEED 0x8000000
-#define IPATH_IB_AUTONEG_INPROG 0x10000000
-#define IPATH_IB_AUTONEG_FAILED 0x20000000
-               /* Linkdown-disable intentionally, Do not attempt to bring up */
-#define IPATH_IB_LINK_DISABLED 0x40000000
-#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
-
-/* Bits in GPIO for the added interrupts */
-#define IPATH_GPIO_PORT0_BIT 2
-#define IPATH_GPIO_RXUVL_BIT 3
-#define IPATH_GPIO_OVRUN_BIT 4
-#define IPATH_GPIO_LLI_BIT 5
-#define IPATH_GPIO_ERRINTR_MASK 0x38
-
-/* portdata flag bit offsets */
-               /* waiting for a packet to arrive */
-#define IPATH_PORT_WAITING_RCV   2
-               /* master has not finished initializing */
-#define IPATH_PORT_MASTER_UNINIT 4
-               /* waiting for an urgent packet to arrive */
-#define IPATH_PORT_WAITING_URG 5
-
-/* free up any allocated data at port close */
-void ipath_free_data(struct ipath_portdata *dd);
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-                               unsigned len, int avail);
-void ipath_init_iba6110_funcs(struct ipath_devdata *);
-void ipath_get_eeprom_info(struct ipath_devdata *);
-int ipath_update_eeprom_log(struct ipath_devdata *dd);
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
-u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
-void ipath_force_pio_avail_update(struct ipath_devdata *);
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
-
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define IPATH_LED_LOG 2  /* Logical (link) YELLOW LED */
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
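Usage is a single call; a hedged example, where val = 0 presumably restores normal LED behavior per the ipath_led_override comment above:

    ipath_set_led_override(dd, IPATH_LED_PHYS | IPATH_LED_LOG); /* force both LEDs */
    ipath_set_led_override(dd, 0); /* return LEDs to normal operation */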
-
-/* send dma routines */
-int setup_sdma(struct ipath_devdata *);
-void teardown_sdma(struct ipath_devdata *);
-void ipath_restart_sdma(struct ipath_devdata *);
-void ipath_sdma_intr(struct ipath_devdata *);
-int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
-                         u32, struct ipath_verbs_txreq *);
-/* ipath_sdma_lock should be locked before calling this. */
-int ipath_sdma_make_progress(struct ipath_devdata *dd);
-
-/* must be called under ipath_sdma_lock */
-static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
-{
-       return dd->ipath_sdma_descq_cnt -
-               (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
-               1 - dd->ipath_sdma_desc_nreserved;
-}
-
-static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
-{
-       dd->ipath_sdma_desc_nreserved += cnt;
-}
-
-static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
-{
-       dd->ipath_sdma_desc_nreserved -= cnt;
-}
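A hedged sketch of the intended calling pattern; ndesc and flags are assumed locals, and the lock requirement comes from the comments above:

    spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
    if (ipath_sdma_descq_freecnt(dd) >= ndesc)
            ipath_sdma_desc_reserve(dd, ndesc); /* release via ipath_sdma_desc_unreserve() */
    spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);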
-
-/*
- * number of words used for protocol header if not set by ipath_userinit()
- */
-#define IPATH_DFLT_RCVHDRSIZE 9
-
-int ipath_get_user_pages(unsigned long, size_t, struct page **);
-void ipath_release_user_pages(struct page **, size_t);
-void ipath_release_user_pages_on_close(struct page **, size_t);
-int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
-int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
-int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
-int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
-
-/* these are used for the registers that vary with port */
-void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
-                          unsigned, u64);
-
-/*
- * We could have a single register get/put routine, that takes a group type,
- * but this is somewhat clearer and cleaner.  It also gives us some error
- * checking.  64 bit register reads should always work, but are inefficient
- * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
- * so we use kreg32 wherever possible.  User register and counter register
- * reads are always 32 bit reads, so only one form of those routines.
- */
-
-/*
- * At the moment, none of the s-registers are writable, so no
- * ipath_write_sreg().
- */
-
-/**
- * ipath_read_ureg32 - read 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @port: port number
- *
- * Return the contents of a register that is virtualized to be per port.
- * Returns 0 on errors (not distinguishable from valid contents at
- * runtime; we may add a separate error variable at some point).
- */
-static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
-                                   ipath_ureg regno, int port)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-
-       return readl(regno + (u64 __iomem *)
-                    (dd->ipath_uregbase +
-                     (char __iomem *)dd->ipath_kregbase +
-                     dd->ipath_ureg_align * port));
-}
-
-/**
- * ipath_write_ureg - write 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @value: value
- * @port: port
- *
- * Write the contents of a register that is virtualized to be per port.
- */
-static inline void ipath_write_ureg(const struct ipath_devdata *dd,
-                                   ipath_ureg regno, u64 value, int port)
-{
-       u64 __iomem *ubase = (u64 __iomem *)
-               (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
-                dd->ipath_ureg_align * port);
-       if (dd->ipath_kregbase)
-               writeq(value, &ubase[regno]);
-}
-
-static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
-                                   ipath_kreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return -1;
-       return readl((u32 __iomem *)&dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
-                                   ipath_kreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return -1;
-
-       return readq(&dd->ipath_kregbase[regno]);
-}
-
-static inline void ipath_write_kreg(const struct ipath_devdata *dd,
-                                   ipath_kreg regno, u64 value)
-{
-       if (dd->ipath_kregbase)
-               writeq(value, &dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
-                                 ipath_creg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-
-       return readq(regno + (u64 __iomem *)
-                    (dd->ipath_cregbase +
-                     (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
-                                        ipath_creg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-       return readl(regno + (u64 __iomem *)
-                    (dd->ipath_cregbase +
-                     (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_write_creg(const struct ipath_devdata *dd,
-                                   ipath_creg regno, u64 value)
-{
-       if (dd->ipath_kregbase)
-               writeq(value, regno + (u64 __iomem *)
-                      (dd->ipath_cregbase +
-                       (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
-{
-       *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
-}
-
-static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
-{
-       return (u32) le64_to_cpu(*((volatile __le64 *)
-                               pd->port_rcvhdrtail_kvaddr));
-}
-
-static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
-{
-       const struct ipath_devdata *dd = pd->port_dd;
-       u32 hdrqtail;
-
-       if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-               __le32 *rhf_addr;
-               u32 seq;
-
-               rhf_addr = (__le32 *) pd->port_rcvhdrq +
-                       pd->port_head + dd->ipath_rhf_offset;
-               seq = ipath_hdrget_seq(rhf_addr);
-               hdrqtail = pd->port_head;
-               if (seq == pd->port_seq_cnt)
-                       hdrqtail++;
-       } else
-               hdrqtail = ipath_get_rcvhdrtail(pd);
-
-       return hdrqtail;
-}
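A sketch of the polling loop this helper supports (schematic only; the real advance-and-wrap logic lives in ipath_kreceive()):

    while (pd->port_head != ipath_get_hdrqtail(pd)) {
            /* process the rcvhdrq entry at pd->port_head, then advance
             * port_head, wrapping at dd->ipath_rcvhdrcnt */
    }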
-
-static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
-{
-       return (dd->ipath_flags & IPATH_INTREG_64) ?
-               ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return linkstate
- * Report ACTIVE_DEFER as ACTIVE, because we treat them the same
- * everywhere, anyway (and should be, for almost all purposes).
- */
-static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
-               INFINIPATH_IBCS_LINKSTATE_MASK;
-       if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
-               state = INFINIPATH_IBCS_L_STATE_ACTIVE;
-       return state;
-}
-
-/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
-static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-               dd->ibcs_lts_mask;
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return logical link state
- * combination of link state and linktraining state (down, active, init,
- * arm, etc.)
- */
-static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
-{
-       u32 ibs;
-       ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-               dd->ibcs_lts_mask;
-       ibs |= (u32)(ibcs &
-               (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
-       return ibs;
-}
-
-/*
- * sysfs interface.
- */
-
-struct device_driver;
-
-extern const char ib_ipath_version[];
-
-extern const struct attribute_group *ipath_driver_attr_groups[];
-
-int ipath_device_create_group(struct device *, struct ipath_devdata *);
-void ipath_device_remove_group(struct device *, struct ipath_devdata *);
-int ipath_expose_reset(struct device *);
-
-int ipath_init_ipathfs(void);
-void ipath_exit_ipathfs(void);
-int ipathfs_add_device(struct ipath_devdata *);
-int ipathfs_remove_device(struct ipath_devdata *);
-
-/*
- * dma_addr wrappers - all 0's invalid for hw
- */
-dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
-                         size_t, int);
-dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
-const char *ipath_get_unit_name(int unit);
-
-/*
- * Flush write combining store buffers (if present) and perform a write
- * barrier.
- */
-#if defined(CONFIG_X86_64)
-#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
-#else
-#define ipath_flush_wc() wmb()
-#endif
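A hedged example of the pattern this macro exists for; piobuf, hdr, and hdrlen are assumed locals:

    memcpy_toio(piobuf, hdr, hdrlen); /* stores may linger in WC buffers */
    ipath_flush_wc();                 /* drain them before the trigger write */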
-
-extern unsigned ipath_debug; /* debugging bit mask */
-extern unsigned ipath_linkrecovery;
-extern unsigned ipath_mtu4096;
-extern struct mutex ipath_mutex;
-
-#define IPATH_DRV_NAME         "ib_ipath"
-#define IPATH_MAJOR            233
-#define IPATH_USER_MINOR_BASE  0
-#define IPATH_DIAGPKT_MINOR    127
-#define IPATH_DIAG_MINOR_BASE  129
-#define IPATH_NMINORS          255
-
-#define ipath_dev_err(dd,fmt,...) \
-       do { \
-               const struct ipath_devdata *__dd = (dd); \
-               if (__dd->pcidev) \
-                       dev_err(&__dd->pcidev->dev, "%s: " fmt, \
-                               ipath_get_unit_name(__dd->ipath_unit), \
-                               ##__VA_ARGS__); \
-               else \
-                       printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
-                              ipath_get_unit_name(__dd->ipath_unit), \
-                              ##__VA_ARGS__); \
-       } while (0)
-
-#if _IPATH_DEBUGGING
-
-# define __IPATH_DBG_WHICH(which,fmt,...) \
-       do { \
-               if (unlikely(ipath_debug & (which))) \
-                       printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
-                              __func__,##__VA_ARGS__); \
-       } while (0)
-
-# define ipath_dbg(fmt,...) \
-       __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
-# define ipath_cdbg(which,fmt,...) \
-       __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)
-
-#else /* ! _IPATH_DEBUGGING */
-
-# define ipath_dbg(fmt,...)
-# define ipath_cdbg(which,fmt,...)
-
-#endif /* _IPATH_DEBUGGING */
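Typical call sites look like the sketch below; both macros expand to nothing when _IPATH_DEBUGGING is unset, and the VERBOSE category is assumed to be one of the __IPATH_*DBG masks in ipath_debug.h:

    ipath_dbg("unit %d: link went down\n", dd->ipath_unit);
    ipath_cdbg(VERBOSE, "port %u head %u\n", pd->port_port, pd->port_head);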
-
-/*
- * this is used for formatting hw error messages...
- */
-struct ipath_hwerror_msgs {
-       u64 mask;
-       const char *msg;
-};
-
-#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
-
-/* in ipath_intr.c... */
-void ipath_format_hwerrors(u64 hwerrs,
-                          const struct ipath_hwerror_msgs *hwerrmsgs,
-                          size_t nhwerrmsgs,
-                          char *msg, size_t lmsg);
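A hedged sketch of how a table built with INFINIPATH_HWE_MSG feeds the formatter; the bit name is an assumed example, to be checked against the INFINIPATH_HWE_* definitions in ipath_registers.h:

    static const struct ipath_hwerror_msgs example_msgs[] = { /* hypothetical */
            INFINIPATH_HWE_MSG(MEMBISTFAILED, "memory BIST failed"),
    };

    ipath_format_hwerrors(hwerrs, example_msgs, ARRAY_SIZE(example_msgs),
                          msg, sizeof(msg));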
-
-#endif                         /* _IPATH_KERNEL_H */
diff --git a/drivers/staging/rdma/ipath/ipath_keys.c b/drivers/staging/rdma/ipath/ipath_keys.c
deleted file mode 100644 (file)
index c0e933f..0000000
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <asm/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_alloc_lkey - allocate an lkey
- * @rkt: lkey table in which to allocate the lkey
- * @mr: memory region that this lkey protects
- *
- * Returns 1 if successful, otherwise returns 0.
- */
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
-{
-       unsigned long flags;
-       u32 r;
-       u32 n;
-       int ret;
-
-       spin_lock_irqsave(&rkt->lock, flags);
-
-       /* Find the next available LKEY */
-       r = n = rkt->next;
-       for (;;) {
-               if (rkt->table[r] == NULL)
-                       break;
-               r = (r + 1) & (rkt->max - 1);
-               if (r == n) {
-                       spin_unlock_irqrestore(&rkt->lock, flags);
-                       ipath_dbg("LKEY table full\n");
-                       ret = 0;
-                       goto bail;
-               }
-       }
-       rkt->next = (r + 1) & (rkt->max - 1);
-       /*
-        * Make sure lkey is never zero, which is reserved to indicate an
-        * unrestricted LKEY.
-        */
-       rkt->gen++;
-       mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
-               ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
-                << 8);
-       if (mr->lkey == 0) {
-               mr->lkey |= 1 << 8;
-               rkt->gen++;
-       }
-       rkt->table[r] = mr;
-       spin_unlock_irqrestore(&rkt->lock, flags);
-
-       ret = 1;
-
-bail:
-       return ret;
-}
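A worked example of the key layout: with ib_ipath_lkey_table_size = 10 (an assumed value), the slot index r lands in bits 31..22 and the low 14 bits of rkt->gen land in bits 21..8, so the bottom byte of every allocated lkey is zero. The only way mr->lkey can come out as 0 is slot 0 with an all-zero masked generation, which the "mr->lkey |= 1 << 8" fixup above handles.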
-
-/**
- * ipath_free_lkey - free an lkey
- * @rkt: table from which to free the lkey
- * @lkey: lkey id to free
- */
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
-{
-       unsigned long flags;
-       u32 r;
-
-       if (lkey == 0)
-               return;
-       r = lkey >> (32 - ib_ipath_lkey_table_size);
-       spin_lock_irqsave(&rkt->lock, flags);
-       rkt->table[r] = NULL;
-       spin_unlock_irqrestore(&rkt->lock, flags);
-}
-
-/**
- * ipath_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Returns 1 if valid and successful, otherwise returns 0.
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-                 struct ib_sge *sge, int acc)
-{
-       struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-       struct ipath_mregion *mr;
-       unsigned n, m;
-       size_t off;
-       int ret;
-
-       /*
-        * We use LKEY == zero for kernel virtual addresses
-        * (see ipath_get_dma_mr and ipath_dma.c).
-        */
-       if (sge->lkey == 0) {
-               /* always a kernel port, no locking needed */
-               struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-               if (pd->user) {
-                       ret = 0;
-                       goto bail;
-               }
-               isge->mr = NULL;
-               isge->vaddr = (void *) sge->addr;
-               isge->length = sge->length;
-               isge->sge_length = sge->length;
-               ret = 1;
-               goto bail;
-       }
-       mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
-       if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-                    qp->ibqp.pd != mr->pd)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off = sge->addr - mr->user_base;
-       if (unlikely(sge->addr < mr->user_base ||
-                    off + sge->length > mr->length ||
-                    (mr->access_flags & acc) != acc)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off += mr->offset;
-       m = 0;
-       n = 0;
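-       /*
-        * Walk the two-level segment map (each map entry holds
-        * IPATH_SEGSZ segments) until off falls inside one segment.
-        */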
-       while (off >= mr->map[m]->segs[n].length) {
-               off -= mr->map[m]->segs[n].length;
-               n++;
-               if (n >= IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       isge->mr = mr;
-       isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-       isge->length = mr->map[m]->segs[n].length - off;
-       isge->sge_length = sge->length;
-       isge->m = m;
-       isge->n = n;
-
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_rkey_ok - check the IB virtual address, length, and RKEY
- * @qp: queue pair whose rkey table is used for the check
- * @ss: SGE state to initialize
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Returns 1 if successful, otherwise returns 0.
- */
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-                 u32 len, u64 vaddr, u32 rkey, int acc)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_lkey_table *rkt = &dev->lk_table;
-       struct ipath_sge *sge = &ss->sge;
-       struct ipath_mregion *mr;
-       unsigned n, m;
-       size_t off;
-       int ret;
-
-       /*
-        * We use RKEY == zero for kernel virtual addresses
-        * (see ipath_get_dma_mr and ipath_dma.c).
-        */
-       if (rkey == 0) {
-               /* always a kernel port, no locking needed */
-               struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-               if (pd->user) {
-                       ret = 0;
-                       goto bail;
-               }
-               sge->mr = NULL;
-               sge->vaddr = (void *) vaddr;
-               sge->length = len;
-               sge->sge_length = len;
-               ss->sg_list = NULL;
-               ss->num_sge = 1;
-               ret = 1;
-               goto bail;
-       }
-
-       mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
-       if (unlikely(mr == NULL || mr->lkey != rkey ||
-                    qp->ibqp.pd != mr->pd)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off = vaddr - mr->iova;
-       if (unlikely(vaddr < mr->iova || off + len > mr->length ||
-                    (mr->access_flags & acc) == 0)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off += mr->offset;
-       m = 0;
-       n = 0;
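-       /* Same two-level segment walk as in ipath_lkey_ok() above. */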
-       while (off >= mr->map[m]->segs[n].length) {
-               off -= mr->map[m]->segs[n].length;
-               n++;
-               if (n >= IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       sge->mr = mr;
-       sge->vaddr = mr->map[m]->segs[n].vaddr + off;
-       sge->length = mr->map[m]->segs[n].length - off;
-       sge->sge_length = len;
-       sge->m = m;
-       sge->n = n;
-       ss->sg_list = NULL;
-       ss->num_sge = 1;
-
-       ret = 1;
-
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mad.c b/drivers/staging/rdma/ipath/ipath_mad.c
deleted file mode 100644 (file)
index ad3a926..0000000
+++ /dev/null
@@ -1,1521 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_pma.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define IB_SMP_UNSUP_VERSION   cpu_to_be16(0x0004)
-#define IB_SMP_UNSUP_METHOD    cpu_to_be16(0x0008)
-#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C)
-#define IB_SMP_INVALID_FIELD   cpu_to_be16(0x001C)
-
-static int reply(struct ib_smp *smp)
-{
-       /*
-        * The verbs framework will handle the directed/LID route
-        * packet changes.
-        */
-       smp->method = IB_MGMT_METHOD_GET_RESP;
-       if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-               smp->status |= IB_SMP_DIRECTION;
-       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-static int recv_subn_get_nodedescription(struct ib_smp *smp,
-                                        struct ib_device *ibdev)
-{
-       if (smp->attr_mod)
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
-
-       return reply(smp);
-}
-
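-/* Wire format of the subnet management NodeInfo attribute. */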
-struct nodeinfo {
-       u8 base_version;
-       u8 class_version;
-       u8 node_type;
-       u8 num_ports;
-       __be64 sys_guid;
-       __be64 node_guid;
-       __be64 port_guid;
-       __be16 partition_cap;
-       __be16 device_id;
-       __be32 revision;
-       u8 local_port_num;
-       u8 vendor_id[3];
-} __attribute__ ((packed));
-
-static int recv_subn_get_nodeinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
-       struct ipath_devdata *dd = to_idev(ibdev)->dd;
-       u32 vendor, majrev, minrev;
-
-       /* GUID 0 is illegal */
-       if (smp->attr_mod || (dd->ipath_guid == 0))
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       nip->base_version = 1;
-       nip->class_version = 1;
-       nip->node_type = 1;     /* channel adapter */
-       /*
-        * XXX The num_ports value will need a layer function to get
-        * the value if we ever have more than one IB port on a chip.
-        * We will also need to get the GUID for the port.
-        */
-       nip->num_ports = ibdev->phys_port_cnt;
-       /* This is already in network order */
-       nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-       nip->node_guid = dd->ipath_guid;
-       nip->port_guid = dd->ipath_guid;
-       nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
-       nip->device_id = cpu_to_be16(dd->ipath_deviceid);
-       majrev = dd->ipath_majrev;
-       minrev = dd->ipath_minrev;
-       nip->revision = cpu_to_be32((majrev << 16) | minrev);
-       nip->local_port_num = port;
-       vendor = dd->ipath_vendorid;
-       nip->vendor_id[0] = IPATH_SRC_OUI_1;
-       nip->vendor_id[1] = IPATH_SRC_OUI_2;
-       nip->vendor_id[2] = IPATH_SRC_OUI_3;
-
-       return reply(smp);
-}
-
-static int recv_subn_get_guidinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev)
-{
-       u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
-       __be64 *p = (__be64 *) smp->data;
-
-       /* 32 blocks of 8 64-bit GUIDs per block */
-
-       memset(smp->data, 0, sizeof(smp->data));
-
-       /*
-        * We only support one GUID for now.  If this changes, the
-        * portinfo.guid_cap field needs to be updated too.
-        */
-       if (startgx == 0) {
-               __be64 g = to_idev(ibdev)->dd->ipath_guid;
-               if (g == 0)
-                       /* GUID 0 is illegal */
-                       smp->status |= IB_SMP_INVALID_FIELD;
-               else
-                       /* The first is a copy of the read-only HW GUID. */
-                       *p = g;
-       } else
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return reply(smp);
-}
-
-static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
-{
-       (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
-}
-
-static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
-{
-       (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
-}
-
-static int get_overrunthreshold(struct ipath_devdata *dd)
-{
-       return (dd->ipath_ibcctrl >>
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-/**
- * set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-       unsigned v;
-
-       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-       if (v != n) {
-               dd->ipath_ibcctrl &=
-                       ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-                         INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-               dd->ipath_ibcctrl |=
-                       (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-       }
-       return 0;
-}
-
-static int get_phyerrthreshold(struct ipath_devdata *dd)
-{
-       return (dd->ipath_ibcctrl >>
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-/**
- * set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-       unsigned v;
-
-       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-       if (v != n) {
-               dd->ipath_ibcctrl &=
-                       ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-                         INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-               dd->ipath_ibcctrl |=
-                       (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-       }
-       return 0;
-}
-
-/**
- * get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-static int get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-       return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-static int recv_subn_get_portinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct ipath_ibdev *dev;
-       struct ipath_devdata *dd;
-       struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-       u16 lid;
-       u8 ibcstat;
-       u8 mtu;
-       int ret;
-
-       if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
-               smp->status |= IB_SMP_INVALID_FIELD;
-               ret = reply(smp);
-               goto bail;
-       }
-
-       dev = to_idev(ibdev);
-       dd = dev->dd;
-
-       /* Clear all fields; only the non-zero fields need to be set below. */
-       memset(smp->data, 0, sizeof(smp->data));
-
-       /* Only return the mkey if the protection field allows it. */
-       if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
-           dev->mkeyprot == 0)
-               pip->mkey = dev->mkey;
-       pip->gid_prefix = dev->gid_prefix;
-       lid = dd->ipath_lid;
-       pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
-       pip->sm_lid = cpu_to_be16(dev->sm_lid);
-       pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
-       /* pip->diag_code; */
-       pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
-       pip->local_port_num = port;
-       pip->link_width_enabled = dd->ipath_link_width_enabled;
-       pip->link_width_supported = dd->ipath_link_width_supported;
-       pip->link_width_active = dd->ipath_link_width_active;
-       pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
-       ibcstat = dd->ipath_lastibcstat;
-       /* map LinkState to IB portinfo values.  */
-       pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
-
-       pip->portphysstate_linkdown =
-               (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
-               (get_linkdowndefaultstate(dd) ? 1 : 2);
-       pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
-       pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
-               dd->ipath_link_speed_enabled;
-       switch (dd->ipath_ibmtu) {
-       case 4096:
-               mtu = IB_MTU_4096;
-               break;
-       case 2048:
-               mtu = IB_MTU_2048;
-               break;
-       case 1024:
-               mtu = IB_MTU_1024;
-               break;
-       case 512:
-               mtu = IB_MTU_512;
-               break;
-       case 256:
-               mtu = IB_MTU_256;
-               break;
-       default:                /* oops, something is wrong */
-               mtu = IB_MTU_2048;
-               break;
-       }
-       pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
-       pip->vlcap_inittype = 0x10;     /* VLCap = VL0, InitType = 0 */
-       pip->vl_high_limit = dev->vl_high_limit;
-       /* pip->vl_arb_high_cap; // only one VL */
-       /* pip->vl_arb_low_cap; // only one VL */
-       /* InitTypeReply = 0 */
-       /* our mtu cap depends on whether 4K MTU enabled or not */
-       pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-       /* HCAs ignore VLStallCount and HOQLife */
-       /* pip->vlstallcnt_hoqlife; */
-       pip->operationalvl_pei_peo_fpi_fpo = 0x10;      /* OVLs = 1 */
-       pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
-       /* P_KeyViolations are counted by hardware. */
-       pip->pkey_violations =
-               cpu_to_be16((ipath_get_cr_errpkey(dd) -
-                            dev->z_pkey_violations) & 0xFFFF);
-       pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
-       /* Only the hardware GUID is supported for now */
-       pip->guid_cap = 1;
-       pip->clientrereg_resv_subnetto = dev->subnet_timeout;
-       /* 32.768 usec. response time (guessing) */
-       pip->resv_resptimevalue = 3;
-       pip->localphyerrors_overrunerrors =
-               (get_phyerrthreshold(dd) << 4) |
-               get_overrunthreshold(dd);
-       /* pip->max_credit_hint; */
-       if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
-               u32 v;
-
-               v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
-               pip->link_roundtrip_latency[0] = v >> 16;
-               pip->link_roundtrip_latency[1] = v >> 8;
-               pip->link_roundtrip_latency[2] = v;
-       }
-
-       ret = reply(smp);
-
-bail:
-       return ret;
-}
-
-/**
- * get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-       /* always a kernel port, no locking needed */
-       struct ipath_portdata *pd = dd->ipath_pd[0];
-
-       memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-       return 0;
-}
-
-static int recv_subn_get_pkeytable(struct ib_smp *smp,
-                                  struct ib_device *ibdev)
-{
-       u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-       u16 *p = (u16 *) smp->data;
-       __be16 *q = (__be16 *) smp->data;
-
-       /* 64 blocks of 32 16-bit P_Key entries */
-
-       memset(smp->data, 0, sizeof(smp->data));
-       if (startpx == 0) {
-               struct ipath_ibdev *dev = to_idev(ibdev);
-               unsigned i, n = ipath_get_npkeys(dev->dd);
-
-               get_pkeys(dev->dd, p);
-
-               for (i = 0; i < n; i++)
-                       q[i] = cpu_to_be16(p[i]);
-       } else
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return reply(smp);
-}
-
-static int recv_subn_set_guidinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev)
-{
-       /* The only GUID we support is the first read-only entry. */
-       return recv_subn_get_guidinfo(smp, ibdev);
-}
-
-/**
- * set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
-{
-       if (sleep)
-               dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-       else
-               dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                        dd->ipath_ibcctrl);
-       return 0;
-}
-
-/**
- * recv_subn_set_portinfo - set port information
- * @smp: the incoming SM packet
- * @ibdev: the infiniband device
- * @port: the port on the device
- *
- * Set Portinfo (see ch. 14.2.5.6).
- */
-static int recv_subn_set_portinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-       struct ib_event event;
-       struct ipath_ibdev *dev;
-       struct ipath_devdata *dd;
-       char clientrereg = 0;
-       u16 lid, smlid;
-       u8 lwe;
-       u8 lse;
-       u8 state;
-       u16 lstate;
-       u32 mtu;
-       int ret, ore;
-
-       if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
-               goto err;
-
-       dev = to_idev(ibdev);
-       dd = dev->dd;
-       event.device = ibdev;
-       event.element.port_num = port;
-
-       dev->mkey = pip->mkey;
-       dev->gid_prefix = pip->gid_prefix;
-       dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
-
-       lid = be16_to_cpu(pip->lid);
-       if (dd->ipath_lid != lid ||
-           dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
-               /* Must be a valid unicast LID address. */
-               if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
-                       goto err;
-               ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
-               event.event = IB_EVENT_LID_CHANGE;
-               ib_dispatch_event(&event);
-       }
-
-       smlid = be16_to_cpu(pip->sm_lid);
-       if (smlid != dev->sm_lid) {
-               /* Must be a valid unicast LID address. */
-               if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE)
-                       goto err;
-               dev->sm_lid = smlid;
-               event.event = IB_EVENT_SM_CHANGE;
-               ib_dispatch_event(&event);
-       }
-
-       /* Allow 1x or 4x to be set (see 14.2.6.6). */
-       lwe = pip->link_width_enabled;
-       if (lwe) {
-               if (lwe == 0xFF)
-                       lwe = dd->ipath_link_width_supported;
-               else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
-                       goto err;
-               set_link_width_enabled(dd, lwe);
-       }
-
-       /* Allow 2.5 or 5.0 Gb/s (SDR or DDR). */
-       lse = pip->linkspeedactive_enabled & 0xF;
-       if (lse) {
-               if (lse == 15)
-                       lse = dd->ipath_link_speed_supported;
-               else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
-                       goto err;
-               set_link_speed_enabled(dd, lse);
-       }
-
-       /* Set link down default state. */
-       switch (pip->portphysstate_linkdown & 0xF) {
-       case 0: /* NOP */
-               break;
-       case 1: /* SLEEP */
-               if (set_linkdowndefaultstate(dd, 1))
-                       goto err;
-               break;
-       case 2: /* POLL */
-               if (set_linkdowndefaultstate(dd, 0))
-                       goto err;
-               break;
-       default:
-               goto err;
-       }
-
-       dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
-       dev->vl_high_limit = pip->vl_high_limit;
-
-       switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
-       case IB_MTU_256:
-               mtu = 256;
-               break;
-       case IB_MTU_512:
-               mtu = 512;
-               break;
-       case IB_MTU_1024:
-               mtu = 1024;
-               break;
-       case IB_MTU_2048:
-               mtu = 2048;
-               break;
-       case IB_MTU_4096:
-               if (!ipath_mtu4096)
-                       goto err;
-               mtu = 4096;
-               break;
-       default:
-               /* XXX We have already partially updated our state! */
-               goto err;
-       }
-       ipath_set_mtu(dd, mtu);
-
-       dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
-
-       /* We only support VL0 */
-       if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
-               goto err;
-
-       if (pip->mkey_violations == 0)
-               dev->mkey_violations = 0;
-
-       /*
-        * Hardware counter can't be reset so snapshot and subtract
-        * later.
-        */
-       if (pip->pkey_violations == 0)
-               dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
-
-       if (pip->qkey_violations == 0)
-               dev->qkey_violations = 0;
-
-       ore = pip->localphyerrors_overrunerrors;
-       if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
-               goto err;
-
-       if (set_overrunthreshold(dd, (ore & 0xF)))
-               goto err;
-
-       dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
-
-       if (pip->clientrereg_resv_subnetto & 0x80) {
-               clientrereg = 1;
-               event.event = IB_EVENT_CLIENT_REREGISTER;
-               ib_dispatch_event(&event);
-       }
-
-       /*
-        * Do the port state change now that the other link parameters
-        * have been set.
-        * Changing the port physical state only makes sense if the link
-        * is down or is being set to down.
-        */
-       state = pip->linkspeed_portstate & 0xF;
-       lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
-       if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
-               goto err;
-
-       /*
-        * Only state changes to DOWN, ARM, and ACTIVE are valid, and
-        * the link must be in the correct state for the change to take
-        * effect (see 7.2.6).
-        */
-       switch (state) {
-       case IB_PORT_NOP:
-               if (lstate == 0)
-                       break;
-               /* FALLTHROUGH */
-       case IB_PORT_DOWN:
-               if (lstate == 0)
-                       lstate = IPATH_IB_LINKDOWN_ONLY;
-               else if (lstate == 1)
-                       lstate = IPATH_IB_LINKDOWN_SLEEP;
-               else if (lstate == 2)
-                       lstate = IPATH_IB_LINKDOWN;
-               else if (lstate == 3)
-                       lstate = IPATH_IB_LINKDOWN_DISABLE;
-               else
-                       goto err;
-               ipath_set_linkstate(dd, lstate);
-               if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
-                       ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-                       goto done;
-               }
-               ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
-                               IPATH_LINKACTIVE, 1000);
-               break;
-       case IB_PORT_ARMED:
-               ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-               break;
-       case IB_PORT_ACTIVE:
-               ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-               break;
-       default:
-               /* XXX We have already partially updated our state! */
-               goto err;
-       }
-
-       ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-       if (clientrereg)
-               pip->clientrereg_resv_subnetto |= 0x80;
-
-       goto done;
-
-err:
-       smp->status |= IB_SMP_INVALID_FIELD;
-       ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-done:
-       return ret;
-}
-
-/**
- * rm_pkey - decrement the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-       int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (dd->ipath_pkeys[i] != key)
-                       continue;
-               if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-                       dd->ipath_pkeys[i] = 0;
-                       ret = 1;
-                       goto bail;
-               }
-               break;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-       int i;
-       u16 lkey = key & 0x7FFF;
-       int any = 0;
-       int ret;
-
-       if (lkey == 0x7FFF) {
-               ret = 0;
-               goto bail;
-       }
-
-       /* Look for an empty slot or a matching PKEY. */
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i]) {
-                       any++;
-                       continue;
-               }
-               /* If it matches exactly, try to increment the ref count */
-               if (dd->ipath_pkeys[i] == key) {
-                       if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-                               ret = 0;
-                               goto bail;
-                       }
-                       /* Lost the race. Look for an empty slot below. */
-                       atomic_dec(&dd->ipath_pkeyrefs[i]);
-                       any++;
-               }
-               /*
-                * It makes no sense to have both the limited and unlimited
-                * PKEY set at the same time since the unlimited one will
-                * disable the limited one.
-                */
-               if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (!any) {
-               ret = -EBUSY;
-               goto bail;
-       }
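-       /*
-        * Second pass: claim the first free slot; atomic_inc_return()
-        * returning 1 means we won the race for it.
-        */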
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i] &&
-                   atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-                       /* for ipathstats, etc. */
-                       ipath_stats.sps_pkeys[i] = lkey;
-                       dd->ipath_pkeys[i] = key;
-                       ret = 1;
-                       goto bail;
-               }
-       }
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
-/**
- * set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- * @port: the IB port number, reported in the PKEY change event
- */
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
-{
-       struct ipath_portdata *pd;
-       int i;
-       int changed = 0;
-
-       /* always a kernel port, no locking needed */
-       pd = dd->ipath_pd[0];
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               u16 key = pkeys[i];
-               u16 okey = pd->port_pkeys[i];
-
-               if (key == okey)
-                       continue;
-               /*
-                * The value of this PKEY table entry is changing.
-                * Remove the old entry in the hardware's array of PKEYs.
-                */
-               if (okey & 0x7FFF)
-                       changed |= rm_pkey(dd, okey);
-               if (key & 0x7FFF) {
-                       int ret = add_pkey(dd, key);
-
-                       if (ret < 0)
-                               key = 0;
-                       else
-                               changed |= ret;
-               }
-               pd->port_pkeys[i] = key;
-       }
-       if (changed) {
-               u64 pkey;
-               struct ib_event event;
-
-               pkey = (u64) dd->ipath_pkeys[0] |
-                       ((u64) dd->ipath_pkeys[1] << 16) |
-                       ((u64) dd->ipath_pkeys[2] << 32) |
-                       ((u64) dd->ipath_pkeys[3] << 48);
-               ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-                          (unsigned long long) pkey);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-                                pkey);
-
-               event.event = IB_EVENT_PKEY_CHANGE;
-               event.device = &dd->verbs_dev->ibdev;
-               event.element.port_num = port;
-               ib_dispatch_event(&event);
-       }
-       return 0;
-}
-
-static int recv_subn_set_pkeytable(struct ib_smp *smp,
-                                  struct ib_device *ibdev, u8 port)
-{
-       u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-       __be16 *p = (__be16 *) smp->data;
-       u16 *q = (u16 *) smp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       unsigned i, n = ipath_get_npkeys(dev->dd);
-
-       for (i = 0; i < n; i++)
-               q[i] = be16_to_cpu(p[i]);
-
-       if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return recv_subn_get_pkeytable(smp, ibdev);
-}
-
-static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp)
-{
-       struct ib_class_port_info *p =
-               (struct ib_class_port_info *)pmp->data;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       if (pmp->mad_hdr.attr_mod != 0)
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-       /* Indicate AllPortSelect is valid (only one port anyway) */
-       p->capability_mask = cpu_to_be16(1 << 8);
-       p->base_version = 1;
-       p->class_version = 1;
-       /*
-        * Expected response time is 4.096 usec. * 2^18 == 1.073741824
-        * sec.
-        */
-       p->resp_time_value = 18;
-
-       return reply((struct ib_smp *) pmp);
-}
-
-/*
- * The PortSamplesControl.CounterMasks field is an array of 3-bit fields
- * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
- * We support 5 counters which only count the mandatory quantities.
- */
-#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
-#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \
-                                   COUNTER_MASK(1, 1) | \
-                                   COUNTER_MASK(1, 2) | \
-                                   COUNTER_MASK(1, 3) | \
-                                   COUNTER_MASK(1, 4))
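-/* e.g. COUNTER_MASK(1, 0) == 1 << 27, i.e. counter 0's field is bits 29:27 */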
-
-static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
-                                          struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portsamplescontrol *p =
-               (struct ib_pma_portsamplescontrol *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       unsigned long flags;
-       u8 port_select = p->port_select;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-       /*
-        * Ticks are 10x the link transfer period, which for 2.5 Gb/s is
-        * 4 nsec.  0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1024 nsec.
-        * Sample intervals are counted in ticks.  Since we use Linux
-        * timers, which count in jiffies, we can't sample for less than
-        * 1000 ticks if HZ == 1000 (4000 ticks if HZ is 250).
-        * link_speed_active returns 2 for DDR and 1 for SDR, so set the
-        * tick to 1 for DDR and 0 for SDR on chips that have hardware
-        * support for delaying packets.
-        */
-       if (crp->cr_psstat)
-               p->tick = dev->dd->ipath_link_speed_active - 1;
-       else
-               p->tick = 250;          /* 1 usec. */
-       p->counter_width = 4;   /* 32-bit counters */
-       p->counter_mask0_9 = COUNTER_MASK0_9;
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (crp->cr_psstat)
-               p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               p->sample_status = dev->pma_sample_status;
-       p->sample_start = cpu_to_be32(dev->pma_sample_start);
-       p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
-       p->tag = cpu_to_be16(dev->pma_tag);
-       p->counter_select[0] = dev->pma_counter_select[0];
-       p->counter_select[1] = dev->pma_counter_select[1];
-       p->counter_select[2] = dev->pma_counter_select[2];
-       p->counter_select[3] = dev->pma_counter_select[3];
-       p->counter_select[4] = dev->pma_counter_select[4];
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
-                                          struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portsamplescontrol *p =
-               (struct ib_pma_portsamplescontrol *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       unsigned long flags;
-       u8 status;
-       int ret;
-
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (p->port_select != port && p->port_select != 0xFF)) {
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-               ret = reply((struct ib_smp *) pmp);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       if (status == IB_PMA_SAMPLE_STATUS_DONE) {
-               dev->pma_sample_start = be32_to_cpu(p->sample_start);
-               dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
-               dev->pma_tag = be16_to_cpu(p->tag);
-               dev->pma_counter_select[0] = p->counter_select[0];
-               dev->pma_counter_select[1] = p->counter_select[1];
-               dev->pma_counter_select[2] = p->counter_select[2];
-               dev->pma_counter_select[3] = p->counter_select[3];
-               dev->pma_counter_select[4] = p->counter_select[4];
-               if (crp->cr_psstat) {
-                       ipath_write_creg(dev->dd, crp->cr_psinterval,
-                                        dev->pma_sample_interval);
-                       ipath_write_creg(dev->dd, crp->cr_psstart,
-                                        dev->pma_sample_start);
-               } else
-                       dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
-
-bail:
-       return ret;
-}
-
-static u64 get_counter(struct ipath_ibdev *dev,
-                      struct ipath_cregs const *crp,
-                      __be16 sel)
-{
-       u64 ret;
-
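-       /*
-        * Prefer the chip's sample counter register when one exists;
-        * otherwise fall back to the software-maintained count.
-        */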
-       switch (sel) {
-       case IB_PMA_PORT_XMIT_DATA:
-               ret = (crp->cr_psxmitdatacount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
-                       dev->ipath_sword;
-               break;
-       case IB_PMA_PORT_RCV_DATA:
-               ret = (crp->cr_psrcvdatacount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
-                       dev->ipath_rword;
-               break;
-       case IB_PMA_PORT_XMIT_PKTS:
-               ret = (crp->cr_psxmitpktscount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
-                       dev->ipath_spkts;
-               break;
-       case IB_PMA_PORT_RCV_PKTS:
-               ret = (crp->cr_psrcvpktscount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
-                       dev->ipath_rpkts;
-               break;
-       case IB_PMA_PORT_XMIT_WAIT:
-               ret = (crp->cr_psxmitwaitcount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
-                       dev->ipath_xmit_wait;
-               break;
-       default:
-               ret = 0;
-       }
-
-       return ret;
-}
-
-static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp,
-                                         struct ib_device *ibdev)
-{
-       struct ib_pma_portsamplesresult *p =
-               (struct ib_pma_portsamplesresult *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       u8 status;
-       int i;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-       p->tag = cpu_to_be16(dev->pma_tag);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       p->sample_status = cpu_to_be16(status);
-       for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-               p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-                   cpu_to_be32(
-                       get_counter(dev, crp, dev->pma_counter_select[i]));
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
-                                             struct ib_device *ibdev)
-{
-       struct ib_pma_portsamplesresult_ext *p =
-               (struct ib_pma_portsamplesresult_ext *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       u8 status;
-       int i;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-       p->tag = cpu_to_be16(dev->pma_tag);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       p->sample_status = cpu_to_be16(status);
-       /* 64 bits */
-       p->extended_width = cpu_to_be32(0x80000000);
-       for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-               p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-                   cpu_to_be64(
-                       get_counter(dev, crp, dev->pma_counter_select[i]));
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters(struct ib_pma_mad *pmp,
-                                    struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_verbs_counters cntrs;
-       u8 port_select = p->port_select;
-
-       ipath_get_counters(dev->dd, &cntrs);
-
-       /* Adjust counters for any resets done. */
-       cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
-       cntrs.link_error_recovery_counter -=
-               dev->z_link_error_recovery_counter;
-       cntrs.link_downed_counter -= dev->z_link_downed_counter;
-       cntrs.port_rcv_errors += dev->rcv_errors;
-       cntrs.port_rcv_errors -= dev->z_port_rcv_errors;
-       cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors;
-       cntrs.port_xmit_discards -= dev->z_port_xmit_discards;
-       cntrs.port_xmit_data -= dev->z_port_xmit_data;
-       cntrs.port_rcv_data -= dev->z_port_rcv_data;
-       cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
-       cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
-       cntrs.local_link_integrity_errors -=
-               dev->z_local_link_integrity_errors;
-       cntrs.excessive_buffer_overrun_errors -=
-               dev->z_excessive_buffer_overrun_errors;
-       cntrs.vl15_dropped -= dev->z_vl15_dropped;
-       cntrs.vl15_dropped += dev->n_vl15_dropped;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
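-       /* PMA counters saturate at their field's maximum; clamp on overflow. */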
-       if (cntrs.symbol_error_counter > 0xFFFFUL)
-               p->symbol_error_counter = cpu_to_be16(0xFFFF);
-       else
-               p->symbol_error_counter =
-                       cpu_to_be16((u16)cntrs.symbol_error_counter);
-       if (cntrs.link_error_recovery_counter > 0xFFUL)
-               p->link_error_recovery_counter = 0xFF;
-       else
-               p->link_error_recovery_counter =
-                       (u8)cntrs.link_error_recovery_counter;
-       if (cntrs.link_downed_counter > 0xFFUL)
-               p->link_downed_counter = 0xFF;
-       else
-               p->link_downed_counter = (u8)cntrs.link_downed_counter;
-       if (cntrs.port_rcv_errors > 0xFFFFUL)
-               p->port_rcv_errors = cpu_to_be16(0xFFFF);
-       else
-               p->port_rcv_errors =
-                       cpu_to_be16((u16) cntrs.port_rcv_errors);
-       if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
-               p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
-       else
-               p->port_rcv_remphys_errors =
-                       cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
-       if (cntrs.port_xmit_discards > 0xFFFFUL)
-               p->port_xmit_discards = cpu_to_be16(0xFFFF);
-       else
-               p->port_xmit_discards =
-                       cpu_to_be16((u16)cntrs.port_xmit_discards);
-       if (cntrs.local_link_integrity_errors > 0xFUL)
-               cntrs.local_link_integrity_errors = 0xFUL;
-       if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
-               cntrs.excessive_buffer_overrun_errors = 0xFUL;
-       p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
-               cntrs.excessive_buffer_overrun_errors;
-       if (cntrs.vl15_dropped > 0xFFFFUL)
-               p->vl15_dropped = cpu_to_be16(0xFFFF);
-       else
-               p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
-       if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
-               p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
-       if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
-               p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
-       if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
-               p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_xmit_packets =
-                       cpu_to_be32((u32)cntrs.port_xmit_packets);
-       if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
-               p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_rcv_packets =
-                       cpu_to_be32((u32) cntrs.port_rcv_packets);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp,
-                                        struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters_ext *p =
-               (struct ib_pma_portcounters_ext *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       u64 swords, rwords, spkts, rpkts, xwait;
-       u8 port_select = p->port_select;
-
-       ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-                               &rpkts, &xwait);
-
-       /* Adjust counters for any resets done. */
-       swords -= dev->z_port_xmit_data;
-       rwords -= dev->z_port_rcv_data;
-       spkts -= dev->z_port_xmit_packets;
-       rpkts -= dev->z_port_rcv_packets;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-       p->port_xmit_data = cpu_to_be64(swords);
-       p->port_rcv_data = cpu_to_be64(rwords);
-       p->port_xmit_packets = cpu_to_be64(spkts);
-       p->port_rcv_packets = cpu_to_be64(rpkts);
-       p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
-       p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
-       p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
-       p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portcounters(struct ib_pma_mad *pmp,
-                                    struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_verbs_counters cntrs;
-
-       /*
-        * Since the HW doesn't support clearing counters, we save the
-        * current count and subtract it from future responses.
-        */
-       ipath_get_counters(dev->dd, &cntrs);
-
-       if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
-               dev->z_symbol_error_counter = cntrs.symbol_error_counter;
-
-       if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
-               dev->z_link_error_recovery_counter =
-                       cntrs.link_error_recovery_counter;
-
-       if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
-               dev->z_link_downed_counter = cntrs.link_downed_counter;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
-               dev->z_port_rcv_errors =
-                       cntrs.port_rcv_errors + dev->rcv_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
-               dev->z_port_rcv_remphys_errors =
-                       cntrs.port_rcv_remphys_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
-               dev->z_port_xmit_discards = cntrs.port_xmit_discards;
-
-       if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
-               dev->z_local_link_integrity_errors =
-                       cntrs.local_link_integrity_errors;
-
-       if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
-               dev->z_excessive_buffer_overrun_errors =
-                       cntrs.excessive_buffer_overrun_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
-               dev->n_vl15_dropped = 0;
-               dev->z_vl15_dropped = cntrs.vl15_dropped;
-       }
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
-               dev->z_port_xmit_data = cntrs.port_xmit_data;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
-               dev->z_port_rcv_data = cntrs.port_rcv_data;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
-               dev->z_port_xmit_packets = cntrs.port_xmit_packets;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
-               dev->z_port_rcv_packets = cntrs.port_rcv_packets;
-
-       return recv_pma_get_portcounters(pmp, ibdev, port);
-}
-
-static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp,
-                                        struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       u64 swords, rwords, spkts, rpkts, xwait;
-
-       ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-                               &rpkts, &xwait);
-
-       if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
-               dev->z_port_xmit_data = swords;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
-               dev->z_port_rcv_data = rwords;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
-               dev->z_port_xmit_packets = spkts;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
-               dev->z_port_rcv_packets = rpkts;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
-               dev->n_unicast_xmit = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
-               dev->n_unicast_rcv = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
-               dev->n_multicast_xmit = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
-               dev->n_multicast_rcv = 0;
-
-       return recv_pma_get_portcounters_ext(pmp, ibdev, port);
-}
-
-static int process_subn(struct ib_device *ibdev, int mad_flags,
-                       u8 port_num, const struct ib_mad *in_mad,
-                       struct ib_mad *out_mad)
-{
-       struct ib_smp *smp = (struct ib_smp *)out_mad;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       *out_mad = *in_mad;
-       if (smp->class_version != 1) {
-               smp->status |= IB_SMP_UNSUP_VERSION;
-               ret = reply(smp);
-               goto bail;
-       }
-
-       /* Is the mkey in the process of expiring? */
-       if (dev->mkey_lease_timeout &&
-           time_after_eq(jiffies, dev->mkey_lease_timeout)) {
-               /* Clear timeout and mkey protection field. */
-               dev->mkey_lease_timeout = 0;
-               dev->mkeyprot = 0;
-       }
-
-       /*
-        * M_Key checking depends on
-        * Portinfo:M_Key_protect_bits
-        */
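-       /*
-        * mkeyprot == 0 leaves the M_Key readable; with mkeyprot >= 2
-        * even a Get with a mismatched M_Key counts as a violation.
-        */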
-       if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
-           dev->mkey != smp->mkey &&
-           (smp->method == IB_MGMT_METHOD_SET ||
-            (smp->method == IB_MGMT_METHOD_GET &&
-             dev->mkeyprot >= 2))) {
-               if (dev->mkey_violations != 0xFFFF)
-                       ++dev->mkey_violations;
-               if (dev->mkey_lease_timeout ||
-                   dev->mkey_lease_period == 0) {
-                       ret = IB_MAD_RESULT_SUCCESS |
-                               IB_MAD_RESULT_CONSUMED;
-                       goto bail;
-               }
-               dev->mkey_lease_timeout = jiffies +
-                       dev->mkey_lease_period * HZ;
-               /* Future: Generate a trap notice. */
-               ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-               goto bail;
-       } else if (dev->mkey_lease_timeout)
-               dev->mkey_lease_timeout = 0;
-
-       switch (smp->method) {
-       case IB_MGMT_METHOD_GET:
-               switch (smp->attr_id) {
-               case IB_SMP_ATTR_NODE_DESC:
-                       ret = recv_subn_get_nodedescription(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_NODE_INFO:
-                       ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_GUID_INFO:
-                       ret = recv_subn_get_guidinfo(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_PORT_INFO:
-                       ret = recv_subn_get_portinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_PKEY_TABLE:
-                       ret = recv_subn_get_pkeytable(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_SM_INFO:
-                       if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-                               ret = IB_MAD_RESULT_SUCCESS |
-                                       IB_MAD_RESULT_CONSUMED;
-                               goto bail;
-                       }
-                       if (dev->port_cap_flags & IB_PORT_SM) {
-                               ret = IB_MAD_RESULT_SUCCESS;
-                               goto bail;
-                       }
-                       /* FALLTHROUGH */
-               default:
-                       smp->status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply(smp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_SET:
-               switch (smp->attr_id) {
-               case IB_SMP_ATTR_GUID_INFO:
-                       ret = recv_subn_set_guidinfo(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_PORT_INFO:
-                       ret = recv_subn_set_portinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_PKEY_TABLE:
-                       ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_SM_INFO:
-                       if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-                               ret = IB_MAD_RESULT_SUCCESS |
-                                       IB_MAD_RESULT_CONSUMED;
-                               goto bail;
-                       }
-                       if (dev->port_cap_flags & IB_PORT_SM) {
-                               ret = IB_MAD_RESULT_SUCCESS;
-                               goto bail;
-                       }
-                       /* FALLTHROUGH */
-               default:
-                       smp->status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply(smp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_TRAP:
-       case IB_MGMT_METHOD_REPORT:
-       case IB_MGMT_METHOD_REPORT_RESP:
-       case IB_MGMT_METHOD_TRAP_REPRESS:
-       case IB_MGMT_METHOD_GET_RESP:
-               /*
-                * The ib_mad module will call us to process responses
-                * before checking for other consumers.
-                * Just tell the caller to process it normally.
-                */
-               ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
-       default:
-               smp->status |= IB_SMP_UNSUP_METHOD;
-               ret = reply(smp);
-       }
-
-bail:
-       return ret;
-}
-
-static int process_perf(struct ib_device *ibdev, u8 port_num,
-                       const struct ib_mad *in_mad,
-                       struct ib_mad *out_mad)
-{
-       struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
-       int ret;
-
-       *out_mad = *in_mad;
-       if (pmp->mad_hdr.class_version != 1) {
-               pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
-               ret = reply((struct ib_smp *) pmp);
-               goto bail;
-       }
-
-       switch (pmp->mad_hdr.method) {
-       case IB_MGMT_METHOD_GET:
-               switch (pmp->mad_hdr.attr_id) {
-               case IB_PMA_CLASS_PORT_INFO:
-                       ret = recv_pma_get_classportinfo(pmp);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_CONTROL:
-                       ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
-                                                             port_num);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_RESULT:
-                       ret = recv_pma_get_portsamplesresult(pmp, ibdev);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_RESULT_EXT:
-                       ret = recv_pma_get_portsamplesresult_ext(pmp,
-                                                                ibdev);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS:
-                       ret = recv_pma_get_portcounters(pmp, ibdev,
-                                                       port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS_EXT:
-                       ret = recv_pma_get_portcounters_ext(pmp, ibdev,
-                                                           port_num);
-                       goto bail;
-               default:
-                       pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply((struct ib_smp *) pmp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_SET:
-               switch (pmp->mad_hdr.attr_id) {
-               case IB_PMA_PORT_SAMPLES_CONTROL:
-                       ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
-                                                             port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS:
-                       ret = recv_pma_set_portcounters(pmp, ibdev,
-                                                       port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS_EXT:
-                       ret = recv_pma_set_portcounters_ext(pmp, ibdev,
-                                                           port_num);
-                       goto bail;
-               default:
-                       pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply((struct ib_smp *) pmp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_GET_RESP:
-               /*
-                * The ib_mad module will call us to process responses
-                * before checking for other consumers.
-                * Just tell the caller to process it normally.
-                */
-               ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
-       default:
-               pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
-               ret = reply((struct ib_smp *) pmp);
-       }
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_process_mad - process an incoming MAD packet
- * @ibdev: the infiniband device this packet came in on
- * @mad_flags: MAD flags
- * @port_num: the port number this packet came in on
- * @in_wc: the work completion entry for this packet
- * @in_grh: the global route header for this packet
- * @in: the incoming MAD header
- * @in_mad_size: size of the incoming MAD; must be sizeof(struct ib_mad)
- * @out: any outgoing MAD reply
- * @out_mad_size: size of the reply buffer; must be sizeof(struct ib_mad)
- * @out_mad_pkey_index: unused by this driver
- *
- * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
- * interested in processing.
- *
- * Note that the verbs framework has already done the MAD sanity checks,
- * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
- * MADs.
- *
- * This is called by the ib_mad module.
- */
-int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                     const struct ib_mad_hdr *in, size_t in_mad_size,
-                     struct ib_mad_hdr *out, size_t *out_mad_size,
-                     u16 *out_mad_pkey_index)
-{
-       int ret;
-       const struct ib_mad *in_mad = (const struct ib_mad *)in;
-       struct ib_mad *out_mad = (struct ib_mad *)out;
-
-       if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-                        *out_mad_size != sizeof(*out_mad)))
-               return IB_MAD_RESULT_FAILURE;
-
-       switch (in_mad->mad_hdr.mgmt_class) {
-       case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
-       case IB_MGMT_CLASS_SUBN_LID_ROUTED:
-               ret = process_subn(ibdev, mad_flags, port_num,
-                                  in_mad, out_mad);
-               goto bail;
-       case IB_MGMT_CLASS_PERF_MGMT:
-               ret = process_perf(ibdev, port_num, in_mad, out_mad);
-               goto bail;
-       default:
-               ret = IB_MAD_RESULT_SUCCESS;
-       }
-
-bail:
-       return ret;
-}
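
Both process_subn() and process_perf() above funnel their error and response
paths through reply(), which is defined earlier in ipath_mad.c and falls
outside this excerpt. A minimal sketch of what such a helper does,
reconstructed from the ipath/qib pattern rather than quoted from the file:

  /* Sketch: convert the request MAD into a response in place and ask
   * ib_mad to send it back to the requester. */
  static int reply(struct ib_smp *smp)
  {
          /* The verbs layer handles directed-route hop/pointer updates. */
          smp->method = IB_MGMT_METHOD_GET_RESP;
          if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
                  smp->status |= IB_SMP_DIRECTION;
          return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
  }

The combined result bits tell the ib_mad core both that the MAD was handled
and that out_mad now holds a reply to transmit.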
diff --git a/drivers/staging/rdma/ipath/ipath_mmap.c b/drivers/staging/rdma/ipath/ipath_mmap.c
deleted file mode 100644 (file)
index e732742..0000000
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct ipath_mmap_info
- */
-void ipath_release_mmap_info(struct kref *ref)
-{
-       struct ipath_mmap_info *ip =
-               container_of(ref, struct ipath_mmap_info, ref);
-       struct ipath_ibdev *dev = to_idev(ip->context->device);
-
-       spin_lock_irq(&dev->pending_lock);
-       list_del(&ip->pending_mmaps);
-       spin_unlock_irq(&dev->pending_lock);
-
-       vfree(ip->obj);
-       kfree(ip);
-}
-
-/*
- * ipath_vma_open() and ipath_vma_close() track how many times the object
- * (a CQ, QP, or SRQ ring) is mapped, so it is not freed while in use.
- */
-static void ipath_vma_open(struct vm_area_struct *vma)
-{
-       struct ipath_mmap_info *ip = vma->vm_private_data;
-
-       kref_get(&ip->ref);
-}
-
-static void ipath_vma_close(struct vm_area_struct *vma)
-{
-       struct ipath_mmap_info *ip = vma->vm_private_data;
-
-       kref_put(&ip->ref, ipath_release_mmap_info);
-}
-
-static const struct vm_operations_struct ipath_vm_ops = {
-       .open =     ipath_vma_open,
-       .close =    ipath_vma_close,
-};
-
-/**
- * ipath_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-       struct ipath_ibdev *dev = to_idev(context->device);
-       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-       unsigned long size = vma->vm_end - vma->vm_start;
-       struct ipath_mmap_info *ip, *pp;
-       int ret = -EINVAL;
-
-       /*
-        * Search the device's list of objects waiting for a mmap call.
-        * Normally, this list is very short since a call to create a
-        * CQ, QP, or SRQ is soon followed by a call to mmap().
-        */
-       spin_lock_irq(&dev->pending_lock);
-       list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-                                pending_mmaps) {
-               /* Only the creator is allowed to mmap the object */
-               if (context != ip->context || (__u64) offset != ip->offset)
-                       continue;
-               /* Don't allow a mmap larger than the object. */
-               if (size > ip->size)
-                       break;
-
-               list_del_init(&ip->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-
-               ret = remap_vmalloc_range(vma, ip->obj, 0);
-               if (ret)
-                       goto done;
-               vma->vm_ops = &ipath_vm_ops;
-               vma->vm_private_data = ip;
-               ipath_vma_open(vma);
-               goto done;
-       }
-       spin_unlock_irq(&dev->pending_lock);
-done:
-       return ret;
-}
-
-/*
- * Allocate information for ipath_mmap
- */
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-                                              u32 size,
-                                              struct ib_ucontext *context,
-                                              void *obj)
-{
-       struct ipath_mmap_info *ip;
-
-       ip = kmalloc(sizeof *ip, GFP_KERNEL);
-       if (!ip)
-               goto bail;
-
-       size = PAGE_ALIGN(size);
-
-       spin_lock_irq(&dev->mmap_offset_lock);
-       if (dev->mmap_offset == 0)
-               dev->mmap_offset = PAGE_SIZE;
-       ip->offset = dev->mmap_offset;
-       dev->mmap_offset += size;
-       spin_unlock_irq(&dev->mmap_offset_lock);
-
-       INIT_LIST_HEAD(&ip->pending_mmaps);
-       ip->size = size;
-       ip->context = context;
-       ip->obj = obj;
-       kref_init(&ip->ref);
-
-bail:
-       return ip;
-}
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-                           struct ipath_mmap_info *ip,
-                           u32 size, void *obj)
-{
-       size = PAGE_ALIGN(size);
-
-       spin_lock_irq(&dev->mmap_offset_lock);
-       if (dev->mmap_offset == 0)
-               dev->mmap_offset = PAGE_SIZE;
-       ip->offset = dev->mmap_offset;
-       dev->mmap_offset += size;
-       spin_unlock_irq(&dev->mmap_offset_lock);
-
-       ip->size = size;
-       ip->obj = obj;
-}
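
The pending-mmaps machinery above is the kernel half of a two-step handshake:
creating a CQ, QP, or SRQ returns ip->offset to userspace (via
ib_copy_to_udata(), as in ipath_create_qp() further down), and the user
library then mmap()s the device file at exactly that offset so ipath_mmap()
can match the request against the pending list. A hedged userspace-side
sketch; the fd/offset/size plumbing is illustrative, not the driver's exact
ABI:

  #include <stddef.h>
  #include <sys/types.h>
  #include <sys/mman.h>

  /* Illustrative only: map a queue object exported by the driver.
   * 'fd' is the open verbs device file; 'offset' and 'size' are the
   * values returned in the create response (hypothetical names). */
  static void *map_queue(int fd, unsigned long long offset, size_t size)
  {
          void *q = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, (off_t)offset);

          return q == MAP_FAILED ? NULL : q;
  }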
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
deleted file mode 100644 (file)
index c7278f6..0000000
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/slab.h>
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-
-/* Fast memory region */
-struct ipath_fmr {
-       struct ib_fmr ibfmr;
-       u8 page_shift;
-       struct ipath_mregion mr;        /* must be last */
-};
-
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-       return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
-/**
- * ipath_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see ipath_dma.c).
- */
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ipath_mr *mr;
-       struct ib_mr *ret;
-
-       mr = kzalloc(sizeof *mr, GFP_KERNEL);
-       if (!mr) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       mr->mr.access_flags = acc;
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
-static struct ipath_mr *alloc_mr(int count,
-                                struct ipath_lkey_table *lk_table)
-{
-       struct ipath_mr *mr;
-       int m, i = 0;
-
-       /* Allocate struct plus pointers to first level page tables. */
-       m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-       mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
-       if (!mr)
-               goto done;
-
-       /* Allocate first level page tables. */
-       for (; i < m; i++) {
-               mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
-               if (!mr->mr.map[i])
-                       goto bail;
-       }
-       mr->mr.mapsz = m;
-
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
-       if (!ipath_alloc_lkey(lk_table, &mr->mr))
-               goto bail;
-       mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
-
-       goto done;
-
-bail:
-       while (i) {
-               i--;
-               kfree(mr->mr.map[i]);
-       }
-       kfree(mr);
-       mr = NULL;
-
-done:
-       return mr;
-}
-
-/**
- * ipath_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
-                               struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct ipath_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
-       if (mr == NULL) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       mr->mr.pd = pd;
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.length = 0;
-       mr->mr.offset = 0;
-       mr->mr.access_flags = acc;
-       mr->mr.max_segs = num_phys_buf;
-       mr->umem = NULL;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the InfiniPath driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                               u64 virt_addr, int mr_access_flags,
-                               struct ib_udata *udata)
-{
-       struct ipath_mr *mr;
-       struct ib_umem *umem;
-       int n, m, entry;
-       struct scatterlist *sg;
-       struct ib_mr *ret;
-
-       if (length == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       umem = ib_umem_get(pd->uobject->context, start, length,
-                          mr_access_flags, 0);
-       if (IS_ERR(umem))
-               return ERR_CAST(umem);
-
-       n = umem->nmap;
-       mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
-       if (!mr) {
-               ret = ERR_PTR(-ENOMEM);
-               ib_umem_release(umem);
-               goto bail;
-       }
-
-       mr->mr.pd = pd;
-       mr->mr.user_base = start;
-       mr->mr.iova = virt_addr;
-       mr->mr.length = length;
-       mr->mr.offset = ib_umem_offset(umem);
-       mr->mr.access_flags = mr_access_flags;
-       mr->mr.max_segs = n;
-       mr->umem = umem;
-
-       m = 0;
-       n = 0;
-       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-               void *vaddr;
-
-               vaddr = page_address(sg_page(sg));
-               if (!vaddr) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-               mr->mr.map[m]->segs[n].vaddr = vaddr;
-               mr->mr.map[m]->segs[n].length = umem->page_size;
-               n++;
-               if (n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
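/*
 * Editor's illustration (not part of the original file):
 * ipath_reg_user_mr() above is the kernel half of ordinary userspace
 * registration; applications reach it through libibverbs.  A minimal
 * sketch of the caller's side (standard verbs API, nothing
 * ipath-specific):
 */
#include <infiniband/verbs.h>

/* Register 'len' bytes at 'buf' for local write access; the uverbs
 * layer pins the pages and ends up in the driver's reg_user_mr hook. */
static struct ibv_mr *register_buffer(struct ibv_pd *pd, void *buf,
                                      size_t len)
{
        return ibv_reg_mr(pd, buf, len, IBV_ACCESS_LOCAL_WRITE);
}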
-
-/**
- * ipath_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by ipath_get_dma_mr()
- * or ipath_reg_user_mr().
- */
-int ipath_dereg_mr(struct ib_mr *ibmr)
-{
-       struct ipath_mr *mr = to_imr(ibmr);
-       int i;
-
-       ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
-       i = mr->mr.mapsz;
-       while (i) {
-               i--;
-               kfree(mr->mr.map[i]);
-       }
-
-       if (mr->umem)
-               ib_umem_release(mr->umem);
-
-       kfree(mr);
-       return 0;
-}
-
-/**
- * ipath_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-                              struct ib_fmr_attr *fmr_attr)
-{
-       struct ipath_fmr *fmr;
-       int m, i = 0;
-       struct ib_fmr *ret;
-
-       /* Allocate struct plus pointers to first level page tables. */
-       m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-       fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
-       if (!fmr)
-               goto bail;
-
-       /* Allocate first level page tables. */
-       for (; i < m; i++) {
-               fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
-                                        GFP_KERNEL);
-               if (!fmr->mr.map[i])
-                       goto bail;
-       }
-       fmr->mr.mapsz = m;
-
-       /*
-        * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-        * rkey.
-        */
-       if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
-               goto bail;
-       fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
-       /*
-        * Resources are allocated but no valid mapping (RKEY can't be
-        * used).
-        */
-       fmr->mr.pd = pd;
-       fmr->mr.user_base = 0;
-       fmr->mr.iova = 0;
-       fmr->mr.length = 0;
-       fmr->mr.offset = 0;
-       fmr->mr.access_flags = mr_access_flags;
-       fmr->mr.max_segs = fmr_attr->max_pages;
-       fmr->page_shift = fmr_attr->page_shift;
-
-       ret = &fmr->ibfmr;
-       goto done;
-
-bail:
-       while (i)
-               kfree(fmr->mr.map[--i]);
-       kfree(fmr);
-       ret = ERR_PTR(-ENOMEM);
-
-done:
-       return ret;
-}
-
-/**
- * ipath_map_phys_fmr - set up a fast memory region
- * @ibfmr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-                      int list_len, u64 iova)
-{
-       struct ipath_fmr *fmr = to_ifmr(ibfmr);
-       struct ipath_lkey_table *rkt;
-       unsigned long flags;
-       int m, n, i;
-       u32 ps;
-       int ret;
-
-       if (list_len > fmr->mr.max_segs) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       rkt = &to_idev(ibfmr->device)->lk_table;
-       spin_lock_irqsave(&rkt->lock, flags);
-       fmr->mr.user_base = iova;
-       fmr->mr.iova = iova;
-       ps = 1 << fmr->page_shift;
-       fmr->mr.length = list_len * ps;
-       m = 0;
-       n = 0;
-       for (i = 0; i < list_len; i++) {
-               fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-               fmr->mr.map[m]->segs[n].length = ps;
-               if (++n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       spin_unlock_irqrestore(&rkt->lock, flags);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int ipath_unmap_fmr(struct list_head *fmr_list)
-{
-       struct ipath_fmr *fmr;
-       struct ipath_lkey_table *rkt;
-       unsigned long flags;
-
-       list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-               rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-               spin_lock_irqsave(&rkt->lock, flags);
-               fmr->mr.user_base = 0;
-               fmr->mr.iova = 0;
-               fmr->mr.length = 0;
-               spin_unlock_irqrestore(&rkt->lock, flags);
-       }
-       return 0;
-}
-
-/**
- * ipath_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-       struct ipath_fmr *fmr = to_ifmr(ibfmr);
-       int i;
-
-       ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
-       i = fmr->mr.mapsz;
-       while (i)
-               kfree(fmr->mr.map[--i]);
-       kfree(fmr);
-       return 0;
-}
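
Every registration path in the file above fills the same two-level segment
map, map[m]->segs[n], with IPATH_SEGSZ entries per first-level page; the
loops advance n and spill into the next map entry when a page fills. A
hypothetical helper showing how a flat segment index decomposes, assuming
the struct ipath_mregion and struct ipath_seg definitions from
ipath_verbs.h:

  /* Hypothetical (editor's illustration): translate a flat segment
   * index into the two-level map the registration loops populate. */
  static struct ipath_seg *mr_seg(struct ipath_mregion *mr, u32 i)
  {
          return &mr->map[i / IPATH_SEGSZ]->segs[i % IPATH_SEGSZ];
  }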
diff --git a/drivers/staging/rdma/ipath/ipath_qp.c b/drivers/staging/rdma/ipath/ipath_qp.c
deleted file mode 100644 (file)
index 280cd2d..0000000
+++ /dev/null
@@ -1,1078 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-#define BITS_PER_PAGE          (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK     (BITS_PER_PAGE-1)
-#define mk_qpn(qpt, map, off)  (((map) - (qpt)->map) * BITS_PER_PAGE + \
-                                (off))
-#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
-                                                     BITS_PER_PAGE, off)
-
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static u32 credit_table[31] = {
-       0,                      /* 0 */
-       1,                      /* 1 */
-       2,                      /* 2 */
-       3,                      /* 3 */
-       4,                      /* 4 */
-       6,                      /* 5 */
-       8,                      /* 6 */
-       12,                     /* 7 */
-       16,                     /* 8 */
-       24,                     /* 9 */
-       32,                     /* A */
-       48,                     /* B */
-       64,                     /* C */
-       96,                     /* D */
-       128,                    /* E */
-       192,                    /* F */
-       256,                    /* 10 */
-       384,                    /* 11 */
-       512,                    /* 12 */
-       768,                    /* 13 */
-       1024,                   /* 14 */
-       1536,                   /* 15 */
-       2048,                   /* 16 */
-       3072,                   /* 17 */
-       4096,                   /* 18 */
-       6144,                   /* 19 */
-       8192,                   /* 1A */
-       12288,                  /* 1B */
-       16384,                  /* 1C */
-       24576,                  /* 1D */
-       32768                   /* 1E */
-};
-
-
-static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
-{
-       unsigned long page = get_zeroed_page(GFP_KERNEL);
-       unsigned long flags;
-
-       /*
-        * Free the page if someone raced with us installing it.
-        */
-
-       spin_lock_irqsave(&qpt->lock, flags);
-       if (map->page)
-               free_page(page);
-       else
-               map->page = (void *)page;
-       spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-
-static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
-{
-       u32 i, offset, max_scan, qpn;
-       struct qpn_map *map;
-       int ret = -1;
-
-       if (type == IB_QPT_SMI)
-               ret = 0;
-       else if (type == IB_QPT_GSI)
-               ret = 1;
-
-       if (ret != -1) {
-               map = &qpt->map[0];
-               if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
-                       if (unlikely(!map->page)) {
-                               ret = -ENOMEM;
-                               goto bail;
-                       }
-               }
-               if (!test_and_set_bit(ret, map->page))
-                       atomic_dec(&map->n_free);
-               else
-                       ret = -EBUSY;
-               goto bail;
-       }
-
-       qpn = qpt->last + 1;
-       if (qpn >= QPN_MAX)
-               qpn = 2;
-       offset = qpn & BITS_PER_PAGE_MASK;
-       map = &qpt->map[qpn / BITS_PER_PAGE];
-       max_scan = qpt->nmaps - !offset;
-       for (i = 0;;) {
-               if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
-                       if (unlikely(!map->page))
-                               break;
-               }
-               if (likely(atomic_read(&map->n_free))) {
-                       do {
-                               if (!test_and_set_bit(offset, map->page)) {
-                                       atomic_dec(&map->n_free);
-                                       qpt->last = qpn;
-                                       ret = qpn;
-                                       goto bail;
-                               }
-                               offset = find_next_offset(map, offset);
-                               qpn = mk_qpn(qpt, map, offset);
-                               /*
-                                * This test differs from alloc_pidmap().
-                                * If find_next_offset() does find a zero
-                                * bit, we don't need to check for QPN
-                                * wrapping around past our starting QPN.
-                                * We just need to be sure we don't loop
-                                * forever.
-                                */
-                       } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
-               }
-               /*
-                * In order to keep the number of pages allocated to a
-                * minimum, we scan all the existing pages before increasing
-                * the size of the bitmap table.
-                */
-               if (++i > max_scan) {
-                       if (qpt->nmaps == QPNMAP_ENTRIES)
-                               break;
-                       map = &qpt->map[qpt->nmaps++];
-                       offset = 0;
-               } else if (map < &qpt->map[qpt->nmaps]) {
-                       ++map;
-                       offset = 0;
-               } else {
-                       map = &qpt->map[0];
-                       offset = 2;
-               }
-               qpn = mk_qpn(qpt, map, offset);
-       }
-
-       ret = -ENOMEM;
-
-bail:
-       return ret;
-}
-
-static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-       struct qpn_map *map;
-
-       map = qpt->map + qpn / BITS_PER_PAGE;
-       if (map->page)
-               clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-       atomic_inc(&map->n_free);
-}
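/*
 * Editor's illustration (not part of the original file): alloc_qpn()
 * and free_qpn() treat the QPN space as an array of per-page bitmaps.
 * Assuming PAGE_SIZE == 4096, BITS_PER_PAGE is 32768, so QPN 40000
 * lives in map[40000 / 32768] == map[1] at bit 40000 % 32768 == 7232,
 * and mk_qpn() inverts this: 1 * 32768 + 7232 == 40000.
 */
static unsigned int qpn_map_index(u32 qpn)
{
        return qpn / BITS_PER_PAGE;             /* which qpn_map */
}

static unsigned int qpn_map_bit(u32 qpn)
{
        return qpn & BITS_PER_PAGE_MASK;        /* bit within its page */
}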
-
-/**
- * ipath_alloc_qpn - allocate a QP number
- * @qpt: the QP table
- * @qp: the QP
- * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
- *
- * Allocate the next available QPN and put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
-                          enum ib_qp_type type)
-{
-       unsigned long flags;
-       int ret;
-
-       ret = alloc_qpn(qpt, type);
-       if (ret < 0)
-               goto bail;
-       qp->ibqp.qp_num = ret;
-
-       /* Add the QP to the hash table. */
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       ret %= qpt->max;
-       qp->next = qpt->table[ret];
-       qpt->table[ret] = qp;
-       atomic_inc(&qp->refcount);
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_free_qp - remove a QP from the QP table
- * @qpt: the QP table
- * @qp: the QP to remove
- *
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
-{
-       struct ipath_qp *q, **qpp;
-       unsigned long flags;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       /* Remove QP from the hash table. */
-       qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
-       for (; (q = *qpp) != NULL; qpp = &q->next) {
-               if (q == qp) {
-                       *qpp = qp->next;
-                       qp->next = NULL;
-                       atomic_dec(&qp->refcount);
-                       break;
-               }
-       }
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-/**
- * ipath_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free the QPN bitmap pages.
- */
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
-{
-       unsigned long flags;
-       struct ipath_qp *qp;
-       u32 n, qp_inuse = 0;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-       for (n = 0; n < qpt->max; n++) {
-               qp = qpt->table[n];
-               qpt->table[n] = NULL;
-
-               for (; qp; qp = qp->next)
-                       qp_inuse++;
-       }
-       spin_unlock_irqrestore(&qpt->lock, flags);
-
-       for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
-               if (qpt->map[n].page)
-                       free_page((unsigned long) qpt->map[n].page);
-       return qp_inuse;
-}
-
-/**
- * ipath_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-       unsigned long flags;
-       struct ipath_qp *qp;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
-               if (qp->ibqp.qp_num == qpn) {
-                       atomic_inc(&qp->refcount);
-                       break;
-               }
-       }
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-       return qp;
-}
-
-/**
- * ipath_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
-{
-       qp->remote_qpn = 0;
-       qp->qkey = 0;
-       qp->qp_access_flags = 0;
-       atomic_set(&qp->s_dma_busy, 0);
-       qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
-       qp->s_hdrwords = 0;
-       qp->s_wqe = NULL;
-       qp->s_pkt_delay = 0;
-       qp->s_draining = 0;
-       qp->s_psn = 0;
-       qp->r_psn = 0;
-       qp->r_msn = 0;
-       if (type == IB_QPT_RC) {
-               qp->s_state = IB_OPCODE_RC_SEND_LAST;
-               qp->r_state = IB_OPCODE_RC_SEND_LAST;
-       } else {
-               qp->s_state = IB_OPCODE_UC_SEND_LAST;
-               qp->r_state = IB_OPCODE_UC_SEND_LAST;
-       }
-       qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-       qp->r_nak_state = 0;
-       qp->r_aflags = 0;
-       qp->r_flags = 0;
-       qp->s_rnr_timeout = 0;
-       qp->s_head = 0;
-       qp->s_tail = 0;
-       qp->s_cur = 0;
-       qp->s_last = 0;
-       qp->s_ssn = 1;
-       qp->s_lsn = 0;
-       memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-       qp->r_head_ack_queue = 0;
-       qp->s_tail_ack_queue = 0;
-       qp->s_num_rd_atomic = 0;
-       if (qp->r_rq.wq) {
-               qp->r_rq.wq->head = 0;
-               qp->r_rq.wq->tail = 0;
-       }
-}
-
-/**
- * ipath_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if an RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ib_wc wc;
-       int ret = 0;
-
-       if (qp->state == IB_QPS_ERR)
-               goto bail;
-
-       qp->state = IB_QPS_ERR;
-
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       if (!list_empty(&qp->piowait))
-               list_del_init(&qp->piowait);
-       spin_unlock(&dev->pending_lock);
-
-       /* Schedule the sending tasklet to drain the send work queue. */
-       if (qp->s_last != qp->s_head)
-               ipath_schedule_send(qp);
-
-       memset(&wc, 0, sizeof(wc));
-       wc.qp = &qp->ibqp;
-       wc.opcode = IB_WC_RECV;
-
-       if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
-               wc.wr_id = qp->r_wr_id;
-               wc.status = err;
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-       }
-       wc.status = IB_WC_WR_FLUSH_ERR;
-
-       if (qp->r_rq.wq) {
-               struct ipath_rwq *wq;
-               u32 head;
-               u32 tail;
-
-               spin_lock(&qp->r_rq.lock);
-
-               /* sanity check pointers before trusting them */
-               wq = qp->r_rq.wq;
-               head = wq->head;
-               if (head >= qp->r_rq.size)
-                       head = 0;
-               tail = wq->tail;
-               if (tail >= qp->r_rq.size)
-                       tail = 0;
-               while (tail != head) {
-                       wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-                       if (++tail >= qp->r_rq.size)
-                               tail = 0;
-                       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-               }
-               wq->tail = tail;
-
-               spin_unlock(&qp->r_rq.lock);
-       } else if (qp->ibqp.event_handler)
-               ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair whose attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for ipathverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                   int attr_mask, struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_qp *qp = to_iqp(ibqp);
-       enum ib_qp_state cur_state, new_state;
-       int lastwqe = 0;
-       int ret;
-
-       spin_lock_irq(&qp->s_lock);
-
-       cur_state = attr_mask & IB_QP_CUR_STATE ?
-               attr->cur_qp_state : qp->state;
-       new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-       if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-                               attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-               goto inval;
-
-       if (attr_mask & IB_QP_AV) {
-               if (attr->ah_attr.dlid == 0 ||
-                   attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
-                       goto inval;
-
-               if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
-                   (attr->ah_attr.grh.sgid_index > 1))
-                       goto inval;
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX)
-               if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
-                       goto inval;
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER)
-               if (attr->min_rnr_timer > 31)
-                       goto inval;
-
-       if (attr_mask & IB_QP_PORT)
-               if (attr->port_num == 0 ||
-                   attr->port_num > ibqp->device->phys_port_cnt)
-                       goto inval;
-
-       /*
-        * Don't allow invalid Path MTU values, or values greater than
-        * 2048 unless we are configured for a 4KB MTU.
-        */
-       if ((attr_mask & IB_QP_PATH_MTU) &&
-               (ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
-               (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
-               goto inval;
-
-       if (attr_mask & IB_QP_PATH_MIG_STATE)
-               if (attr->path_mig_state != IB_MIG_MIGRATED &&
-                   attr->path_mig_state != IB_MIG_REARM)
-                       goto inval;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
-                       goto inval;
-
-       switch (new_state) {
-       case IB_QPS_RESET:
-               if (qp->state != IB_QPS_RESET) {
-                       qp->state = IB_QPS_RESET;
-                       spin_lock(&dev->pending_lock);
-                       if (!list_empty(&qp->timerwait))
-                               list_del_init(&qp->timerwait);
-                       if (!list_empty(&qp->piowait))
-                               list_del_init(&qp->piowait);
-                       spin_unlock(&dev->pending_lock);
-                       qp->s_flags &= ~IPATH_S_ANY_WAIT;
-                       spin_unlock_irq(&qp->s_lock);
-                       /* Stop the sending tasklet */
-                       tasklet_kill(&qp->s_task);
-                       wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-                       spin_lock_irq(&qp->s_lock);
-               }
-               ipath_reset_qp(qp, ibqp->qp_type);
-               break;
-
-       case IB_QPS_SQD:
-               qp->s_draining = qp->s_last != qp->s_cur;
-               qp->state = new_state;
-               break;
-
-       case IB_QPS_SQE:
-               if (qp->ibqp.qp_type == IB_QPT_RC)
-                       goto inval;
-               qp->state = new_state;
-               break;
-
-       case IB_QPS_ERR:
-               lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-               break;
-
-       default:
-               qp->state = new_state;
-               break;
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX)
-               qp->s_pkey_index = attr->pkey_index;
-
-       if (attr_mask & IB_QP_DEST_QPN)
-               qp->remote_qpn = attr->dest_qp_num;
-
-       if (attr_mask & IB_QP_SQ_PSN) {
-               qp->s_psn = qp->s_next_psn = attr->sq_psn;
-               qp->s_last_psn = qp->s_next_psn - 1;
-       }
-
-       if (attr_mask & IB_QP_RQ_PSN)
-               qp->r_psn = attr->rq_psn;
-
-       if (attr_mask & IB_QP_ACCESS_FLAGS)
-               qp->qp_access_flags = attr->qp_access_flags;
-
-       if (attr_mask & IB_QP_AV) {
-               qp->remote_ah_attr = attr->ah_attr;
-               qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
-       }
-
-       if (attr_mask & IB_QP_PATH_MTU)
-               qp->path_mtu = attr->path_mtu;
-
-       if (attr_mask & IB_QP_RETRY_CNT)
-               qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
-
-       if (attr_mask & IB_QP_RNR_RETRY) {
-               qp->s_rnr_retry = attr->rnr_retry;
-               if (qp->s_rnr_retry > 7)
-                       qp->s_rnr_retry = 7;
-               qp->s_rnr_retry_cnt = qp->s_rnr_retry;
-       }
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER)
-               qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-       if (attr_mask & IB_QP_TIMEOUT)
-               qp->timeout = attr->timeout;
-
-       if (attr_mask & IB_QP_QKEY)
-               qp->qkey = attr->qkey;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-               qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-       spin_unlock_irq(&qp->s_lock);
-
-       if (lastwqe) {
-               struct ib_event ev;
-
-               ev.device = qp->ibqp.device;
-               ev.element.qp = &qp->ibqp;
-               ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-               qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-       }
-       ret = 0;
-       goto bail;
-
-inval:
-       spin_unlock_irq(&qp->s_lock);
-       ret = -EINVAL;
-
-bail:
-       return ret;
-}
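/*
 * Editor's illustration (not part of the original file):
 * ipath_modify_qp() above sits behind the core ib_modify_qp() verb; a
 * consumer drives the usual RESET -> INIT -> RTR -> RTS ladder with
 * calls like the one below.  The attribute values are examples only,
 * not mandated by this driver.
 */
static int example_qp_to_rts(struct ib_qp *qp)
{
        struct ib_qp_attr attr = {
                .qp_state       = IB_QPS_RTS,
                .sq_psn         = 0,    /* example starting PSN */
                .timeout        = 14,
                .retry_cnt      = 7,
                .rnr_retry      = 7,
                .max_rd_atomic  = 1,
        };

        return ib_modify_qp(qp, &attr,
                            IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_TIMEOUT |
                            IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
                            IB_QP_MAX_QP_RD_ATOMIC);
}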
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                  int attr_mask, struct ib_qp_init_attr *init_attr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-
-       attr->qp_state = qp->state;
-       attr->cur_qp_state = attr->qp_state;
-       attr->path_mtu = qp->path_mtu;
-       attr->path_mig_state = 0;
-       attr->qkey = qp->qkey;
-       attr->rq_psn = qp->r_psn;
-       attr->sq_psn = qp->s_next_psn;
-       attr->dest_qp_num = qp->remote_qpn;
-       attr->qp_access_flags = qp->qp_access_flags;
-       attr->cap.max_send_wr = qp->s_size - 1;
-       attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-       attr->cap.max_send_sge = qp->s_max_sge;
-       attr->cap.max_recv_sge = qp->r_rq.max_sge;
-       attr->cap.max_inline_data = 0;
-       attr->ah_attr = qp->remote_ah_attr;
-       memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
-       attr->pkey_index = qp->s_pkey_index;
-       attr->alt_pkey_index = 0;
-       attr->en_sqd_async_notify = 0;
-       attr->sq_draining = qp->s_draining;
-       attr->max_rd_atomic = qp->s_max_rd_atomic;
-       attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-       attr->min_rnr_timer = qp->r_min_rnr_timer;
-       attr->port_num = 1;
-       attr->timeout = qp->timeout;
-       attr->retry_cnt = qp->s_retry_cnt;
-       attr->rnr_retry = qp->s_rnr_retry_cnt;
-       attr->alt_port_num = 0;
-       attr->alt_timeout = 0;
-
-       init_attr->event_handler = qp->ibqp.event_handler;
-       init_attr->qp_context = qp->ibqp.qp_context;
-       init_attr->send_cq = qp->ibqp.send_cq;
-       init_attr->recv_cq = qp->ibqp.recv_cq;
-       init_attr->srq = qp->ibqp.srq;
-       init_attr->cap = attr->cap;
-       if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
-               init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-       else
-               init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-       init_attr->qp_type = qp->ibqp.qp_type;
-       init_attr->port_num = 1;
-       return 0;
-}
-
-/**
- * ipath_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 ipath_compute_aeth(struct ipath_qp *qp)
-{
-       u32 aeth = qp->r_msn & IPATH_MSN_MASK;
-
-       if (qp->ibqp.srq) {
-               /*
-                * Shared receive queues don't generate credits.
-                * Set the credit field to the invalid value.
-                */
-               aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT;
-       } else {
-               u32 min, max, x;
-               u32 credits;
-               struct ipath_rwq *wq = qp->r_rq.wq;
-               u32 head;
-               u32 tail;
-
-               /* sanity check pointers before trusting them */
-               head = wq->head;
-               if (head >= qp->r_rq.size)
-                       head = 0;
-               tail = wq->tail;
-               if (tail >= qp->r_rq.size)
-                       tail = 0;
-               /*
-                * Compute the number of credits available (RWQEs).
-                * XXX Not holding the r_rq.lock here so there is a small
-                * chance that the pair of reads is not atomic.
-                */
-               credits = head - tail;
-               if ((int)credits < 0)
-                       credits += qp->r_rq.size;
-               /*
-                * Binary search the credit table to find the code to
-                * use.
-                */
-               min = 0;
-               max = 31;
-               for (;;) {
-                       x = (min + max) / 2;
-                       if (credit_table[x] == credits)
-                               break;
-                       if (credit_table[x] > credits)
-                               max = x;
-                       else if (min == x)
-                               break;
-                       else
-                               min = x;
-               }
-               aeth |= x << IPATH_AETH_CREDIT_SHIFT;
-       }
-       return cpu_to_be32(aeth);
-}
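/*
 * Editor's note (not part of the original file): worked example of the
 * binary search above.  With head - tail == 100 free RWQEs the search
 * visits x = 15 (192), 7 (12), 11 (48), 13 (96), 14 (128) and settles
 * on x = 13; that is, it advertises the largest table entry that does
 * not exceed the real count.  An equivalent linear restatement:
 */
static u32 example_credits_to_code(u32 credits)
{
        u32 x;

        for (x = 30; x > 0; x--)
                if (credit_table[x] <= credits)
                        break;
        return x;       /* e.g. credits == 100 -> 0xD (96 advertised) */
}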
-
-/**
- * ipath_create_qp - create a queue pair for a device
- * @ibpd: the protection domain whose device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: unused by InfiniPath
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-                             struct ib_qp_init_attr *init_attr,
-                             struct ib_udata *udata)
-{
-       struct ipath_qp *qp;
-       int err;
-       struct ipath_swqe *swq = NULL;
-       struct ipath_ibdev *dev;
-       size_t sz;
-       size_t sg_list_sz;
-       struct ib_qp *ret;
-
-       if (init_attr->create_flags) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
-           init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       /* Check receive queue parameters if no SRQ is specified. */
-       if (!init_attr->srq) {
-               if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
-                   init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-               if (init_attr->cap.max_send_sge +
-                   init_attr->cap.max_send_wr +
-                   init_attr->cap.max_recv_sge +
-                   init_attr->cap.max_recv_wr == 0) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-       }
-
-       switch (init_attr->qp_type) {
-       case IB_QPT_UC:
-       case IB_QPT_RC:
-       case IB_QPT_UD:
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               sz = sizeof(struct ipath_sge) *
-                       init_attr->cap.max_send_sge +
-                       sizeof(struct ipath_swqe);
-               swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
-               if (swq == NULL) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail;
-               }
-               sz = sizeof(*qp);
-               sg_list_sz = 0;
-               if (init_attr->srq) {
-                       struct ipath_srq *srq = to_isrq(init_attr->srq);
-
-                       if (srq->rq.max_sge > 1)
-                               sg_list_sz = sizeof(*qp->r_sg_list) *
-                                       (srq->rq.max_sge - 1);
-               } else if (init_attr->cap.max_recv_sge > 1)
-                       sg_list_sz = sizeof(*qp->r_sg_list) *
-                               (init_attr->cap.max_recv_sge - 1);
-               qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
-               if (!qp) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_swq;
-               }
-               if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
-                   init_attr->qp_type == IB_QPT_SMI ||
-                   init_attr->qp_type == IB_QPT_GSI)) {
-                       qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
-                       if (!qp->r_ud_sg_list) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_qp;
-                       }
-               } else
-                       qp->r_ud_sg_list = NULL;
-               if (init_attr->srq) {
-                       sz = 0;
-                       qp->r_rq.size = 0;
-                       qp->r_rq.max_sge = 0;
-                       qp->r_rq.wq = NULL;
-                       init_attr->cap.max_recv_wr = 0;
-                       init_attr->cap.max_recv_sge = 0;
-               } else {
-                       qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-                       qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-                       sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-                               sizeof(struct ipath_rwqe);
-                       qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
-                                             qp->r_rq.size * sz);
-                       if (!qp->r_rq.wq) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_sg_list;
-                       }
-               }
-
-               /*
-                * ib_create_qp() will initialize qp->ibqp
-                * except for qp->ibqp.qp_num.
-                */
-               spin_lock_init(&qp->s_lock);
-               spin_lock_init(&qp->r_rq.lock);
-               atomic_set(&qp->refcount, 0);
-               init_waitqueue_head(&qp->wait);
-               init_waitqueue_head(&qp->wait_dma);
-               tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
-               INIT_LIST_HEAD(&qp->piowait);
-               INIT_LIST_HEAD(&qp->timerwait);
-               qp->state = IB_QPS_RESET;
-               qp->s_wq = swq;
-               qp->s_size = init_attr->cap.max_send_wr + 1;
-               qp->s_max_sge = init_attr->cap.max_send_sge;
-               if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-                       qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
-               else
-                       qp->s_flags = 0;
-               dev = to_idev(ibpd->device);
-               err = ipath_alloc_qpn(&dev->qp_table, qp,
-                                     init_attr->qp_type);
-               if (err) {
-                       ret = ERR_PTR(err);
-                       vfree(qp->r_rq.wq);
-                       goto bail_sg_list;
-               }
-               qp->ip = NULL;
-               qp->s_tx = NULL;
-               ipath_reset_qp(qp, init_attr->qp_type);
-               break;
-
-       default:
-               /* Don't support raw QPs */
-               ret = ERR_PTR(-ENOSYS);
-               goto bail;
-       }
-
-       init_attr->cap.max_inline_data = 0;
-
-       /*
-        * Return the address of the RWQ as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               if (!qp->r_rq.wq) {
-                       __u64 offset = 0;
-
-                       err = ib_copy_to_udata(udata, &offset,
-                                              sizeof(offset));
-                       if (err) {
-                               ret = ERR_PTR(err);
-                               goto bail_ip;
-                       }
-               } else {
-                       u32 s = sizeof(struct ipath_rwq) +
-                               qp->r_rq.size * sz;
-
-                       qp->ip =
-                           ipath_create_mmap_info(dev, s,
-                                                  ibpd->uobject->context,
-                                                  qp->r_rq.wq);
-                       if (!qp->ip) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_ip;
-                       }
-
-                       err = ib_copy_to_udata(udata, &(qp->ip->offset),
-                                              sizeof(qp->ip->offset));
-                       if (err) {
-                               ret = ERR_PTR(err);
-                               goto bail_ip;
-                       }
-               }
-       }
-
-       spin_lock(&dev->n_qps_lock);
-       if (dev->n_qps_allocated == ib_ipath_max_qps) {
-               spin_unlock(&dev->n_qps_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_qps_allocated++;
-       spin_unlock(&dev->n_qps_lock);
-
-       if (qp->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = &qp->ibqp;
-       goto bail;
-
-bail_ip:
-       if (qp->ip)
-               kref_put(&qp->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(qp->r_rq.wq);
-       ipath_free_qp(&dev->qp_table, qp);
-       free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-bail_sg_list:
-       kfree(qp->r_ud_sg_list);
-bail_qp:
-       kfree(qp);
-bail_swq:
-       vfree(swq);
-bail:
-       return ret;
-}
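/*
 * Editor's illustration (not part of the original file): the checks in
 * ipath_create_qp() above validate what a consumer passes through the
 * core verb.  An example kernel-side creation request (sizes are
 * examples only):
 */
static struct ib_qp *example_create_rc_qp(struct ib_pd *pd,
                                          struct ib_cq *cq)
{
        struct ib_qp_init_attr init_attr = {
                .qp_type     = IB_QPT_RC,
                .send_cq     = cq,
                .recv_cq     = cq,
                .sq_sig_type = IB_SIGNAL_REQ_WR,
                .cap = {
                        .max_send_wr  = 64,
                        .max_recv_wr  = 64,
                        .max_send_sge = 1,
                        .max_recv_sge = 1,
                },
        };

        return ib_create_qp(pd, &init_attr);
}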
-
-/**
- * ipath_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int ipath_destroy_qp(struct ib_qp *ibqp)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-
-       /* Make sure HW and driver activity is stopped. */
-       spin_lock_irq(&qp->s_lock);
-       if (qp->state != IB_QPS_RESET) {
-               qp->state = IB_QPS_RESET;
-               spin_lock(&dev->pending_lock);
-               if (!list_empty(&qp->timerwait))
-                       list_del_init(&qp->timerwait);
-               if (!list_empty(&qp->piowait))
-                       list_del_init(&qp->piowait);
-               spin_unlock(&dev->pending_lock);
-               qp->s_flags &= ~IPATH_S_ANY_WAIT;
-               spin_unlock_irq(&qp->s_lock);
-               /* Stop the sending tasklet */
-               tasklet_kill(&qp->s_task);
-               wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-       } else
-               spin_unlock_irq(&qp->s_lock);
-
-       ipath_free_qp(&dev->qp_table, qp);
-
-       if (qp->s_tx) {
-               atomic_dec(&qp->refcount);
-               if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-                       kfree(qp->s_tx->txreq.map_addr);
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
-               spin_unlock_irq(&dev->pending_lock);
-               qp->s_tx = NULL;
-       }
-
-       wait_event(qp->wait, !atomic_read(&qp->refcount));
-
-       /* all users cleaned up, mark it available */
-       free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-       spin_lock(&dev->n_qps_lock);
-       dev->n_qps_allocated--;
-       spin_unlock(&dev->n_qps_lock);
-
-       if (qp->ip)
-               kref_put(&qp->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(qp->r_rq.wq);
-       kfree(qp->r_ud_sg_list);
-       vfree(qp->s_wq);
-       kfree(qp);
-       return 0;
-}
-
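ipath_destroy_qp() must not free the QP while any other context still
holds a reference, which is what the wait_event() on qp->refcount above
expresses. A hedged userspace model of that ordering, using a condition
variable where the driver uses wait_event()/wake_up(); every name here
is illustrative, not the driver's:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int refcount = 1;                /* one outstanding user */

static void *user(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	refcount--;                     /* analogous to atomic_dec() */
	pthread_cond_signal(&cv);       /* analogous to wake_up() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, user, NULL);
	pthread_mutex_lock(&lock);
	while (refcount)                /* wait_event(qp->wait, ...) */
		pthread_cond_wait(&cv, &lock);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	printf("all references dropped; safe to free\n");
	return 0;
}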
-/**
- * ipath_init_qp_table - initialize the QP table for a device
- * @idev: the device whose QP table we're initializing
- * @size: the size of the QP table
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
-{
-       int i;
-       int ret;
-
-       idev->qp_table.last = 1;        /* QPN 0 and 1 are special. */
-       idev->qp_table.max = size;
-       idev->qp_table.nmaps = 1;
-       idev->qp_table.table = kcalloc(size, sizeof(*idev->qp_table.table),
-                                      GFP_KERNEL);
-       if (idev->qp_table.table == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
-               atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
-               idev->qp_table.map[i].page = NULL;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
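ipath_init_qp_table() above sizes a zeroed pointer table with kcalloc()
and seeds each QPN bitmap page with BITS_PER_PAGE free entries. A hedged
userspace model of that setup; TOY_BITS_PER_PAGE and TOY_NMAPS are
illustrative stand-ins, not the driver's values:

#include <stdio.h>
#include <stdlib.h>

#define TOY_BITS_PER_PAGE 4096  /* assumed page-sized QPN bitmap */
#define TOY_NMAPS 8

struct toy_qp_table {
	int last;               /* QPN 0 and 1 are special */
	int max;
	void **table;           /* QPN -> QP pointer */
	int n_free[TOY_NMAPS];  /* free QPNs per bitmap page */
};

static int toy_init_qp_table(struct toy_qp_table *t, int size)
{
	int i;

	t->last = 1;
	t->max = size;
	t->table = calloc(size, sizeof(*t->table));
	if (!t->table)
		return -1;      /* stands in for -ENOMEM */
	for (i = 0; i < TOY_NMAPS; i++)
		t->n_free[i] = TOY_BITS_PER_PAGE;
	return 0;
}

int main(void)
{
	struct toy_qp_table t;

	printf("init %s\n", toy_init_qp_table(&t, 256) ? "failed" : "ok");
	free(t.table);
	return 0;
}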
-/**
- * ipath_get_credit - process an AETH credit update for a QP
- * @qp: the QP whose send work queue may be restarted
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
-{
-       u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK;
-
-       /*
-        * If the credit is invalid, we can send
-        * as many packets as we like.  Otherwise, we have to
-        * honor the credit field.
-        */
-       if (credit == IPATH_AETH_CREDIT_INVAL)
-               qp->s_lsn = (u32) -1;
-       else if (qp->s_lsn != (u32) -1) {
-               /* Compute new LSN (i.e., MSN + credit) */
-               credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK;
-               if (ipath_cmp24(credit, qp->s_lsn) > 0)
-                       qp->s_lsn = credit;
-       }
-
-       /* Restart sending if it was blocked due to lack of credits. */
-       if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
-           qp->s_cur != qp->s_head &&
-           (qp->s_lsn == (u32) -1 ||
-            ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
-                        qp->s_lsn + 1) <= 0))
-               ipath_schedule_send(qp);
-}
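ipath_get_credit() above extracts the 5-bit credit field from the AETH
and uses ipath_cmp24() to compare 24-bit serial numbers with wraparound;
the helper itself is not in this hunk. A small userspace model of both,
where the shift/mask values are assumptions matching the standard AETH
layout (syndrome in bits 31:24, MSN in bits 23:0):

#include <stdio.h>
#include <stdint.h>

#define AETH_CREDIT_SHIFT 24
#define AETH_CREDIT_MASK  0x1F
#define MSN_MASK          0xFFFFFF

/* Sign-extend the 24-bit difference: negative if a is "before" b,
 * zero if equal, positive if after, wraparound included. */
static int cmp24(uint32_t a, uint32_t b)
{
	return ((int32_t)((a - b) << 8)) >> 8;
}

int main(void)
{
	uint32_t aeth = (4u << AETH_CREDIT_SHIFT) | 0x000123;

	printf("credit field %u, msn %u\n",
	       (unsigned)((aeth >> AETH_CREDIT_SHIFT) & AETH_CREDIT_MASK),
	       (unsigned)(aeth & MSN_MASK));
	printf("cmp24(0x000001, 0xFFFFFF) = %d (1 is after, wrapped)\n",
	       cmp24(0x000001, 0xFFFFFF));
	return 0;
}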
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
deleted file mode 100644 (file)
index d4aa535..0000000
+++ /dev/null
@@ -1,1969 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
-
-static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
-                      u32 psn, u32 pmtu)
-{
-       u32 len;
-
-       len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-       ss->sge = wqe->sg_list[0];
-       ss->sg_list = wqe->sg_list + 1;
-       ss->num_sge = wqe->wr.num_sge;
-       ipath_skip_sge(ss, len);
-       return wqe->length - len;
-}
-
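restart_sge() above converts a PSN delta into a byte offset: packets
already acknowledged are skipped by multiplying the masked difference by
the path MTU. A sketch of that arithmetic, with PSN_MASK assumed to be
the driver's 24-bit IPATH_PSN_MASK:

#include <stdio.h>
#include <stdint.h>

#define PSN_MASK 0xFFFFFF

static uint32_t resend_remaining(uint32_t first_psn, uint32_t restart_psn,
				 uint32_t pmtu, uint32_t wqe_length)
{
	uint32_t skip = ((restart_psn - first_psn) & PSN_MASK) * pmtu;

	return wqe_length - skip;   /* bytes still to (re)send */
}

int main(void)
{
	/* 10000-byte request, 2048-byte MTU, restarting at the 3rd packet */
	printf("%u bytes left\n",
	       (unsigned)resend_remaining(100, 102, 2048, 10000));
	return 0;
}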
-/**
- * ipath_init_restart - initialize the qp->s_sge after a restart
- * @qp: the QP whose SGE we're restarting
- * @wqe: the work queue to initialize the QP's SGE from
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
-{
-       struct ipath_ibdev *dev;
-
-       qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
-                               ib_mtu_enum_to_int(qp->path_mtu));
-       dev = to_idev(qp->ibqp.device);
-       spin_lock(&dev->pending_lock);
-       if (list_empty(&qp->timerwait))
-               list_add_tail(&qp->timerwait,
-                             &dev->pending[dev->pending_index]);
-       spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
- * @dev: the device for this QP
- * @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- *
- * Return 1 if constructed; otherwise, return 0.
- * Note that we are on the responder side of the QP context.
- * Note the QP s_lock must be held.
- */
-static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                            struct ipath_other_headers *ohdr, u32 pmtu)
-{
-       struct ipath_ack_entry *e;
-       u32 hwords;
-       u32 len;
-       u32 bth0;
-       u32 bth2;
-
-       /* Don't send an ACK if we aren't supposed to. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto bail;
-
-       /* header size in 32-bit words LRH+BTH = (8+12)/4. */
-       hwords = 5;
-
-       switch (qp->s_ack_state) {
-       case OP(RDMA_READ_RESPONSE_LAST):
-       case OP(RDMA_READ_RESPONSE_ONLY):
-       case OP(ATOMIC_ACKNOWLEDGE):
-               /*
-                * We can increment the tail pointer now that the last
-                * response has been sent instead of only being
-                * constructed.
-                */
-               if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
-                       qp->s_tail_ack_queue = 0;
-               /* FALLTHROUGH */
-       case OP(SEND_ONLY):
-       case OP(ACKNOWLEDGE):
-               /* Check for no next entry in the queue. */
-               if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-                       if (qp->s_flags & IPATH_S_ACK_PENDING)
-                               goto normal;
-                       qp->s_ack_state = OP(ACKNOWLEDGE);
-                       goto bail;
-               }
-
-               e = &qp->s_ack_queue[qp->s_tail_ack_queue];
-               if (e->opcode == OP(RDMA_READ_REQUEST)) {
-                       /* Copy SGE state in case we need to resend */
-                       qp->s_ack_rdma_sge = e->rdma_sge;
-                       qp->s_cur_sge = &qp->s_ack_rdma_sge;
-                       len = e->rdma_sge.sge.sge_length;
-                       if (len > pmtu) {
-                               len = pmtu;
-                               qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-                       } else {
-                               qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-                               e->sent = 1;
-                       }
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-                       hwords++;
-                       qp->s_ack_rdma_psn = e->psn;
-                       bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-               } else {
-                       /* COMPARE_SWAP or FETCH_ADD */
-                       qp->s_cur_sge = NULL;
-                       len = 0;
-                       qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
-                       ohdr->u.at.aeth = ipath_compute_aeth(qp);
-                       ohdr->u.at.atomic_ack_eth[0] =
-                               cpu_to_be32(e->atomic_data >> 32);
-                       ohdr->u.at.atomic_ack_eth[1] =
-                               cpu_to_be32(e->atomic_data);
-                       hwords += sizeof(ohdr->u.at) / sizeof(u32);
-                       bth2 = e->psn;
-                       e->sent = 1;
-               }
-               bth0 = qp->s_ack_state << 24;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               len = qp->s_ack_rdma_sge.sge.sge_length;
-               if (len > pmtu)
-                       len = pmtu;
-               else {
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-                       hwords++;
-                       qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-                       qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
-               }
-               bth0 = qp->s_ack_state << 24;
-               bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-               break;
-
-       default:
-       normal:
-               /*
-                * Send a regular ACK.
-                * Set the s_ack_state so we wait until after sending
-                * the ACK before setting s_ack_state to ACKNOWLEDGE
-                * (see above).
-                */
-               qp->s_ack_state = OP(SEND_ONLY);
-               qp->s_flags &= ~IPATH_S_ACK_PENDING;
-               qp->s_cur_sge = NULL;
-               if (qp->s_nak_state)
-                       ohdr->u.aeth =
-                               cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-                                           (qp->s_nak_state <<
-                                            IPATH_AETH_CREDIT_SHIFT));
-               else
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-               hwords++;
-               len = 0;
-               bth0 = OP(ACKNOWLEDGE) << 24;
-               bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
-       }
-       qp->s_hdrwords = hwords;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
-       return 1;
-
-bail:
-       return 0;
-}
-
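The ATOMIC_ACKNOWLEDGE arm above returns the 64-bit atomic result as two
big-endian 32-bit words, high word first. A small userspace model of
that packing, using htonl()/ntohl() where the kernel uses cpu_to_be32();
the layout follows the atomic_ack_eth[] assignments above:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint64_t atomic_data = 0x1122334455667788ULL;
	uint32_t eth[2];
	uint64_t back;

	eth[0] = htonl((uint32_t)(atomic_data >> 32)); /* high word first */
	eth[1] = htonl((uint32_t)atomic_data);         /* then low word */

	back = ((uint64_t)ntohl(eth[0]) << 32) | ntohl(eth[1]);
	printf("round trip: %016llx\n", (unsigned long long)back);
	return 0;
}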
-/**
- * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_rc_req(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_other_headers *ohdr;
-       struct ipath_sge_state *ss;
-       struct ipath_swqe *wqe;
-       u32 hwords;
-       u32 len;
-       u32 bth0;
-       u32 bth2;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       char newreq;
-       unsigned long flags;
-       int ret = 0;
-
-       ohdr = &qp->s_hdr.u.oth;
-       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-               ohdr = &qp->s_hdr.u.l.oth;
-
-       /*
-        * The lock is needed to synchronize between the sending tasklet,
-        * the receive interrupt handler, and timeout resends.
-        */
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Sending responses has higher priority than sending requests. */
-       if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-            (qp->s_flags & IPATH_S_ACK_PENDING) ||
-            qp->s_ack_state != OP(ACKNOWLEDGE)) &&
-           ipath_make_rc_ack(dev, qp, ohdr, pmtu))
-               goto done;
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       /* Leave BUSY set until RNR timeout. */
-       if (qp->s_rnr_timeout) {
-               qp->s_flags |= IPATH_S_WAITING;
-               goto bail;
-       }
-
-       /* header size in 32-bit words LRH+BTH = (8+12)/4. */
-       hwords = 5;
-       bth0 = 1 << 22; /* Set M bit */
-
-       /* Send a request. */
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       switch (qp->s_state) {
-       default:
-               if (!(ib_ipath_state_ops[qp->state] &
-                   IPATH_PROCESS_NEXT_SEND_OK))
-                       goto bail;
-               /*
-                * Resend an old request or start a new one.
-                *
-                * We keep track of the current SWQE so that
-                * we don't reset the "furthest progress" state
-                * if we need to back up.
-                */
-               newreq = 0;
-               if (qp->s_cur == qp->s_tail) {
-                       /* Check if send work queue is empty. */
-                       if (qp->s_tail == qp->s_head)
-                               goto bail;
-                       /*
-                        * If a fence is requested, wait for previous
-                        * RDMA read and atomic operations to finish.
-                        */
-                       if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
-                           qp->s_num_rd_atomic) {
-                               qp->s_flags |= IPATH_S_FENCE_PENDING;
-                               goto bail;
-                       }
-                       wqe->psn = qp->s_next_psn;
-                       newreq = 1;
-               }
-               /*
-                * Note that we have to be careful not to modify the
-                * original work request since we may need to resend
-                * it.
-                */
-               len = wqe->length;
-               ss = &qp->s_sge;
-               bth2 = 0;
-               switch (wqe->wr.opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_IMM:
-                       /* If no credit, return. */
-                       if (qp->s_lsn != (u32) -1 &&
-                           ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-                               goto bail;
-                       }
-                       wqe->lpsn = wqe->psn;
-                       if (len > pmtu) {
-                               wqe->lpsn += (len - 1) / pmtu;
-                               qp->s_state = OP(SEND_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_SEND)
-                               qp->s_state = OP(SEND_ONLY);
-                       else {
-                               qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                       }
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-                       bth2 = 1 << 31; /* Request ACK. */
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_WRITE:
-                       if (newreq && qp->s_lsn != (u32) -1)
-                               qp->s_lsn++;
-                       /* FALLTHROUGH */
-               case IB_WR_RDMA_WRITE_WITH_IMM:
-                       /* If no credit, return. */
-                       if (qp->s_lsn != (u32) -1 &&
-                           ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-                               goto bail;
-                       }
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       hwords += sizeof(struct ib_reth) / sizeof(u32);
-                       wqe->lpsn = wqe->psn;
-                       if (len > pmtu) {
-                               wqe->lpsn += (len - 1) / pmtu;
-                               qp->s_state = OP(RDMA_WRITE_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                               qp->s_state = OP(RDMA_WRITE_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                                       bth0 |= 1 << 23;
-                       }
-                       bth2 = 1 << 31; /* Request ACK. */
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_READ:
-                       /*
-                        * Don't allow more operations to be started
-                        * than the QP limits allow.
-                        */
-                       if (newreq) {
-                               if (qp->s_num_rd_atomic >=
-                                   qp->s_max_rd_atomic) {
-                                       qp->s_flags |= IPATH_S_RDMAR_PENDING;
-                                       goto bail;
-                               }
-                               qp->s_num_rd_atomic++;
-                               if (qp->s_lsn != (u32) -1)
-                                       qp->s_lsn++;
-                               /*
-                                * Adjust s_next_psn to count the
-                                * expected number of responses.
-                                */
-                               if (len > pmtu)
-                                       qp->s_next_psn += (len - 1) / pmtu;
-                               wqe->lpsn = qp->s_next_psn++;
-                       }
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       qp->s_state = OP(RDMA_READ_REQUEST);
-                       hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-                       ss = NULL;
-                       len = 0;
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_ATOMIC_CMP_AND_SWP:
-               case IB_WR_ATOMIC_FETCH_AND_ADD:
-                       /*
-                        * Don't allow more operations to be started
-                        * than the QP limits allow.
-                        */
-                       if (newreq) {
-                               if (qp->s_num_rd_atomic >=
-                                   qp->s_max_rd_atomic) {
-                                       qp->s_flags |= IPATH_S_RDMAR_PENDING;
-                                       goto bail;
-                               }
-                               qp->s_num_rd_atomic++;
-                               if (qp->s_lsn != (u32) -1)
-                                       qp->s_lsn++;
-                               wqe->lpsn = wqe->psn;
-                       }
-                       if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-                               qp->s_state = OP(COMPARE_SWAP);
-                               ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->atomic_wr.swap);
-                               ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-                                       wqe->atomic_wr.compare_add);
-                       } else {
-                               qp->s_state = OP(FETCH_ADD);
-                               ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->atomic_wr.compare_add);
-                               ohdr->u.atomic_eth.compare_data = 0;
-                       }
-                       ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-                               wqe->atomic_wr.remote_addr >> 32);
-                       ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-                               wqe->atomic_wr.remote_addr);
-                       ohdr->u.atomic_eth.rkey = cpu_to_be32(
-                               wqe->atomic_wr.rkey);
-                       hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
-                       ss = NULL;
-                       len = 0;
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               default:
-                       goto bail;
-               }
-               qp->s_sge.sge = wqe->sg_list[0];
-               qp->s_sge.sg_list = wqe->sg_list + 1;
-               qp->s_sge.num_sge = wqe->wr.num_sge;
-               qp->s_len = wqe->length;
-               if (newreq) {
-                       qp->s_tail++;
-                       if (qp->s_tail >= qp->s_size)
-                               qp->s_tail = 0;
-               }
-               bth2 |= qp->s_psn & IPATH_PSN_MASK;
-               if (wqe->wr.opcode == IB_WR_RDMA_READ)
-                       qp->s_psn = wqe->lpsn + 1;
-               else {
-                       qp->s_psn++;
-                       if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                               qp->s_next_psn = qp->s_psn;
-               }
-               /*
-                * Put the QP on the pending list so lost ACKs will cause
-                * a retry.  More than one request can be pending so the
-                * QP may already be on the dev->pending list.
-                */
-               spin_lock(&dev->pending_lock);
-               if (list_empty(&qp->timerwait))
-                       list_add_tail(&qp->timerwait,
-                                     &dev->pending[dev->pending_index]);
-               spin_unlock(&dev->pending_lock);
-               break;
-
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               /*
-                * This case can only happen if a send is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               /* FALLTHROUGH */
-       case OP(SEND_FIRST):
-               qp->s_state = OP(SEND_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-               if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                       qp->s_next_psn = qp->s_psn;
-               ss = &qp->s_sge;
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_SEND)
-                       qp->s_state = OP(SEND_LAST);
-               else {
-                       qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-               }
-               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                       bth0 |= 1 << 23;
-               bth2 |= 1 << 31;        /* Request ACK. */
-               qp->s_cur++;
-               if (qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_LAST):
-               /*
-                * This case can only happen if a RDMA write is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_FIRST):
-               qp->s_state = OP(RDMA_WRITE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-               if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                       qp->s_next_psn = qp->s_psn;
-               ss = &qp->s_sge;
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                       qp->s_state = OP(RDMA_WRITE_LAST);
-               else {
-                       qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-               }
-               bth2 |= 1 << 31;        /* Request ACK. */
-               qp->s_cur++;
-               if (qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               /*
-                * This case can only happen if a RDMA read is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-               ohdr->u.rc.reth.vaddr =
-                       cpu_to_be64(wqe->rdma_wr.remote_addr + len);
-               ohdr->u.rc.reth.rkey =
-                       cpu_to_be32(wqe->rdma_wr.rkey);
-               ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
-               qp->s_state = OP(RDMA_READ_REQUEST);
-               hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-               bth2 = qp->s_psn & IPATH_PSN_MASK;
-               qp->s_psn = wqe->lpsn + 1;
-               ss = NULL;
-               len = 0;
-               qp->s_cur++;
-               if (qp->s_cur == qp->s_size)
-                       qp->s_cur = 0;
-               break;
-       }
-       if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
-               bth2 |= 1 << 31;        /* Request ACK. */
-       qp->s_len -= len;
-       qp->s_hdrwords = hwords;
-       qp->s_cur_sge = ss;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
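ipath_make_rc_req() packs the BTH words bit by bit: bth0 carries the
opcode in bits 31:24 plus the M bit (1 << 22) and the solicited-event
bit (1 << 23), while bth2 carries the 24-bit PSN with bit 31 used to
request an ACK. A pack/unpack sketch; field positions are taken from the
code above and the helper names are illustrative:

#include <stdio.h>
#include <stdint.h>

#define PSN_MASK    0xFFFFFF
#define BTH0_M      (1u << 22)   /* "Set M bit" in the code above */
#define BTH0_SE     (1u << 23)   /* solicited event */
#define BTH2_ACKREQ (1u << 31)   /* request an ACK for this packet */

static uint32_t make_bth0(uint8_t opcode, int solicited)
{
	return ((uint32_t)opcode << 24) | BTH0_M |
	       (solicited ? BTH0_SE : 0);
}

static uint32_t make_bth2(uint32_t psn, int want_ack)
{
	return (psn & PSN_MASK) | (want_ack ? BTH2_ACKREQ : 0);
}

int main(void)
{
	uint32_t bth0 = make_bth0(0x04 /* e.g. the RC SEND_ONLY code */, 1);
	uint32_t bth2 = make_bth2(0x123456, 1);

	printf("bth0=%08x bth2=%08x psn=%06x\n",
	       (unsigned)bth0, (unsigned)bth2, (unsigned)(bth2 & PSN_MASK));
	return 0;
}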
-/**
- * send_rc_ack - Construct an ACK packet and send it
- * @qp: a pointer to the QP
- *
- * This is called from ipath_rc_rcv() and only uses the receive
- * side QP state.
- * Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
- */
-static void send_rc_ack(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_devdata *dd;
-       u16 lrh0;
-       u32 bth0;
-       u32 hwords;
-       u32 __iomem *piobuf;
-       struct ipath_ib_header hdr;
-       struct ipath_other_headers *ohdr;
-       unsigned long flags;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-       if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-           (qp->s_flags & IPATH_S_ACK_PENDING) ||
-           qp->s_ack_state != OP(ACKNOWLEDGE))
-               goto queue_ack;
-
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-       /* Don't try to send ACKs if the link isn't ACTIVE */
-       dd = dev->dd;
-       if (!(dd->ipath_flags & IPATH_LINKACTIVE))
-               goto done;
-
-       piobuf = ipath_getpiobuf(dd, 0, NULL);
-       if (!piobuf) {
-               /*
-                * We are out of PIO buffers at the moment.
-                * Pass responsibility for sending the ACK to the
-                * send tasklet so that when a PIO buffer becomes
-                * available, the ACK is sent ahead of other outgoing
-                * packets.
-                */
-               spin_lock_irqsave(&qp->s_lock, flags);
-               goto queue_ack;
-       }
-
-       /* Construct the header. */
-       ohdr = &hdr.u.oth;
-       lrh0 = IPATH_LRH_BTH;
-       /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
-       hwords = 6;
-       if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-               hwords += ipath_make_grh(dev, &hdr.u.l.grh,
-                                        &qp->remote_ah_attr.grh,
-                                        hwords, 0);
-               ohdr = &hdr.u.l.oth;
-               lrh0 = IPATH_LRH_GRH;
-       }
-       /* read pkey_index w/o lock (it's atomic) */
-       bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
-               (OP(ACKNOWLEDGE) << 24) | (1 << 22);
-       if (qp->r_nak_state)
-               ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-                                           (qp->r_nak_state <<
-                                            IPATH_AETH_CREDIT_SHIFT));
-       else
-               ohdr->u.aeth = ipath_compute_aeth(qp);
-       lrh0 |= qp->remote_ah_attr.sl << 4;
-       hdr.lrh[0] = cpu_to_be16(lrh0);
-       hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-       hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-       hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
-                                qp->remote_ah_attr.src_path_bits);
-       ohdr->bth[0] = cpu_to_be32(bth0);
-       ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
-
-       writeq(hwords + 1, piobuf);
-
-       if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-               u32 *hdrp = (u32 *) &hdr;
-
-               ipath_flush_wc();
-               __iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
-               ipath_flush_wc();
-               __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
-       } else
-               __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
-
-       ipath_flush_wc();
-
-       dev->n_unicast_xmit++;
-       goto done;
-
-queue_ack:
-       if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
-               dev->n_rc_qacks++;
-               qp->s_flags |= IPATH_S_ACK_PENDING;
-               qp->s_nak_state = qp->r_nak_state;
-               qp->s_ack_psn = qp->r_ack_psn;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-       }
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return;
-}
-
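send_rc_ack() sizes its header in 32-bit words: LRH (8 bytes) plus BTH
(12) plus AETH (4) is six words, and an optional GRH adds 40 bytes, the
standard IB GRH size, assumed here. A tiny arithmetic check:

#include <stdio.h>

int main(void)
{
	int hwords = (8 + 12 + 4) / 4;     /* LRH + BTH + AETH */
	int with_grh = hwords + 40 / 4;    /* optional GRH */

	printf("ACK header: %d words, %d with GRH\n", hwords, with_grh);
	return 0;
}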
-/**
- * reset_psn - reset the QP state to send starting from PSN
- * @qp: the QP
- * @psn: the packet sequence number to restart at
- *
- * This is called from ipath_rc_rcv() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held.
- */
-static void reset_psn(struct ipath_qp *qp, u32 psn)
-{
-       u32 n = qp->s_last;
-       struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
-       u32 opcode;
-
-       qp->s_cur = n;
-
-       /*
-        * If we are starting the request from the beginning,
-        * let the normal send code handle initialization.
-        */
-       if (ipath_cmp24(psn, wqe->psn) <= 0) {
-               qp->s_state = OP(SEND_LAST);
-               goto done;
-       }
-
-       /* Find the work request opcode corresponding to the given PSN. */
-       opcode = wqe->wr.opcode;
-       for (;;) {
-               int diff;
-
-               if (++n == qp->s_size)
-                       n = 0;
-               if (n == qp->s_tail)
-                       break;
-               wqe = get_swqe_ptr(qp, n);
-               diff = ipath_cmp24(psn, wqe->psn);
-               if (diff < 0)
-                       break;
-               qp->s_cur = n;
-               /*
-                * If we are starting the request from the beginning,
-                * let the normal send code handle initialization.
-                */
-               if (diff == 0) {
-                       qp->s_state = OP(SEND_LAST);
-                       goto done;
-               }
-               opcode = wqe->wr.opcode;
-       }
-
-       /*
-        * Set the state to restart in the middle of a request.
-        * Don't change the s_sge, s_cur_sge, or s_cur_size.
-        * See ipath_make_rc_req().
-        */
-       switch (opcode) {
-       case IB_WR_SEND:
-       case IB_WR_SEND_WITH_IMM:
-               qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
-               break;
-
-       case IB_WR_RDMA_WRITE:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
-               break;
-
-       case IB_WR_RDMA_READ:
-               qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-               break;
-
-       default:
-               /*
-                * This case shouldn't happen since there is
-                * only one PSN per request.
-                */
-               qp->s_state = OP(SEND_LAST);
-       }
-done:
-       qp->s_psn = psn;
-}
-
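reset_psn() above walks the ring of send WQEs from s_last, wrapping at
s_size, until it finds the request whose PSN range covers the restart
point. A self-contained sketch of that scan; cmp24() repeats the 24-bit
comparison modeled earlier, and the WQE layout is a stand-in, not the
driver's:

#include <stdio.h>
#include <stdint.h>

struct toy_wqe { uint32_t psn; };

static int cmp24(uint32_t a, uint32_t b)
{
	return ((int32_t)((a - b) << 8)) >> 8;
}

static uint32_t find_wqe(const struct toy_wqe *wq, uint32_t s_size,
			 uint32_t s_last, uint32_t s_tail, uint32_t psn)
{
	uint32_t n = s_last;

	for (;;) {
		uint32_t next = (n + 1 == s_size) ? 0 : n + 1;

		if (next == s_tail || cmp24(psn, wq[next].psn) < 0)
			return n;       /* psn falls inside entry n */
		n = next;
	}
}

int main(void)
{
	/* four in-flight requests starting at PSNs 100, 110, 120, 130 */
	struct toy_wqe wq[4] = { {100}, {110}, {120}, {130} };

	printf("psn 115 -> wqe %u\n",
	       (unsigned)find_wqe(wq, 4, 0, 0, 115));
	return 0;
}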
-/**
- * ipath_restart_rc - back up requester to resend the last un-ACKed request
- * @qp: the QP to restart
- * @psn: packet sequence number for the request
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
-{
-       struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-       struct ipath_ibdev *dev;
-
-       if (qp->s_retry == 0) {
-               ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-               ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-               goto bail;
-       }
-       qp->s_retry--;
-
-       /*
-        * Remove the QP from the timeout queue.
-        * Note: it may already have been removed by ipath_ib_timer().
-        */
-       dev = to_idev(qp->ibqp.device);
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       if (!list_empty(&qp->piowait))
-               list_del_init(&qp->piowait);
-       spin_unlock(&dev->pending_lock);
-
-       if (wqe->wr.opcode == IB_WR_RDMA_READ)
-               dev->n_rc_resends++;
-       else
-               dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
-
-       reset_psn(qp, psn);
-       ipath_schedule_send(qp);
-
-bail:
-       return;
-}
-
-static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
-{
-       qp->s_last_psn = psn;
-}
-
-/**
- * do_rc_ack - process an incoming RC ACK
- * @qp: the QP the ACK came in on
- * @aeth: the AETH word from the ACK packet
- * @psn: the packet sequence number of the ACK
- * @opcode: the opcode of the request that resulted in the ACK
- * @val: the value returned by an atomic operation, if any
- *
- * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held and interrupts disabled.
- * Returns 1 if OK, 0 if current operation should be aborted (NAK).
- */
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
-                    u64 val)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ib_wc wc;
-       enum ib_wc_status status;
-       struct ipath_swqe *wqe;
-       int ret = 0;
-       u32 ack_psn;
-       int diff;
-
-       /*
-        * Remove the QP from the timeout queue (or RNR timeout queue).
-        * If ipath_ib_timer() has already removed it,
-        * it's OK since we hold the QP s_lock and ipath_restart_rc()
-        * just won't find anything to restart if we ACK everything.
-        */
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       spin_unlock(&dev->pending_lock);
-
-       /*
-        * Note that NAKs implicitly ACK outstanding SEND and RDMA write
-        * requests and implicitly NAK RDMA read and atomic requests issued
-        * before the NAK'ed request.  The MSN won't include the NAK'ed
-        * request but will include any ACK'ed requests.
-        */
-       ack_psn = psn;
-       if (aeth >> 29)
-               ack_psn--;
-       wqe = get_swqe_ptr(qp, qp->s_last);
-
-       /*
-        * The MSN might be for a later WQE than the PSN indicates so
-        * only complete WQEs that the PSN finishes.
-        */
-       while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
-               /*
-                * RDMA_READ_RESPONSE_ONLY is a special case since
-                * we want to generate completion events for everything
-                * before the RDMA read, copy the data, then generate
-                * the completion for the read.
-                */
-               if (wqe->wr.opcode == IB_WR_RDMA_READ &&
-                   opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
-                   diff == 0) {
-                       ret = 1;
-                       goto bail;
-               }
-               /*
-                * If this request is a RDMA read or atomic, and the ACK is
-                * for a later operation, this ACK NAKs the RDMA read or
-                * atomic.  In other words, only a RDMA_READ_LAST or ONLY
-                * can ACK a RDMA read and likewise for atomic ops.  Note
-                * that the NAK case can only happen if relaxed ordering is
-                * used and requests are sent after an RDMA read or atomic
-                * is sent but before the response is received.
-                */
-               if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-                    (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
-                   ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-                    (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
-                       /*
-                        * The last valid PSN seen is the previous
-                        * request's.
-                        */
-                       update_last_psn(qp, wqe->psn - 1);
-                       /* Retry this request. */
-                       ipath_restart_rc(qp, wqe->psn);
-                       /*
-                        * No need to process the ACK/NAK since we are
-                        * restarting an earlier request.
-                        */
-                       goto bail;
-               }
-               if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                   wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-                       *(u64 *) wqe->sg_list[0].vaddr = val;
-               if (qp->s_num_rd_atomic &&
-                   (wqe->wr.opcode == IB_WR_RDMA_READ ||
-                    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
-                       qp->s_num_rd_atomic--;
-                       /* Restart sending task if fence is complete */
-                       if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
-                            !qp->s_num_rd_atomic) ||
-                           qp->s_flags & IPATH_S_RDMAR_PENDING)
-                               ipath_schedule_send(qp);
-               }
-               /* Post a send completion queue entry if requested. */
-               if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-                   (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-                       memset(&wc, 0, sizeof wc);
-                       wc.wr_id = wqe->wr.wr_id;
-                       wc.status = IB_WC_SUCCESS;
-                       wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-                       wc.byte_len = wqe->length;
-                       wc.qp = &qp->ibqp;
-                       wc.src_qp = qp->remote_qpn;
-                       wc.slid = qp->remote_ah_attr.dlid;
-                       wc.sl = qp->remote_ah_attr.sl;
-                       ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
-               }
-               qp->s_retry = qp->s_retry_cnt;
-               /*
-                * If we are completing a request which is in the process of
-                * being resent, we can stop resending it since we know the
-                * responder has already seen it.
-                */
-               if (qp->s_last == qp->s_cur) {
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       qp->s_last = qp->s_cur;
-                       if (qp->s_last == qp->s_tail)
-                               break;
-                       wqe = get_swqe_ptr(qp, qp->s_cur);
-                       qp->s_state = OP(SEND_LAST);
-                       qp->s_psn = wqe->psn;
-               } else {
-                       if (++qp->s_last >= qp->s_size)
-                               qp->s_last = 0;
-                       if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
-                               qp->s_draining = 0;
-                       if (qp->s_last == qp->s_tail)
-                               break;
-                       wqe = get_swqe_ptr(qp, qp->s_last);
-               }
-       }
-
-       switch (aeth >> 29) {
-       case 0:         /* ACK */
-               dev->n_rc_acks++;
-               /* If this is a partial ACK, reset the retransmit timer. */
-               if (qp->s_last != qp->s_tail) {
-                       spin_lock(&dev->pending_lock);
-                       if (list_empty(&qp->timerwait))
-                               list_add_tail(&qp->timerwait,
-                                       &dev->pending[dev->pending_index]);
-                       spin_unlock(&dev->pending_lock);
-                       /*
-                        * If we get a partial ACK for a resent operation,
-                        * we can stop resending the earlier packets and
-                        * continue with the next packet the receiver wants.
-                        */
-                       if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-                               reset_psn(qp, psn + 1);
-                               ipath_schedule_send(qp);
-                       }
-               } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-                       qp->s_state = OP(SEND_LAST);
-                       qp->s_psn = psn + 1;
-               }
-               ipath_get_credit(qp, aeth);
-               qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-               qp->s_retry = qp->s_retry_cnt;
-               update_last_psn(qp, psn);
-               ret = 1;
-               goto bail;
-
-       case 1:         /* RNR NAK */
-               dev->n_rnr_naks++;
-               if (qp->s_last == qp->s_tail)
-                       goto bail;
-               if (qp->s_rnr_retry == 0) {
-                       status = IB_WC_RNR_RETRY_EXC_ERR;
-                       goto class_b;
-               }
-               if (qp->s_rnr_retry_cnt < 7)
-                       qp->s_rnr_retry--;
-
-               /* The last valid PSN is the previous PSN. */
-               update_last_psn(qp, psn - 1);
-
-               if (wqe->wr.opcode == IB_WR_RDMA_READ)
-                       dev->n_rc_resends++;
-               else
-                       dev->n_rc_resends +=
-                               (qp->s_psn - psn) & IPATH_PSN_MASK;
-
-               reset_psn(qp, psn);
-
-               qp->s_rnr_timeout =
-                       ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
-                                          IPATH_AETH_CREDIT_MASK];
-               ipath_insert_rnr_queue(qp);
-               ipath_schedule_send(qp);
-               goto bail;
-
-       case 3:         /* NAK */
-               if (qp->s_last == qp->s_tail)
-                       goto bail;
-               /* The last valid PSN is the previous PSN. */
-               update_last_psn(qp, psn - 1);
-               switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
-                       IPATH_AETH_CREDIT_MASK) {
-               case 0: /* PSN sequence error */
-                       dev->n_seq_naks++;
-                       /*
-                        * Back up to the responder's expected PSN.
-                        * Note that we might get a NAK in the middle of an
-                        * RDMA READ response which terminates the RDMA
-                        * READ.
-                        */
-                       ipath_restart_rc(qp, psn);
-                       break;
-
-               case 1: /* Invalid Request */
-                       status = IB_WC_REM_INV_REQ_ERR;
-                       dev->n_other_naks++;
-                       goto class_b;
-
-               case 2: /* Remote Access Error */
-                       status = IB_WC_REM_ACCESS_ERR;
-                       dev->n_other_naks++;
-                       goto class_b;
-
-               case 3: /* Remote Operation Error */
-                       status = IB_WC_REM_OP_ERR;
-                       dev->n_other_naks++;
-               class_b:
-                       ipath_send_complete(qp, wqe, status);
-                       ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-                       break;
-
-               default:
-                       /* Ignore other reserved NAK error codes */
-                       goto reserved;
-               }
-               qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-               goto bail;
-
-       default:                /* 2: reserved */
-       reserved:
-               /* Ignore reserved NAK codes. */
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
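The aeth >> 29 switch in do_rc_ack() distinguishes plain ACKs (0), RNR
NAKs (1), and NAKs (3), with 2 reserved; the low five bits of the
syndrome then carry the credit count or NAK code and the low 24 bits
the MSN. A decode sketch mirroring the shifts and masks used above
(constant values assumed, helper names illustrative):

#include <stdio.h>
#include <stdint.h>

#define AETH_CREDIT_SHIFT 24
#define AETH_CREDIT_MASK  0x1F
#define MSN_MASK          0xFFFFFF

static const char *aeth_type(uint32_t aeth)
{
	switch (aeth >> 29) {
	case 0:  return "ACK";
	case 1:  return "RNR NAK";
	case 3:  return "NAK";
	default: return "reserved";
	}
}

int main(void)
{
	uint32_t nak = (3u << 29) | 42;   /* NAK code 0, MSN 42 */

	printf("%s, code %u, msn %u\n", aeth_type(nak),
	       (unsigned)((nak >> AETH_CREDIT_SHIFT) & AETH_CREDIT_MASK),
	       (unsigned)(nak & MSN_MASK));
	return 0;
}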
-/**
- * ipath_rc_rcv_resp - process an incoming RC response packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @hdrsize: the header length
- * @pmtu: the path MTU
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an incoming RC response
- * packet for the given QP.
- * Called at interrupt level.
- */
-static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
-                                    struct ipath_other_headers *ohdr,
-                                    void *data, u32 tlen,
-                                    struct ipath_qp *qp,
-                                    u32 opcode,
-                                    u32 psn, u32 hdrsize, u32 pmtu,
-                                    int header_in_data)
-{
-       struct ipath_swqe *wqe;
-       enum ib_wc_status status;
-       unsigned long flags;
-       int diff;
-       u32 pad;
-       u32 aeth;
-       u64 val;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Double check we can process this now that we hold the s_lock. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto ack_done;
-
-       /* Ignore invalid responses. */
-       if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
-               goto ack_done;
-
-       /* Ignore duplicate responses. */
-       diff = ipath_cmp24(psn, qp->s_last_psn);
-       if (unlikely(diff <= 0)) {
-               /* Update credits for "ghost" ACKs */
-               if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
-                       if (!header_in_data)
-                               aeth = be32_to_cpu(ohdr->u.aeth);
-                       else {
-                               aeth = be32_to_cpu(((__be32 *) data)[0]);
-                               data += sizeof(__be32);
-                       }
-                       if ((aeth >> 29) == 0)
-                               ipath_get_credit(qp, aeth);
-               }
-               goto ack_done;
-       }
-
-       if (unlikely(qp->s_last == qp->s_tail))
-               goto ack_done;
-       wqe = get_swqe_ptr(qp, qp->s_last);
-       status = IB_WC_SUCCESS;
-
-       switch (opcode) {
-       case OP(ACKNOWLEDGE):
-       case OP(ATOMIC_ACKNOWLEDGE):
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else {
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               }
-               if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-                       if (!header_in_data) {
-                               __be32 *p = ohdr->u.at.atomic_ack_eth;
-
-                               val = ((u64) be32_to_cpu(p[0]) << 32) |
-                                       be32_to_cpu(p[1]);
-                       } else
-                               val = be64_to_cpu(((__be64 *) data)[0]);
-               } else
-                       val = 0;
-               if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
-                   opcode != OP(RDMA_READ_RESPONSE_FIRST))
-                       goto ack_done;
-               hdrsize += 4;
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-               qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
-               /*
-                * If this is a response to a resent RDMA read, we
-                * have to be careful to copy the data to the right
-                * location.
-                */
-               qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-                                                 wqe, psn, pmtu);
-               goto read_middle;
-
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               /* no AETH, no ACK */
-               if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-                       dev->n_rdma_seq++;
-                       if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-                               goto ack_done;
-                       qp->r_flags |= IPATH_R_RDMAR_SEQ;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-                       goto ack_done;
-               }
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-       read_middle:
-               if (unlikely(tlen != (hdrsize + pmtu + 4)))
-                       goto ack_len_err;
-               if (unlikely(pmtu >= qp->s_rdma_read_len))
-                       goto ack_len_err;
-
-               /* We got a response so update the timeout. */
-               spin_lock(&dev->pending_lock);
-               if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
-                       list_move_tail(&qp->timerwait,
-                                      &dev->pending[dev->pending_index]);
-               spin_unlock(&dev->pending_lock);
-
-               if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
-                       qp->s_retry = qp->s_retry_cnt;
-
-               /*
-                * Update the RDMA receive state but do the copy w/o
-                * holding the locks and blocking interrupts.
-                */
-               qp->s_rdma_read_len -= pmtu;
-               update_last_psn(qp, psn);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
-               goto bail;
-
-       case OP(RDMA_READ_RESPONSE_ONLY):
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-               if (!do_rc_ack(qp, aeth, psn, opcode, 0))
-                       goto ack_done;
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /*
-                * Check that the data size is >= 0 && <= pmtu.
-                * Remember to account for the AETH header (4) and
-                * ICRC (4).
-                */
-               if (unlikely(tlen < (hdrsize + pad + 8)))
-                       goto ack_len_err;
-               /*
-                * If this is a response to a resent RDMA read, we
-                * have to be careful to copy the data to the right
-                * location.
-                */
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-                                                 wqe, psn, pmtu);
-               goto read_last;
-
-       case OP(RDMA_READ_RESPONSE_LAST):
-               /* ACKs READ req. */
-               if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-                       dev->n_rdma_seq++;
-                       if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-                               goto ack_done;
-                       qp->r_flags |= IPATH_R_RDMAR_SEQ;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-                       goto ack_done;
-               }
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /*
-                * Check that the data size is >= 1 && <= pmtu.
-                * Remember to account for the AETH header (4) and
-                * ICRC (4).
-                */
-               if (unlikely(tlen <= (hdrsize + pad + 8)))
-                       goto ack_len_err;
-       read_last:
-               tlen -= hdrsize + pad + 8;
-               if (unlikely(tlen != qp->s_rdma_read_len))
-                       goto ack_len_err;
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else {
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               }
-               ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
-               (void) do_rc_ack(qp, aeth, psn,
-                                OP(RDMA_READ_RESPONSE_LAST), 0);
-               goto ack_done;
-       }
-
-ack_op_err:
-       status = IB_WC_LOC_QP_OP_ERR;
-       goto ack_err;
-
-ack_len_err:
-       status = IB_WC_LOC_LEN_ERR;
-ack_err:
-       ipath_send_complete(qp, wqe, status);
-       ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-ack_done:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-bail:
-       return;
-}
-
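The RDMA read response paths above validate packet lengths by pulling
the pad count from bits 21:20 of bth0 and subtracting the trailing AETH
(4 bytes) and ICRC (4 bytes) before comparing against the bytes still
expected. A sketch of that check; field positions follow the code above:

#include <stdio.h>
#include <stdint.h>

static int payload_len(uint32_t tlen, uint32_t hdrsize, uint32_t bth0)
{
	uint32_t pad = (bth0 >> 20) & 3;

	if (tlen < hdrsize + pad + 8)
		return -1;              /* malformed: too short */
	return tlen - (hdrsize + pad + 8);
}

int main(void)
{
	/* 28-byte header, 1 pad byte, 100 payload bytes + AETH + ICRC */
	uint32_t bth0 = 1u << 20;

	printf("payload = %d\n", payload_len(28 + 1 + 8 + 100, 28, bth0));
	return 0;
}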
-/**
- * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @diff: the difference between the PSN and the expected PSN
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an unexpected
- * incoming RC packet for the given QP.
- * Called at interrupt level.
- * Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent.
- */
-static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
-                                    struct ipath_other_headers *ohdr,
-                                    void *data,
-                                    struct ipath_qp *qp,
-                                    u32 opcode,
-                                    u32 psn,
-                                    int diff,
-                                    int header_in_data)
-{
-       struct ipath_ack_entry *e;
-       u8 i, prev;
-       int old_req;
-       unsigned long flags;
-
-       if (diff > 0) {
-               /*
-                * Packet sequence error.
-                * A NAK will ACK earlier sends and RDMA writes.
-                * Don't queue the NAK if we already sent one.
-                */
-               if (!qp->r_nak_state) {
-                       qp->r_nak_state = IB_NAK_PSN_ERROR;
-                       /* Use the expected PSN. */
-                       qp->r_ack_psn = qp->r_psn;
-                       goto send_ack;
-               }
-               goto done;
-       }
-
-       /*
-        * Handle a duplicate request.  Don't re-execute SEND, RDMA
-        * write or atomic op.  Don't NAK errors, just silently drop
-        * the duplicate request.  Note that r_sge, r_len, and
-        * r_rcv_len may be in use so don't modify them.
-        *
-        * We are supposed to ACK the earliest duplicate PSN but we
-        * can coalesce an outstanding duplicate ACK.  We have to
-        * send the earliest so that RDMA reads can be restarted at
-        * the requester's expected PSN.
-        *
-        * First, find where this duplicate PSN falls within the
-        * ACKs previously sent.
-        */
-       psn &= IPATH_PSN_MASK;
-       e = NULL;
-       old_req = 1;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-       /* Double check we can process this now that we hold the s_lock. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto unlock_done;
-
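-       /*
-        * Walk backwards from the newest ack queue entry and stop at
-        * the first one whose starting PSN is at or before the
-        * duplicate PSN; that entry's response covers the duplicate.
-        */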
-       for (i = qp->r_head_ack_queue; ; i = prev) {
-               if (i == qp->s_tail_ack_queue)
-                       old_req = 0;
-               if (i)
-                       prev = i - 1;
-               else
-                       prev = IPATH_MAX_RDMA_ATOMIC;
-               if (prev == qp->r_head_ack_queue) {
-                       e = NULL;
-                       break;
-               }
-               e = &qp->s_ack_queue[prev];
-               if (!e->opcode) {
-                       e = NULL;
-                       break;
-               }
-               if (ipath_cmp24(psn, e->psn) >= 0) {
-                       if (prev == qp->s_tail_ack_queue)
-                               old_req = 0;
-                       break;
-               }
-       }
-       switch (opcode) {
-       case OP(RDMA_READ_REQUEST): {
-               struct ib_reth *reth;
-               u32 offset;
-               u32 len;
-
-               /*
-                * If we didn't find the RDMA read request in the ack queue,
-                * or the send tasklet is already backed up to send an
-                * earlier entry, we can ignore this request.
-                */
-               if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
-                       goto unlock_done;
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               /*
-                * Address range must be a subset of the original
-                * request and start on pmtu boundaries.
-                * We reuse the old ack_queue slot since the requester
-                * should not back up and request an earlier PSN for the
-                * same request.
-                */
-               offset = ((psn - e->psn) & IPATH_PSN_MASK) *
-                       ib_mtu_enum_to_int(qp->path_mtu);
-               len = be32_to_cpu(reth->length);
-               if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
-                       goto unlock_done;
-               if (len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       ok = ipath_rkey_ok(qp, &e->rdma_sge,
-                                          len, vaddr, rkey,
-                                          IB_ACCESS_REMOTE_READ);
-                       if (unlikely(!ok))
-                               goto unlock_done;
-               } else {
-                       e->rdma_sge.sg_list = NULL;
-                       e->rdma_sge.num_sge = 0;
-                       e->rdma_sge.sge.mr = NULL;
-                       e->rdma_sge.sge.vaddr = NULL;
-                       e->rdma_sge.sge.length = 0;
-                       e->rdma_sge.sge.sge_length = 0;
-               }
-               e->psn = psn;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = prev;
-               break;
-       }
-
-       case OP(COMPARE_SWAP):
-       case OP(FETCH_ADD): {
-               /*
-                * If we didn't find the atomic request in the ack queue
-                * or the send tasklet is already backed up to send an
-                * earlier entry, we can ignore this request.
-                */
-               if (!e || e->opcode != (u8) opcode || old_req)
-                       goto unlock_done;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = prev;
-               break;
-       }
-
-       default:
-               if (old_req)
-                       goto unlock_done;
-               /*
-                * Resend the most recent ACK if this request is
-                * after all the previous RDMA reads and atomics.
-                */
-               if (i == qp->r_head_ack_queue) {
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       qp->r_nak_state = 0;
-                       qp->r_ack_psn = qp->r_psn - 1;
-                       goto send_ack;
-               }
-               /*
-                * Try to send a simple ACK to work around a Mellanox bug
-                * which doesn't accept an RDMA read response or atomic
-                * response as an ACK for earlier SENDs or RDMA writes.
-                */
-               if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
-                   !(qp->s_flags & IPATH_S_ACK_PENDING) &&
-                   qp->s_ack_state == OP(ACKNOWLEDGE)) {
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       qp->r_nak_state = 0;
-                       qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
-                       goto send_ack;
-               }
-               /*
-                * Resend the RDMA read or atomic op which
-                * ACKs this duplicate request.
-                */
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = i;
-               break;
-       }
-       qp->r_nak_state = 0;
-       ipath_schedule_send(qp);
-
-unlock_done:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return 1;
-
-send_ack:
-       return 0;
-}
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
-{
-       unsigned long flags;
-       int lastwqe;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-       lastwqe = ipath_error_qp(qp, err);
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-       if (lastwqe) {
-               struct ib_event ev;
-
-               ev.device = qp->ibqp.device;
-               ev.element.qp = &qp->ibqp;
-               ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-               qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-       }
-}
-
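-/*
- * If entry @n is still the tail of the ack queue, advance the tail
- * past it (wrapping at IPATH_MAX_RDMA_ATOMIC) and reset the ack
- * state machine so the slot can be reused.
- */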
-static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
-{
-       unsigned next;
-
-       next = n + 1;
-       if (next > IPATH_MAX_RDMA_ATOMIC)
-               next = 0;
-       if (n == qp->s_tail_ack_queue) {
-               qp->s_tail_ack_queue = next;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-       }
-}
-
-/**
- * ipath_rc_rcv - process an incoming RC packet
- * @dev: the device this packet came in on
- * @hdr: the header of this packet
- * @has_grh: true if the header has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- *
- * This is called from ipath_qp_rcv() to process an incoming RC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       u32 opcode;
-       u32 hdrsize;
-       u32 psn;
-       u32 pad;
-       struct ib_wc wc;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       int diff;
-       struct ib_reth *reth;
-       int header_in_data;
-       unsigned long flags;
-
-       /* Validate the SLID. See Ch. 9.6.1.5 */
-       if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-               goto done;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12;       /* LRH + BTH */
-               psn = be32_to_cpu(ohdr->bth[2]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
-               /*
-                * The header with GRH is 60 bytes and the core driver sets
-                * the eager header buffer size to 56 bytes, so the last 4
-                * bytes of the BTH (the PSN word) land in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       psn = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               } else
-                       psn = be32_to_cpu(ohdr->bth[2]);
-       }
-
-       /*
-        * Process responses (ACKs) before anything else.  Note that the
-        * packet sequence number will be for something in the send work
-        * queue rather than the expected receive packet sequence number.
-        * In other words, this QP is the requester.
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
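-       /*
-        * The RC response opcodes are contiguous, so a range check is
-        * enough to route them to the requester-side response handler.
-        */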
-       if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
-           opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
-               ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
-                                 hdrsize, pmtu, header_in_data);
-               goto done;
-       }
-
-       /*
-        * Compute the 24-bit circular PSN difference: positive means
-        * the packet is ahead of the expected PSN, negative means a
-        * duplicate.
-        */
-       diff = ipath_cmp24(psn, qp->r_psn);
-       if (unlikely(diff)) {
-               if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
-                                      psn, diff, header_in_data))
-                       goto done;
-               goto send_ack;
-       }
-
-       /* Check for opcode sequence errors. */
-       switch (qp->r_state) {
-       case OP(SEND_FIRST):
-       case OP(SEND_MIDDLE):
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-                       break;
-               goto nack_inv;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_MIDDLE):
-               if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       break;
-               goto nack_inv;
-
-       default:
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       goto nack_inv;
-               /*
-                * Note that it is up to the requester to not send a new
-                * RDMA read or atomic operation before receiving an ACK
-                * for the previous operation.
-                */
-               break;
-       }
-
-       memset(&wc, 0, sizeof wc);
-
-       /* OK, process the packet. */
-       switch (opcode) {
-       case OP(SEND_FIRST):
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               qp->r_rcv_len = 0;
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-       case OP(RDMA_WRITE_MIDDLE):
-       send_middle:
-               /*
-                * Check for an invalid length: middle packets must
-                * carry exactly one PMTU of payload and must not
-                * overrun the posted rwqe length.
-                */
-               if (unlikely(tlen != (hdrsize + pmtu + 4)))
-                       goto nack_inv;
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len))
-                       goto nack_inv;
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-               /* consume RWQE */
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               goto send_last_imm;
-
-       case OP(SEND_ONLY):
-       case OP(SEND_ONLY_WITH_IMMEDIATE):
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               qp->r_rcv_len = 0;
-               if (opcode == OP(SEND_ONLY))
-                       goto send_last;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST_WITH_IMMEDIATE):
-       send_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST):
-       case OP(RDMA_WRITE_LAST):
-       send_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4)))
-                       goto nack_inv;
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               wc.byte_len = tlen + qp->r_rcv_len;
-               if (unlikely(wc.byte_len > qp->r_len))
-                       goto nack_inv;
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               qp->r_msn++;
-               if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-                       break;
-               wc.wr_id = qp->r_wr_id;
-               wc.status = IB_WC_SUCCESS;
-               if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-               else
-                       wc.opcode = IB_WC_RECV;
-               wc.qp = &qp->ibqp;
-               wc.src_qp = qp->remote_qpn;
-               wc.slid = qp->remote_ah_attr.dlid;
-               wc.sl = qp->remote_ah_attr.sl;
-               /*
-                * Signal a completion event if the solicited bit (SE,
-                * bit 23 of the first BTH word) is set.
-                */
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                              (ohdr->bth[0] &
-                               cpu_to_be32(1 << 23)) != 0);
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_ONLY):
-       case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_WRITE)))
-                       goto nack_inv;
-               /* consume RWQE */
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               hdrsize += sizeof(*reth);
-               qp->r_len = be32_to_cpu(reth->length);
-               qp->r_rcv_len = 0;
-               if (qp->r_len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey & NAK */
-                       ok = ipath_rkey_ok(qp, &qp->r_sge,
-                                          qp->r_len, vaddr, rkey,
-                                          IB_ACCESS_REMOTE_WRITE);
-                       if (unlikely(!ok))
-                               goto nack_acc;
-               } else {
-                       qp->r_sge.sg_list = NULL;
-                       qp->r_sge.sge.mr = NULL;
-                       qp->r_sge.sge.vaddr = NULL;
-                       qp->r_sge.sge.length = 0;
-                       qp->r_sge.sge.sge_length = 0;
-               }
-               if (opcode == OP(RDMA_WRITE_FIRST))
-                       goto send_middle;
-               else if (opcode == OP(RDMA_WRITE_ONLY))
-                       goto send_last;
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               goto send_last_imm;
-
-       case OP(RDMA_READ_REQUEST): {
-               struct ipath_ack_entry *e;
-               u32 len;
-               u8 next;
-
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_READ)))
-                       goto nack_inv;
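-               /*
-                * s_ack_queue is a ring of IPATH_MAX_RDMA_ATOMIC + 1
-                * entries; advance the head index with wraparound.
-                */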
-               next = qp->r_head_ack_queue + 1;
-               if (next > IPATH_MAX_RDMA_ATOMIC)
-                       next = 0;
-               spin_lock_irqsave(&qp->s_lock, flags);
-               /* Double check we can process this while holding the s_lock. */
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-                       goto unlock;
-               if (unlikely(next == qp->s_tail_ack_queue)) {
-                       if (!qp->s_ack_queue[next].sent)
-                               goto nack_inv_unlck;
-                       ipath_update_ack_queue(qp, next);
-               }
-               e = &qp->s_ack_queue[qp->r_head_ack_queue];
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               len = be32_to_cpu(reth->length);
-               if (len) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey & NAK */
-                       ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
-                                          rkey, IB_ACCESS_REMOTE_READ);
-                       if (unlikely(!ok))
-                               goto nack_acc_unlck;
-                       /*
-                        * Update the next expected PSN.  We add 1 later
-                        * below, so only add the remainder here; e.g. a
-                        * read of 3 * pmtu bytes spans three response
-                        * packets, so add 2 here and 1 below.
-                        */
-                       if (len > pmtu)
-                               qp->r_psn += (len - 1) / pmtu;
-               } else {
-                       e->rdma_sge.sg_list = NULL;
-                       e->rdma_sge.num_sge = 0;
-                       e->rdma_sge.sge.mr = NULL;
-                       e->rdma_sge.sge.vaddr = NULL;
-                       e->rdma_sge.sge.length = 0;
-                       e->rdma_sge.sge.sge_length = 0;
-               }
-               e->opcode = opcode;
-               e->sent = 0;
-               e->psn = psn;
-               /*
-                * We need to increment the MSN here instead of when we
-                * finish sending the result since a duplicate request would
-                * increment it more than once.
-                */
-               qp->r_msn++;
-               qp->r_psn++;
-               qp->r_state = opcode;
-               qp->r_nak_state = 0;
-               qp->r_head_ack_queue = next;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-
-               goto unlock;
-       }
-
-       case OP(COMPARE_SWAP):
-       case OP(FETCH_ADD): {
-               struct ib_atomic_eth *ateth;
-               struct ipath_ack_entry *e;
-               u64 vaddr;
-               atomic64_t *maddr;
-               u64 sdata;
-               u32 rkey;
-               u8 next;
-
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_ATOMIC)))
-                       goto nack_inv;
-               next = qp->r_head_ack_queue + 1;
-               if (next > IPATH_MAX_RDMA_ATOMIC)
-                       next = 0;
-               spin_lock_irqsave(&qp->s_lock, flags);
-               /* Double check we can process this while holding the s_lock. */
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-                       goto unlock;
-               if (unlikely(next == qp->s_tail_ack_queue)) {
-                       if (!qp->s_ack_queue[next].sent)
-                               goto nack_inv_unlck;
-                       ipath_update_ack_queue(qp, next);
-               }
-               if (!header_in_data)
-                       ateth = &ohdr->u.atomic_eth;
-               else
-                       ateth = (struct ib_atomic_eth *)data;
-               vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
-                       be32_to_cpu(ateth->vaddr[1]);
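-               /* The target of an atomic must be a naturally aligned u64. */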
-               if (unlikely(vaddr & (sizeof(u64) - 1)))
-                       goto nack_inv_unlck;
-               rkey = be32_to_cpu(ateth->rkey);
-               /* Check rkey & NAK */
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
-                                           sizeof(u64), vaddr, rkey,
-                                           IB_ACCESS_REMOTE_ATOMIC)))
-                       goto nack_acc_unlck;
-               /*
-                * Perform the atomic OP and save the *prior* memory
-                * contents, as IB atomics return the original value:
-                * atomic64_add_return() yields the new value, so sdata
-                * is subtracted back out; cmpxchg() returns the old
-                * value directly.
-                */
-               maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = be64_to_cpu(ateth->swap_data);
-               e = &qp->s_ack_queue[qp->r_head_ack_queue];
-               e->atomic_data = (opcode == OP(FETCH_ADD)) ?
-                       (u64) atomic64_add_return(sdata, maddr) - sdata :
-                       (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     be64_to_cpu(ateth->compare_data),
-                                     sdata);
-               e->opcode = opcode;
-               e->sent = 0;
-               e->psn = psn & IPATH_PSN_MASK;
-               qp->r_msn++;
-               qp->r_psn++;
-               qp->r_state = opcode;
-               qp->r_nak_state = 0;
-               qp->r_head_ack_queue = next;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-
-               goto unlock;
-       }
-
-       default:
-               /* NAK unknown opcodes. */
-               goto nack_inv;
-       }
-       qp->r_psn++;
-       qp->r_state = opcode;
-       qp->r_ack_psn = psn;
-       qp->r_nak_state = 0;
-       /* Send an ACK if requested (AckReq, bit 31 of the PSN word) or required. */
-       if (psn & (1 << 31))
-               goto send_ack;
-       goto done;
-
-rnr_nak:
-       qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
-       qp->r_ack_psn = qp->r_psn;
-       goto send_ack;
-
-nack_inv_unlck:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_inv:
-       ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-       qp->r_nak_state = IB_NAK_INVALID_REQUEST;
-       qp->r_ack_psn = qp->r_psn;
-       goto send_ack;
-
-nack_acc_unlck:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_acc:
-       ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
-       qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
-       qp->r_ack_psn = qp->r_psn;
-send_ack:
-       send_rc_ack(qp);
-       goto done;
-
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_registers.h b/drivers/staging/rdma/ipath/ipath_registers.h
deleted file mode 100644 (file)
index 8f44d0c..0000000
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_REGISTERS_H
-#define _IPATH_REGISTERS_H
-
-/*
- * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for InfiniPath chips.
- */
-
-/*
- * These are the InfiniPath register and buffer bit definitions
- * that are visible to software and needed only by the kernel
- * and diag code.  A few that are visible to protocol and user
- * code are in ipath_common.h.  Some bits are specific
- * to a given chip implementation and have been moved to the
- * chip-specific source files.
- */
-
-/* kr_revision bits */
-#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
-#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
-#define INFINIPATH_R_ARCH_MASK 0xFF
-#define INFINIPATH_R_ARCH_SHIFT 16
-#define INFINIPATH_R_SOFTWARE_MASK 0xFF
-#define INFINIPATH_R_SOFTWARE_SHIFT 24
-#define INFINIPATH_R_BOARDID_MASK 0xFF
-#define INFINIPATH_R_BOARDID_SHIFT 32
-
-/* kr_control bits */
-#define INFINIPATH_C_FREEZEMODE 0x00000002
-#define INFINIPATH_C_LINKENABLE 0x00000004
-
-/* kr_sendctrl bits */
-#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
-#define INFINIPATH_S_UPDTHRESH_SHIFT 24
-#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
-
-#define IPATH_S_ABORT          0
-#define IPATH_S_PIOINTBUFAVAIL 1
-#define IPATH_S_PIOBUFAVAILUPD 2
-#define IPATH_S_PIOENABLE      3
-#define IPATH_S_SDMAINTENABLE  9
-#define IPATH_S_SDMASINGLEDESCRIPTOR   10
-#define IPATH_S_SDMAENABLE     11
-#define IPATH_S_SDMAHALT       12
-#define IPATH_S_DISARM         31
-
-#define INFINIPATH_S_ABORT             (1U << IPATH_S_ABORT)
-#define INFINIPATH_S_PIOINTBUFAVAIL    (1U << IPATH_S_PIOINTBUFAVAIL)
-#define INFINIPATH_S_PIOBUFAVAILUPD    (1U << IPATH_S_PIOBUFAVAILUPD)
-#define INFINIPATH_S_PIOENABLE         (1U << IPATH_S_PIOENABLE)
-#define INFINIPATH_S_SDMAINTENABLE     (1U << IPATH_S_SDMAINTENABLE)
-#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
-                                       (1U << IPATH_S_SDMASINGLEDESCRIPTOR)
-#define INFINIPATH_S_SDMAENABLE                (1U << IPATH_S_SDMAENABLE)
-#define INFINIPATH_S_SDMAHALT          (1U << IPATH_S_SDMAHALT)
-#define INFINIPATH_S_DISARM            (1U << IPATH_S_DISARM)
-
-/* kr_rcvctrl bits that are the same on multiple chips */
-#define INFINIPATH_R_PORTENABLE_SHIFT 0
-#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_SDMAINT           0x8000000000000000ULL
-#define INFINIPATH_I_SDMADISABLED      0x4000000000000000ULL
-#define INFINIPATH_I_ERROR             0x0000000080000000ULL
-#define INFINIPATH_I_SPIOSENT          0x0000000040000000ULL
-#define INFINIPATH_I_SPIOBUFAVAIL      0x0000000020000000ULL
-#define INFINIPATH_I_GPIO              0x0000000010000000ULL
-#define INFINIPATH_I_JINT              0x0000000004000000ULL
-
-/* kr_errorstatus, kr_errorclear, kr_errormask bits */
-#define INFINIPATH_E_RFORMATERR                        0x0000000000000001ULL
-#define INFINIPATH_E_RVCRC                     0x0000000000000002ULL
-#define INFINIPATH_E_RICRC                     0x0000000000000004ULL
-#define INFINIPATH_E_RMINPKTLEN                        0x0000000000000008ULL
-#define INFINIPATH_E_RMAXPKTLEN                        0x0000000000000010ULL
-#define INFINIPATH_E_RLONGPKTLEN               0x0000000000000020ULL
-#define INFINIPATH_E_RSHORTPKTLEN              0x0000000000000040ULL
-#define INFINIPATH_E_RUNEXPCHAR                        0x0000000000000080ULL
-#define INFINIPATH_E_RUNSUPVL                  0x0000000000000100ULL
-#define INFINIPATH_E_REBP                      0x0000000000000200ULL
-#define INFINIPATH_E_RIBFLOW                   0x0000000000000400ULL
-#define INFINIPATH_E_RBADVERSION               0x0000000000000800ULL
-#define INFINIPATH_E_RRCVEGRFULL               0x0000000000001000ULL
-#define INFINIPATH_E_RRCVHDRFULL               0x0000000000002000ULL
-#define INFINIPATH_E_RBADTID                   0x0000000000004000ULL
-#define INFINIPATH_E_RHDRLEN                   0x0000000000008000ULL
-#define INFINIPATH_E_RHDR                      0x0000000000010000ULL
-#define INFINIPATH_E_RIBLOSTLINK               0x0000000000020000ULL
-#define INFINIPATH_E_SENDSPECIALTRIGGER                0x0000000008000000ULL
-#define INFINIPATH_E_SDMADISABLED              0x0000000010000000ULL
-#define INFINIPATH_E_SMINPKTLEN                        0x0000000020000000ULL
-#define INFINIPATH_E_SMAXPKTLEN                        0x0000000040000000ULL
-#define INFINIPATH_E_SUNDERRUN                 0x0000000080000000ULL
-#define INFINIPATH_E_SPKTLEN                   0x0000000100000000ULL
-#define INFINIPATH_E_SDROPPEDSMPPKT            0x0000000200000000ULL
-#define INFINIPATH_E_SDROPPEDDATAPKT           0x0000000400000000ULL
-#define INFINIPATH_E_SPIOARMLAUNCH             0x0000000800000000ULL
-#define INFINIPATH_E_SUNEXPERRPKTNUM           0x0000001000000000ULL
-#define INFINIPATH_E_SUNSUPVL                  0x0000002000000000ULL
-#define INFINIPATH_E_SENDBUFMISUSE             0x0000004000000000ULL
-#define INFINIPATH_E_SDMAGENMISMATCH           0x0000008000000000ULL
-#define INFINIPATH_E_SDMAOUTOFBOUND            0x0000010000000000ULL
-#define INFINIPATH_E_SDMATAILOUTOFBOUND                0x0000020000000000ULL
-#define INFINIPATH_E_SDMABASE                  0x0000040000000000ULL
-#define INFINIPATH_E_SDMA1STDESC               0x0000080000000000ULL
-#define INFINIPATH_E_SDMARPYTAG                        0x0000100000000000ULL
-#define INFINIPATH_E_SDMADWEN                  0x0000200000000000ULL
-#define INFINIPATH_E_SDMAMISSINGDW             0x0000400000000000ULL
-#define INFINIPATH_E_SDMAUNEXPDATA             0x0000800000000000ULL
-#define INFINIPATH_E_IBSTATUSCHANGED           0x0001000000000000ULL
-#define INFINIPATH_E_INVALIDADDR               0x0002000000000000ULL
-#define INFINIPATH_E_RESET                     0x0004000000000000ULL
-#define INFINIPATH_E_HARDWARE                  0x0008000000000000ULL
-#define INFINIPATH_E_SDMADESCADDRMISALIGN      0x0010000000000000ULL
-#define INFINIPATH_E_INVALIDEEPCMD             0x0020000000000000ULL
-
-/*
- * this is used to print "common" packet errors only when the
- * __IPATH_ERRPKTDBG bit is set in ipath_debug.
- */
-#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
-               | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
-               | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
-               | INFINIPATH_E_REBP )
-
-/* Convenience for decoding Send DMA errors */
-#define INFINIPATH_E_SDMAERRS ( \
-       INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
-       INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
-       INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
-       INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
-       INFINIPATH_E_SDMAUNEXPDATA | \
-       INFINIPATH_E_SDMADESCADDRMISALIGN | \
-       INFINIPATH_E_SDMADISABLED | \
-       INFINIPATH_E_SENDBUFMISUSE)
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
- *             bit 4: flag buffer, 5: datainfo, 6: header info */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF  0x1ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC  0x2ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
-/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF   0x01ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ  0x02ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x04ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF  0x10ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
-/* waldo specific -- find the rest in ipath_6110.c */
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
-/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
-#define INFINIPATH_HWE_MEMBISTFAILED   0x0040000000000000ULL
-
-/* kr_hwdiagctrl bits */
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR  0x0000000400000000ULL
-#define INFINIPATH_DC_COUNTERDISABLE            0x1000000000000000ULL
-#define INFINIPATH_DC_COUNTERWREN               0x2000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-
-/* kr_ibcctrl bits */
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
-#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
-/* cycle through TS1/TS2 till OK */
-#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
-/* wait for TS1, then go on */
-#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
-#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
-#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKCMD_DOWN 1         /* move to 0x11 */
-#define INFINIPATH_IBCC_LINKCMD_ARMED 2                /* move to 0x21 */
-#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3       /* move to 0x31 */
-#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
-#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
-#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
-#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
-#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
-#define INFINIPATH_IBCC_LOOPBACK             0x8000000000000000ULL
-#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
-
-/* kr_ibcstatus bits */
-#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
-#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
-
-#define INFINIPATH_IBCS_TXREADY       0x40000000
-#define INFINIPATH_IBCS_TXCREDITOK    0x80000000
-/* link training states (shift by
-   INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
-#define INFINIPATH_IBCS_LT_STATE_DISABLED      0x00
-#define INFINIPATH_IBCS_LT_STATE_LINKUP                0x01
-#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE    0x02
-#define INFINIPATH_IBCS_LT_STATE_POLLQUIET     0x03
-#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY    0x04
-#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET    0x05
-#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE   0x08
-#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG    0x09
-#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT    0x0a
-#define INFINIPATH_IBCS_LT_STATE_CFGIDLE       0x0b
-#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN        0x0c
-#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT        0x0e
-#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE   0x0f
-/* link state machine states (shift by ibcs_ls_shift) */
-#define INFINIPATH_IBCS_L_STATE_DOWN           0x0
-#define INFINIPATH_IBCS_L_STATE_INIT           0x1
-#define INFINIPATH_IBCS_L_STATE_ARM            0x2
-#define INFINIPATH_IBCS_L_STATE_ACTIVE         0x3
-#define INFINIPATH_IBCS_L_STATE_ACT_DEFER      0x4
-
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
-#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
-#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
-
-/* kr_extctrl bits */
-#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
-#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
-#define INFINIPATH_EXTC_SERDESENABLE         0x80000000ULL
-#define INFINIPATH_EXTC_SERDESCONNECT        0x40000000ULL
-#define INFINIPATH_EXTC_SERDESENTRUNKING     0x20000000ULL
-#define INFINIPATH_EXTC_SERDESDISRXFIFO      0x10000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK1       0x08000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK2       0x04000000ULL
-#define INFINIPATH_EXTC_SERDESENENCDEC       0x02000000ULL
-#define INFINIPATH_EXTC_LED1SECPORT_ON       0x00000020ULL
-#define INFINIPATH_EXTC_LED2SECPORT_ON       0x00000010ULL
-#define INFINIPATH_EXTC_LED1PRIPORT_ON       0x00000008ULL
-#define INFINIPATH_EXTC_LED2PRIPORT_ON       0x00000004ULL
-#define INFINIPATH_EXTC_LEDGBLOK_ON          0x00000002ULL
-#define INFINIPATH_EXTC_LEDGBLERR_OFF        0x00000001ULL
-
-/* kr_partitionkey bits */
-#define INFINIPATH_PKEY_SIZE 16
-#define INFINIPATH_PKEY_MASK 0xFFFF
-#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
-
-/* kr_serdesconfig0 bits */
-#define INFINIPATH_SERDC0_RESET_MASK  0xfULL   /* overall reset bits */
-#define INFINIPATH_SERDC0_RESET_PLL   0x10000000ULL    /* pll reset */
-/* tx idle enables (per lane) */
-#define INFINIPATH_SERDC0_TXIDLE      0xF000ULL
-/* rx detect enables (per lane) */
-#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
-/* L1 power down; use with RXDETECT, otherwise not used on the IB side */
-#define INFINIPATH_SERDC0_L1PWR_DN      0xF0ULL
-
-/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
-#define INFINIPATH_XGXS_RX_POL_SHIFT 19
-#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
-
-
-/*
- * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
- * PIO send buffers.  This is well beyond anything currently
- * defined in the InfiniBand spec.
- */
-#define IPATH_PIO_MAXIBHDR 128
-
-typedef u64 ipath_err_t;
-
-/* The following change with the type of device, so they
- * need to be part of the ipath_devdata struct, or
- * we could have problems plugging in devices of
- * different types (e.g. one HT, one PCIE)
- * in one system, to be managed by one driver.
- * On the other hand, this file may also be included
- * by other code, so leave the declarations here
- * temporarily.  Minor footprint issue if a common-model
- * linker is used, none if a C89+ linker is used.
- */
-
-/* mask of defined bits for various registers */
-extern u64 infinipath_i_bitsextant;
-extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-
-/* masks that are different in various chips, or only exist in some chips */
-extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
-/*
- * These are the infinipath general register numbers (not offsets).
- * The kernel registers are used directly, those beyond the kernel
- * registers are calculated from one of the base registers.  The use of
- * an integer type doesn't allow type-checking as thorough as, say,
- * an enum but allows for better hiding of chip differences.
- */
-typedef const u16 ipath_kreg,  /* infinipath general registers */
- ipath_creg,                   /* infinipath counter registers */
- ipath_sreg;                   /* kernel-only, infinipath send registers */
-
-/*
- * These are the chip registers common to all infinipath chips, and
- * used both by the kernel and the diagnostics or other user code.
- * They are all implemented such that 64 bit accesses work.
- * Some implement no more than 32 bits.  Because 64 bit reads
- * require 2 HT cmds on opteron, we access those with 32 bit
- * reads for efficiency (they are written as 64 bits, since
- * the extra 32 bits are nearly free on writes, and it slightly reduces
- * complexity).  The rest are all accessed as 64 bits.
- */
-struct ipath_kregs {
-       /* These are the 32 bit group */
-       ipath_kreg kr_control;
-       ipath_kreg kr_counterregbase;
-       ipath_kreg kr_intmask;
-       ipath_kreg kr_intstatus;
-       ipath_kreg kr_pagealign;
-       ipath_kreg kr_portcnt;
-       ipath_kreg kr_rcvtidbase;
-       ipath_kreg kr_rcvtidcnt;
-       ipath_kreg kr_rcvegrbase;
-       ipath_kreg kr_rcvegrcnt;
-       ipath_kreg kr_scratch;
-       ipath_kreg kr_sendctrl;
-       ipath_kreg kr_sendpiobufbase;
-       ipath_kreg kr_sendpiobufcnt;
-       ipath_kreg kr_sendpiosize;
-       ipath_kreg kr_sendregbase;
-       ipath_kreg kr_userregbase;
-       /* These are the 64 bit group */
-       ipath_kreg kr_debugport;
-       ipath_kreg kr_debugportselect;
-       ipath_kreg kr_errorclear;
-       ipath_kreg kr_errormask;
-       ipath_kreg kr_errorstatus;
-       ipath_kreg kr_extctrl;
-       ipath_kreg kr_extstatus;
-       ipath_kreg kr_gpio_clear;
-       ipath_kreg kr_gpio_mask;
-       ipath_kreg kr_gpio_out;
-       ipath_kreg kr_gpio_status;
-       ipath_kreg kr_hwdiagctrl;
-       ipath_kreg kr_hwerrclear;
-       ipath_kreg kr_hwerrmask;
-       ipath_kreg kr_hwerrstatus;
-       ipath_kreg kr_ibcctrl;
-       ipath_kreg kr_ibcstatus;
-       ipath_kreg kr_intblocked;
-       ipath_kreg kr_intclear;
-       ipath_kreg kr_interruptconfig;
-       ipath_kreg kr_mdio;
-       ipath_kreg kr_partitionkey;
-       ipath_kreg kr_rcvbthqp;
-       ipath_kreg kr_rcvbufbase;
-       ipath_kreg kr_rcvbufsize;
-       ipath_kreg kr_rcvctrl;
-       ipath_kreg kr_rcvhdrcnt;
-       ipath_kreg kr_rcvhdrentsize;
-       ipath_kreg kr_rcvhdrsize;
-       ipath_kreg kr_rcvintmembase;
-       ipath_kreg kr_rcvintmemsize;
-       ipath_kreg kr_revision;
-       ipath_kreg kr_sendbuffererror;
-       ipath_kreg kr_sendpioavailaddr;
-       ipath_kreg kr_serdesconfig0;
-       ipath_kreg kr_serdesconfig1;
-       ipath_kreg kr_serdesstatus;
-       ipath_kreg kr_txintmembase;
-       ipath_kreg kr_txintmemsize;
-       ipath_kreg kr_xgxsconfig;
-       ipath_kreg kr_ibpllcfg;
-       /* use these two (and the following N ports) only with
-        * ipath_k*_kreg64_port(); not *kreg64() */
-       ipath_kreg kr_rcvhdraddr;
-       ipath_kreg kr_rcvhdrtailaddr;
-
-       /* remaining registers are not present on all types of infinipath
-          chips  */
-       ipath_kreg kr_rcvpktledcnt;
-       ipath_kreg kr_pcierbuftestreg0;
-       ipath_kreg kr_pcierbuftestreg1;
-       ipath_kreg kr_pcieq0serdesconfig0;
-       ipath_kreg kr_pcieq0serdesconfig1;
-       ipath_kreg kr_pcieq0serdesstatus;
-       ipath_kreg kr_pcieq1serdesconfig0;
-       ipath_kreg kr_pcieq1serdesconfig1;
-       ipath_kreg kr_pcieq1serdesstatus;
-       ipath_kreg kr_hrtbt_guid;
-       ipath_kreg kr_ibcddrctrl;
-       ipath_kreg kr_ibcddrstatus;
-       ipath_kreg kr_jintreload;
-
-       /* send dma related regs */
-       ipath_kreg kr_senddmabase;
-       ipath_kreg kr_senddmalengen;
-       ipath_kreg kr_senddmatail;
-       ipath_kreg kr_senddmahead;
-       ipath_kreg kr_senddmaheadaddr;
-       ipath_kreg kr_senddmabufmask0;
-       ipath_kreg kr_senddmabufmask1;
-       ipath_kreg kr_senddmabufmask2;
-       ipath_kreg kr_senddmastatus;
-
-       /* SerDes related regs (IBA7220-only) */
-       ipath_kreg kr_ibserdesctrl;
-       ipath_kreg kr_ib_epbacc;
-       ipath_kreg kr_ib_epbtrans;
-       ipath_kreg kr_pcie_epbacc;
-       ipath_kreg kr_pcie_epbtrans;
-       ipath_kreg kr_ib_ddsrxeq;
-};
-
-struct ipath_cregs {
-       ipath_creg cr_badformatcnt;
-       ipath_creg cr_erricrccnt;
-       ipath_creg cr_errlinkcnt;
-       ipath_creg cr_errlpcrccnt;
-       ipath_creg cr_errpkey;
-       ipath_creg cr_errrcvflowctrlcnt;
-       ipath_creg cr_err_rlencnt;
-       ipath_creg cr_errslencnt;
-       ipath_creg cr_errtidfull;
-       ipath_creg cr_errtidvalid;
-       ipath_creg cr_errvcrccnt;
-       ipath_creg cr_ibstatuschange;
-       ipath_creg cr_intcnt;
-       ipath_creg cr_invalidrlencnt;
-       ipath_creg cr_invalidslencnt;
-       ipath_creg cr_lbflowstallcnt;
-       ipath_creg cr_iblinkdowncnt;
-       ipath_creg cr_iblinkerrrecovcnt;
-       ipath_creg cr_ibsymbolerrcnt;
-       ipath_creg cr_pktrcvcnt;
-       ipath_creg cr_pktrcvflowctrlcnt;
-       ipath_creg cr_pktsendcnt;
-       ipath_creg cr_pktsendflowcnt;
-       ipath_creg cr_portovflcnt;
-       ipath_creg cr_rcvebpcnt;
-       ipath_creg cr_rcvovflcnt;
-       ipath_creg cr_rxdroppktcnt;
-       ipath_creg cr_senddropped;
-       ipath_creg cr_sendstallcnt;
-       ipath_creg cr_sendunderruncnt;
-       ipath_creg cr_unsupvlcnt;
-       ipath_creg cr_wordrcvcnt;
-       ipath_creg cr_wordsendcnt;
-       ipath_creg cr_vl15droppedpktcnt;
-       ipath_creg cr_rxotherlocalphyerrcnt;
-       ipath_creg cr_excessbufferovflcnt;
-       ipath_creg cr_locallinkintegrityerrcnt;
-       ipath_creg cr_rxvlerrcnt;
-       ipath_creg cr_rxdlidfltrcnt;
-       ipath_creg cr_psstat;
-       ipath_creg cr_psstart;
-       ipath_creg cr_psinterval;
-       ipath_creg cr_psrcvdatacount;
-       ipath_creg cr_psrcvpktscount;
-       ipath_creg cr_psxmitdatacount;
-       ipath_creg cr_psxmitpktscount;
-       ipath_creg cr_psxmitwaitcount;
-};
-
-#endif                         /* _IPATH_REGISTERS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
deleted file mode 100644 (file)
index e541a01..0000000
+++ /dev/null
@@ -1,733 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/*
- * Convert the AETH RNR timeout code into the number of milliseconds,
- * rounding up: per the IB spec encoding, code 0 means 655.36 ms
- * (hence 656 below) and code 0x1F means 491.52 ms (hence 492).
- */
-const u32 ib_ipath_rnr_table[32] = {
-       656,                    /* 0 */
-       1,                      /* 1 */
-       1,                      /* 2 */
-       1,                      /* 3 */
-       1,                      /* 4 */
-       1,                      /* 5 */
-       1,                      /* 6 */
-       1,                      /* 7 */
-       1,                      /* 8 */
-       1,                      /* 9 */
-       1,                      /* A */
-       1,                      /* B */
-       1,                      /* C */
-       1,                      /* D */
-       2,                      /* E */
-       2,                      /* F */
-       3,                      /* 10 */
-       4,                      /* 11 */
-       6,                      /* 12 */
-       8,                      /* 13 */
-       11,                     /* 14 */
-       16,                     /* 15 */
-       21,                     /* 16 */
-       31,                     /* 17 */
-       41,                     /* 18 */
-       62,                     /* 19 */
-       82,                     /* 1A */
-       123,                    /* 1B */
-       164,                    /* 1C */
-       246,                    /* 1D */
-       328,                    /* 1E */
-       492                     /* 1F */
-};
-
-/**
- * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
- * @qp: the QP
- *
- * Called with the QP s_lock held and interrupts disabled.
- * XXX Use a simple list for now.  We might need a priority
- * queue if we have lots of QPs waiting for RNR timeouts
- * but that should be rare.
- */
-void ipath_insert_rnr_queue(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-
-       /* We already did a spin_lock_irqsave(), so just use spin_lock */
-       spin_lock(&dev->pending_lock);
-       if (list_empty(&dev->rnrwait))
-               list_add(&qp->timerwait, &dev->rnrwait);
-       else {
-               struct list_head *l = &dev->rnrwait;
-               struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
-                                                 timerwait);
-
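-               /*
-                * Timeouts on the list are stored relative to the
-                * preceding entry, so consume them while walking to the
-                * insertion point, then adjust the successor.
-                */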
-               while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
-                       qp->s_rnr_timeout -= nqp->s_rnr_timeout;
-                       l = l->next;
-                       if (l->next == &dev->rnrwait) {
-                               nqp = NULL;
-                               break;
-                       }
-                       nqp = list_entry(l->next, struct ipath_qp,
-                                        timerwait);
-               }
-               if (nqp)
-                       nqp->s_rnr_timeout -= qp->s_rnr_timeout;
-               list_add(&qp->timerwait, l);
-       }
-       spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_init_sge - Validate a RWQE and fill in the SGE state
- * @qp: the QP
- * @wqe: the receive WQE to validate
- * @lengthp: set to the total length of the usable SGEs
- * @ss: the SGE state to fill in
- *
- * Return 1 if OK; 0 if an LKEY check failed, in which case an
- * IB_WC_LOC_PROT_ERR completion has been queued.
- */
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-                  u32 *lengthp, struct ipath_sge_state *ss)
-{
-       int i, j, ret;
-       struct ib_wc wc;
-
-       *lengthp = 0;
-       for (i = j = 0; i < wqe->num_sge; i++) {
-               if (wqe->sg_list[i].length == 0)
-                       continue;
-               /* Check LKEY */
-               if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
-                                  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
-                       goto bad_lkey;
-               *lengthp += wqe->sg_list[i].length;
-               j++;
-       }
-       ss->num_sge = j;
-       ret = 1;
-       goto bail;
-
-bad_lkey:
-       memset(&wc, 0, sizeof(wc));
-       wc.wr_id = wqe->wr_id;
-       wc.status = IB_WC_LOC_PROT_ERR;
-       wc.opcode = IB_WC_RECV;
-       wc.qp = &qp->ibqp;
-       /* Signal solicited completion event. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return 0 if no RWQE is available, otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
-{
-       unsigned long flags;
-       struct ipath_rq *rq;
-       struct ipath_rwq *wq;
-       struct ipath_srq *srq;
-       struct ipath_rwqe *wqe;
-       void (*handler)(struct ib_event *, void *);
-       u32 tail;
-       int ret;
-
-       if (qp->ibqp.srq) {
-               srq = to_isrq(qp->ibqp.srq);
-               handler = srq->ibsrq.event_handler;
-               rq = &srq->rq;
-       } else {
-               srq = NULL;
-               handler = NULL;
-               rq = &qp->r_rq;
-       }
-
-       spin_lock_irqsave(&rq->lock, flags);
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               ret = 0;
-               goto unlock;
-       }
-
-       wq = rq->wq;
-       tail = wq->tail;
-       /* Validate tail before using it since it is user writable. */
-       if (tail >= rq->size)
-               tail = 0;
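-       /*
-        * When the full SGE state is needed, advance past WQEs whose
-        * LKEYs fail validation; ipath_init_sge() has already queued
-        * an error completion for each one rejected.
-        */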
-       do {
-               if (unlikely(tail == wq->head)) {
-                       ret = 0;
-                       goto unlock;
-               }
-               /* Make sure entry is read after head index is read. */
-               smp_rmb();
-               wqe = get_rwqe_ptr(rq, tail);
-               if (++tail >= rq->size)
-                       tail = 0;
-               if (wr_id_only)
-                       break;
-               qp->r_sge.sg_list = qp->r_sg_list;
-       } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
-       qp->r_wr_id = wqe->wr_id;
-       wq->tail = tail;
-
-       ret = 1;
-       set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
-       if (handler) {
-               u32 n;
-
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               n = wq->head;
-               if (n >= rq->size)
-                       n = 0;
-               if (n < tail)
-                       n += rq->size - tail;
-               else
-                       n -= tail;
-               if (n < srq->limit) {
-                       struct ib_event ev;
-
-                       srq->limit = 0;
-                       spin_unlock_irqrestore(&rq->lock, flags);
-                       ev.device = qp->ibqp.device;
-                       ev.element.srq = qp->ibqp.srq;
-                       ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       handler(&ev, srq->ibsrq.srq_context);
-                       goto bail;
-               }
-       }
-unlock:
-       spin_unlock_irqrestore(&rq->lock, flags);
-bail:
-       return ret;
-}
-
-/**
- * ipath_ruc_loopback - handle UC and RC loopback requests
- * @sqp: the sending QP
- *
- * This is called from ipath_do_send() to
- * forward a WQE addressed to the same HCA.
- * Note that although we are single threaded due to the tasklet, we still
- * have to protect against post_send().  We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ipath_ruc_loopback(struct ipath_qp *sqp)
-{
-       struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-       struct ipath_qp *qp;
-       struct ipath_swqe *wqe;
-       struct ipath_sge *sge;
-       unsigned long flags;
-       struct ib_wc wc;
-       u64 sdata;
-       atomic64_t *maddr;
-       enum ib_wc_status send_status;
-
-       /*
-        * Note that we check the responder QP state after
-        * checking the requester's state.
-        */
-       qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
-
-       spin_lock_irqsave(&sqp->s_lock, flags);
-
-       /* Return if we are already busy processing a work request. */
-       if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-           !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-               goto unlock;
-
-       sqp->s_flags |= IPATH_S_BUSY;
-
-again:
-       if (sqp->s_last == sqp->s_head)
-               goto clr_busy;
-       wqe = get_swqe_ptr(sqp, sqp->s_last);
-
-       /* Return if it is not OK to start a new work request. */
-       if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-               if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
-                       goto clr_busy;
-               /* We are in the error state, flush the work request. */
-               send_status = IB_WC_WR_FLUSH_ERR;
-               goto flush_send;
-       }
-
-       /*
-        * We can rely on the entry not changing without the s_lock
-        * being held until we update s_last.
-        * We increment s_cur to indicate s_last is in progress.
-        */
-       if (sqp->s_last == sqp->s_cur) {
-               if (++sqp->s_cur >= sqp->s_size)
-                       sqp->s_cur = 0;
-       }
-       spin_unlock_irqrestore(&sqp->s_lock, flags);
-
-       if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               /*
-                * For RC, the requester would time out and retry, so
-                * shortcut the timeouts and just signal too many retries.
-                */
-               if (sqp->ibqp.qp_type == IB_QPT_RC)
-                       send_status = IB_WC_RETRY_EXC_ERR;
-               else
-                       send_status = IB_WC_SUCCESS;
-               goto serr;
-       }
-
-       memset(&wc, 0, sizeof wc);
-       send_status = IB_WC_SUCCESS;
-
-       sqp->s_sge.sge = wqe->sg_list[0];
-       sqp->s_sge.sg_list = wqe->sg_list + 1;
-       sqp->s_sge.num_sge = wqe->wr.num_sge;
-       sqp->s_len = wqe->length;
-       switch (wqe->wr.opcode) {
-       case IB_WR_SEND_WITH_IMM:
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = wqe->wr.ex.imm_data;
-               /* FALLTHROUGH */
-       case IB_WR_SEND:
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               break;
-
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-                       goto inv_err;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = wqe->wr.ex.imm_data;
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               /* FALLTHROUGH */
-       case IB_WR_RDMA_WRITE:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-                       goto inv_err;
-               if (wqe->length == 0)
-                       break;
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
-                                           wqe->rdma_wr.remote_addr,
-                                           wqe->rdma_wr.rkey,
-                                           IB_ACCESS_REMOTE_WRITE)))
-                       goto acc_err;
-               break;
-
-       case IB_WR_RDMA_READ:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-                       goto inv_err;
-               if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
-                                           wqe->rdma_wr.remote_addr,
-                                           wqe->rdma_wr.rkey,
-                                           IB_ACCESS_REMOTE_READ)))
-                       goto acc_err;
-               qp->r_sge.sge = wqe->sg_list[0];
-               qp->r_sge.sg_list = wqe->sg_list + 1;
-               qp->r_sge.num_sge = wqe->wr.num_sge;
-               break;
-
-       case IB_WR_ATOMIC_CMP_AND_SWP:
-       case IB_WR_ATOMIC_FETCH_AND_ADD:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
-                       goto inv_err;
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
-                                           wqe->atomic_wr.remote_addr,
-                                           wqe->atomic_wr.rkey,
-                                           IB_ACCESS_REMOTE_ATOMIC)))
-                       goto acc_err;
-               /* Perform atomic OP and save result. */
-               maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = wqe->atomic_wr.compare_add;
-               *(u64 *) sqp->s_sge.sge.vaddr =
-                       (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-                       (u64) atomic64_add_return(sdata, maddr) - sdata :
-                       (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     sdata, wqe->atomic_wr.swap);
-               goto send_comp;
-
-       default:
-               send_status = IB_WC_LOC_QP_OP_ERR;
-               goto serr;
-       }
-
-       sge = &sqp->s_sge.sge;
-       while (sqp->s_len) {
-               u32 len = sqp->s_len;
-
-               if (len > sge->length)
-                       len = sge->length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--sqp->s_sge.num_sge)
-                               *sge = *sqp->s_sge.sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               sqp->s_len -= len;
-       }
-
-       if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-               goto send_comp;
-
-       if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-               wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-       else
-               wc.opcode = IB_WC_RECV;
-       wc.wr_id = qp->r_wr_id;
-       wc.status = IB_WC_SUCCESS;
-       wc.byte_len = wqe->length;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = qp->remote_qpn;
-       wc.slid = qp->remote_ah_attr.dlid;
-       wc.sl = qp->remote_ah_attr.sl;
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
-       spin_lock_irqsave(&sqp->s_lock, flags);
-flush_send:
-       sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-       ipath_send_complete(sqp, wqe, send_status);
-       goto again;
-
-rnr_nak:
-       /* Handle RNR NAK */
-       if (qp->ibqp.qp_type == IB_QPT_UC)
-               goto send_comp;
-       /*
-        * Note: we don't need the s_lock held since the BUSY flag
-        * makes this single threaded.
-        */
-       if (sqp->s_rnr_retry == 0) {
-               send_status = IB_WC_RNR_RETRY_EXC_ERR;
-               goto serr;
-       }
-       if (sqp->s_rnr_retry_cnt < 7)
-               sqp->s_rnr_retry--;
-       spin_lock_irqsave(&sqp->s_lock, flags);
-       if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
-               goto clr_busy;
-       sqp->s_flags |= IPATH_S_WAITING;
-       dev->n_rnr_naks++;
-       sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
-       ipath_insert_rnr_queue(sqp);
-       goto clr_busy;
-
-inv_err:
-       send_status = IB_WC_REM_INV_REQ_ERR;
-       wc.status = IB_WC_LOC_QP_OP_ERR;
-       goto err;
-
-acc_err:
-       send_status = IB_WC_REM_ACCESS_ERR;
-       wc.status = IB_WC_LOC_PROT_ERR;
-err:
-       /* responder goes to error state */
-       ipath_rc_error(qp, wc.status);
-
-serr:
-       spin_lock_irqsave(&sqp->s_lock, flags);
-       ipath_send_complete(sqp, wqe, send_status);
-       if (sqp->ibqp.qp_type == IB_QPT_RC) {
-               int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
-               sqp->s_flags &= ~IPATH_S_BUSY;
-               spin_unlock_irqrestore(&sqp->s_lock, flags);
-               if (lastwqe) {
-                       struct ib_event ev;
-
-                       ev.device = sqp->ibqp.device;
-                       ev.element.qp = &sqp->ibqp;
-                       ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-                       sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
-               }
-               goto done;
-       }
-clr_busy:
-       sqp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
-       if (qp && atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
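The atomic opcodes above reduce both IB atomics to primitives that hand back the pre-operation value: FETCH_AND_ADD stores atomic64_add_return() minus the addend, and CMP_AND_SWP stores whatever cmpxchg() observed. A standalone sketch of the same semantics in C11 atomics, illustrative only:

#include <stdatomic.h>
#include <stdint.h>

/* IB_WR_ATOMIC_FETCH_AND_ADD: add 'add' to *target, yield prior value.
 * atomic_fetch_add() returns the old value directly, matching
 * atomic64_add_return(add, target) - add in the code above. */
static uint64_t ib_fetch_add(_Atomic uint64_t *target, uint64_t add)
{
        return atomic_fetch_add(target, add);
}

/* IB_WR_ATOMIC_CMP_AND_SWP: if *target == compare, store swap; either
 * way yield the prior value, as cmpxchg() does. */
static uint64_t ib_cmp_swap(_Atomic uint64_t *target, uint64_t compare,
                            uint64_t swap)
{
        uint64_t old = compare;

        atomic_compare_exchange_strong(target, &old, swap);
        return old;     /* on failure, updated to the observed value */
}
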
-static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
-{
-       if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
-           qp->ibqp.qp_type == IB_QPT_SMI) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-}
-
-/**
- * ipath_no_bufs_available - tell the layer driver we need buffers
- * @qp: the QP that caused the problem
- * @dev: the device we ran out of buffers on
- *
- * Called when we run out of PIO buffers.
- * If we are now in the error state, return zero to flush the
- * send work request.
- */
-static int ipath_no_bufs_available(struct ipath_qp *qp,
-                                   struct ipath_ibdev *dev)
-{
-       unsigned long flags;
-       int ret = 1;
-
-       /*
-        * Note that as soon as want_buffer() is called and
-        * possibly before it returns, ipath_ib_piobufavail()
-        * could be called. Therefore, put QP on the piowait list before
-        * enabling the PIO avail interrupt.
-        */
-       spin_lock_irqsave(&qp->s_lock, flags);
-       if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-               dev->n_piowait++;
-               qp->s_flags |= IPATH_S_WAITING;
-               qp->s_flags &= ~IPATH_S_BUSY;
-               spin_lock(&dev->pending_lock);
-               if (list_empty(&qp->piowait))
-                       list_add_tail(&qp->piowait, &dev->piowait);
-               spin_unlock(&dev->pending_lock);
-       } else
-               ret = 0;
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (ret)
-               want_buffer(dev->dd, qp);
-       return ret;
-}
-
-/**
- * ipath_make_grh - construct a GRH header
- * @dev: a pointer to the ipath device
- * @hdr: a pointer to the GRH header being constructed
- * @grh: the global route address to send to
- * @hwords: the number of 32 bit words of header being sent
- * @nwords: the number of 32 bit words of data being sent
- *
- * Return the size of the header in 32 bit words.
- */
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-                  struct ib_global_route *grh, u32 hwords, u32 nwords)
-{
-       hdr->version_tclass_flow =
-               cpu_to_be32((6 << 28) |
-                           (grh->traffic_class << 20) |
-                           grh->flow_label);
-       hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
-       /* next_hdr is defined by C8-7 in ch. 8.4.1 */
-       hdr->next_hdr = 0x1B;
-       hdr->hop_limit = grh->hop_limit;
-       /* The SGID is 32-bit aligned. */
-       hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-       hdr->sgid.global.interface_id = dev->dd->ipath_guid;
-       hdr->dgid = grh->dgid;
-
-       /* GRH header size in 32-bit words. */
-       return sizeof(struct ib_grh) / sizeof(u32);
-}
-
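The paylen line encodes the GRH PayLen rule as computed here: count every 32-bit word after the GRH through the ICRC, i.e. subtract the two LRH words from hwords, add the payload words and one CRC word, then scale to bytes. The same arithmetic, isolated (constant names hypothetical):

#include <stdint.h>

#define LRH_WORDS       2       /* 8-byte local route header */
#define ICRC_WORDS      1       /* SIZE_OF_CRC in this driver */

/* hwords: 32-bit header words so far, excluding the GRH itself (as
 * passed from ipath_make_ruc_header()); nwords: 32-bit payload words.
 * Returns PayLen in bytes. */
static uint16_t grh_paylen(uint32_t hwords, uint32_t nwords)
{
        return (uint16_t)((hwords - LRH_WORDS + nwords + ICRC_WORDS) << 2);
}
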
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                          struct ipath_other_headers *ohdr,
-                          u32 bth0, u32 bth2)
-{
-       u16 lrh0;
-       u32 nwords;
-       u32 extra_bytes;
-
-       /* Construct the header. */
-       extra_bytes = -qp->s_cur_size & 3;
-       nwords = (qp->s_cur_size + extra_bytes) >> 2;
-       lrh0 = IPATH_LRH_BTH;
-       if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-                                                &qp->remote_ah_attr.grh,
-                                                qp->s_hdrwords, nwords);
-               lrh0 = IPATH_LRH_GRH;
-       }
-       lrh0 |= qp->remote_ah_attr.sl << 4;
-       qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-       qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-       qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
-                                      qp->remote_ah_attr.src_path_bits);
-       bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
-       bth0 |= extra_bytes << 20;
-       ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
-       ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(bth2);
-}
-
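The extra_bytes line above uses the standard trick for rounding a length up to a 4-byte boundary: on an unsigned value, -len & 3 is exactly the number of pad bytes needed. A quick, runnable demonstration:

#include <assert.h>
#include <stdint.h>

/* Bytes of padding needed to bring len up to a multiple of 4 (0..3). */
static uint32_t pad4(uint32_t len)
{
        return -len & 3;        /* unsigned negation wraps mod 2^32 */
}

int main(void)
{
        assert(pad4(0) == 0);
        assert(pad4(1) == 3);
        assert(pad4(5) == 3);
        assert(pad4(8) == 0);
        return 0;
}
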
-/**
- * ipath_do_send - perform a send on a QP
- * @data: contains a pointer to the QP
- *
- * Process entries in the send work queue until credit or queue is
- * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, two threads could send packets out of order.
- */
-void ipath_do_send(unsigned long data)
-{
-       struct ipath_qp *qp = (struct ipath_qp *)data;
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       int (*make_req)(struct ipath_qp *qp);
-       unsigned long flags;
-
-       if ((qp->ibqp.qp_type == IB_QPT_RC ||
-            qp->ibqp.qp_type == IB_QPT_UC) &&
-           qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
-               ipath_ruc_loopback(qp);
-               goto bail;
-       }
-
-       if (qp->ibqp.qp_type == IB_QPT_RC)
-              make_req = ipath_make_rc_req;
-       else if (qp->ibqp.qp_type == IB_QPT_UC)
-              make_req = ipath_make_uc_req;
-       else
-              make_req = ipath_make_ud_req;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Return if we are already busy processing a work request. */
-       if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-           !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               goto bail;
-       }
-
-       qp->s_flags |= IPATH_S_BUSY;
-
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-again:
-       /* Check for a constructed packet to be sent. */
-       if (qp->s_hdrwords != 0) {
-               /*
-                * If no PIO bufs are available, return.  An interrupt will
-                * call ipath_ib_piobufavail() when one is available.
-                */
-               if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
-                                    qp->s_cur_sge, qp->s_cur_size)) {
-                       if (ipath_no_bufs_available(qp, dev))
-                               goto bail;
-               }
-               dev->n_unicast_xmit++;
-               /* Record that we sent the packet and s_hdr is empty. */
-               qp->s_hdrwords = 0;
-       }
-
-       if (make_req(qp))
-               goto again;
-
-bail:;
-}
-
-/*
- * This should be called with s_lock held.
- */
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-                        enum ib_wc_status status)
-{
-       u32 old_last, last;
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-               return;
-
-       /* See ch. 11.2.4.1 and 10.7.3.1 */
-       if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-           (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-           status != IB_WC_SUCCESS) {
-               struct ib_wc wc;
-
-               memset(&wc, 0, sizeof wc);
-               wc.wr_id = wqe->wr.wr_id;
-               wc.status = status;
-               wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-               wc.qp = &qp->ibqp;
-               if (status == IB_WC_SUCCESS)
-                       wc.byte_len = wqe->length;
-               ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
-                              status != IB_WC_SUCCESS);
-       }
-
-       old_last = last = qp->s_last;
-       if (++last >= qp->s_size)
-               last = 0;
-       qp->s_last = last;
-       if (qp->s_cur == old_last)
-               qp->s_cur = last;
-       if (qp->s_tail == old_last)
-               qp->s_tail = last;
-       if (qp->state == IB_QPS_SQD && last == qp->s_cur)
-               qp->s_draining = 0;
-}
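
The completion is queued only when required (per the ch. 11.2.4.1 / 10.7.3.1 note above): the QP signals every WR, this WR asked to be signaled, or the send failed. The predicate on its own, with hypothetical flag names:

#include <stdbool.h>

/* Mirrors the test in ipath_send_complete(): generate a CQE unless the
 * QP is in selective-signaling mode, this WR did not set the SIGNALED
 * flag, and the send succeeded. */
static bool need_completion(bool qp_signals_all, bool wr_signaled,
                            bool success)
{
        return qp_signals_all || wr_signaled || !success;
}
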
diff --git a/drivers/staging/rdma/ipath/ipath_sdma.c b/drivers/staging/rdma/ipath/ipath_sdma.c
deleted file mode 100644 (file)
index 1ffc06a..0000000
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-#include <linux/gfp.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
-
-static void vl15_watchdog_enq(struct ipath_devdata *dd)
-{
-       /* ipath_sdma_lock must already be held */
-       if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
-               unsigned long interval = (HZ + 19) / 20;
-               dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
-               add_timer(&dd->ipath_sdma_vl15_timer);
-       }
-}
-
-static void vl15_watchdog_deq(struct ipath_devdata *dd)
-{
-       /* ipath_sdma_lock must already be held */
-       if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
-               unsigned long interval = (HZ + 19) / 20;
-               mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
-       } else {
-               del_timer(&dd->ipath_sdma_vl15_timer);
-       }
-}
-
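The (HZ + 19) / 20 in the two helpers above is ceiling division: 1/20 of a second (50 ms) converted to jiffies, rounded up so the interval is never shorter than intended when HZ is not a multiple of 20. The general form, as a sketch:

/* Smallest jiffy count covering ms milliseconds (ceiling division).
 * With hz = 100 and ms = 50 this yields 5; with hz = 250, 13. */
#define MS_TO_JIFFIES_UP(ms, hz)        (((ms) * (hz) + 999) / 1000)
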
-static void vl15_watchdog_timeout(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
-               ipath_dbg("vl15 watchdog timeout - clearing\n");
-               ipath_cancel_sends(dd, 1);
-               ipath_hol_down(dd);
-       } else {
-               ipath_dbg("vl15 watchdog timeout - "
-                         "condition already cleared\n");
-       }
-}
-
-static void unmap_desc(struct ipath_devdata *dd, unsigned head)
-{
-       __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
-       u64 desc[2];
-       dma_addr_t addr;
-       size_t len;
-
-       desc[0] = le64_to_cpu(descqp[0]);
-       desc[1] = le64_to_cpu(descqp[1]);
-
-       addr = (desc[1] << 32) | (desc[0] >> 32);
-       len = (desc[0] >> 14) & (0x7ffULL << 2);
-       dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
-}
-
-/*
- * ipath_sdma_lock should be locked before calling this.
- */
-int ipath_sdma_make_progress(struct ipath_devdata *dd)
-{
-       struct list_head *lp = NULL;
-       struct ipath_sdma_txreq *txp = NULL;
-       u16 dmahead;
-       u16 start_idx = 0;
-       int progress = 0;
-
-       if (!list_empty(&dd->ipath_sdma_activelist)) {
-               lp = dd->ipath_sdma_activelist.next;
-               txp = list_entry(lp, struct ipath_sdma_txreq, list);
-               start_idx = txp->start_idx;
-       }
-
-       /*
-        * Read the SDMA head register in order to know that the
-        * interrupt clear has been written to the chip.
-        * Otherwise, we may not get an interrupt for the last
-        * descriptor in the queue.
-        */
-       dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
-       /* sanity check return value for error handling (chip reset, etc.) */
-       if (dmahead >= dd->ipath_sdma_descq_cnt)
-               goto done;
-
-       while (dd->ipath_sdma_descq_head != dmahead) {
-               if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
-                   dd->ipath_sdma_descq_head == start_idx) {
-                       unmap_desc(dd, dd->ipath_sdma_descq_head);
-                       start_idx++;
-                       if (start_idx == dd->ipath_sdma_descq_cnt)
-                               start_idx = 0;
-               }
-
-               /* increment free count and head */
-               dd->ipath_sdma_descq_removed++;
-               if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
-                       dd->ipath_sdma_descq_head = 0;
-
-               if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
-                       /* move to notify list */
-                       if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                               vl15_watchdog_deq(dd);
-                       list_move_tail(lp, &dd->ipath_sdma_notifylist);
-                       if (!list_empty(&dd->ipath_sdma_activelist)) {
-                               lp = dd->ipath_sdma_activelist.next;
-                               txp = list_entry(lp, struct ipath_sdma_txreq,
-                                                list);
-                               start_idx = txp->start_idx;
-                       } else {
-                               lp = NULL;
-                               txp = NULL;
-                       }
-               }
-               progress = 1;
-       }
-
-       if (progress)
-               tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-done:
-       return progress;
-}
-
-static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
-{
-       struct ipath_sdma_txreq *txp, *txp_next;
-
-       list_for_each_entry_safe(txp, txp_next, list, list) {
-               list_del_init(&txp->list);
-
-               if (txp->callback)
-                       (*txp->callback)(txp->callback_cookie,
-                                        txp->callback_status);
-       }
-}
-
-static void sdma_notify_taskbody(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       struct list_head list;
-
-       INIT_LIST_HEAD(&list);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       list_splice_init(&dd->ipath_sdma_notifylist, &list);
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       ipath_sdma_notify(dd, &list);
-
-       /*
-        * The IB verbs layer needs to see the callback before getting
-        * the call to ipath_ib_piobufavail() because the callback
-        * handles releasing resources the next send will need.
-        * Otherwise, we could do these calls in
-        * ipath_sdma_make_progress().
-        */
-       ipath_ib_piobufavail(dd->verbs_dev);
-}
-
-static void sdma_notify_task(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               sdma_notify_taskbody(dd);
-}
-
-static void dump_sdma_state(struct ipath_devdata *dd)
-{
-       unsigned long reg;
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
-       ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
-       ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-       ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-       ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
-}
-
-static void sdma_abort_task(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-       u64 status;
-       unsigned long flags;
-
-       if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               return;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
-
-       /* nothing to do */
-       if (status == IPATH_SDMA_ABORT_NONE)
-               goto unlock;
-
-       /* ipath_sdma_abort() is done, waiting for interrupt */
-       if (status == IPATH_SDMA_ABORT_DISARMED) {
-               if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
-                       goto resched_noprint;
-               /* give up, intr got lost somewhere */
-               ipath_dbg("give up waiting for SDMADISABLED intr\n");
-               __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               status = IPATH_SDMA_ABORT_ABORTED;
-       }
-
-       /* everything is stopped, time to clean up and restart */
-       if (status == IPATH_SDMA_ABORT_ABORTED) {
-               struct ipath_sdma_txreq *txp, *txpnext;
-               u64 hwstatus;
-               int notify = 0;
-
-               hwstatus = ipath_read_kreg64(dd,
-                               dd->ipath_kregs->kr_senddmastatus);
-
-               if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
-                                IPATH_SDMA_STATUS_ABORT_IN_PROG             |
-                                IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
-                   !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
-                       if (dd->ipath_sdma_reset_wait > 0) {
-                               /* not done shutting down sdma */
-                               --dd->ipath_sdma_reset_wait;
-                               goto resched;
-                       }
-                       ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
-                               "status after SDMA reset, continuing\n");
-                       dump_sdma_state(dd);
-               }
-
-               /* dequeue all "sent" requests */
-               list_for_each_entry_safe(txp, txpnext,
-                                        &dd->ipath_sdma_activelist, list) {
-                       txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
-                       if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                               vl15_watchdog_deq(dd);
-                       list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-                       notify = 1;
-               }
-               if (notify)
-                       tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-               /* reset our notion of head and tail */
-               dd->ipath_sdma_descq_tail = 0;
-               dd->ipath_sdma_descq_head = 0;
-               dd->ipath_sdma_head_dma[0] = 0;
-               dd->ipath_sdma_generation = 0;
-               dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
-
-               /* Reset SendDmaLenGen */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
-                       (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
-
-               /* done with sdma state for a bit */
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-               /*
-                * Don't restart sdma here (with the exception
-                * below). Wait until link is up to ACTIVE.  VL15 MADs
-                * used to bring the link up use PIO, and multiple link
-                * transitions otherwise cause the sdma engine to be
-                * stopped and started multiple times.
-                * The disable is done here, including the shadow,
-                * so the state is kept consistent.
-                * See ipath_restart_sdma() for the actual starting
-                * of sdma.
-                */
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-               /* make sure I see next message */
-               dd->ipath_sdma_abort_jiffies = 0;
-
-               /*
-                * Not everything that takes SDMA offline is a link
-                * status change.  If the link was up, restart SDMA.
-                */
-               if (dd->ipath_flags & IPATH_LINKACTIVE)
-                       ipath_restart_sdma(dd);
-
-               goto done;
-       }
-
-resched:
-       /*
-        * for now, keep spinning
-        * JAG - it is bad to just have the default be a loop without
-        * a state change
-        */
-       if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
-               ipath_dbg("looping with status 0x%08lx\n",
-                         dd->ipath_sdma_status);
-               dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
-       }
-resched_noprint:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-       return;
-
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-done:
-       return;
-}
-
-/*
- * This is called from interrupt context.
- */
-void ipath_sdma_intr(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       (void) ipath_sdma_make_progress(dd);
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-}
-
-static int alloc_sdma(struct ipath_devdata *dd)
-{
-       int ret = 0;
-
-       /* Allocate memory for SendDMA descriptor FIFO */
-       dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
-               SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
-
-       if (!dd->ipath_sdma_descq) {
-               ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
-                       "FIFO memory\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       dd->ipath_sdma_descq_cnt =
-               SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
-
-       /* Allocate memory for DMA of head register to memory */
-       dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
-               PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
-       if (!dd->ipath_sdma_head_dma) {
-               ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
-               ret = -ENOMEM;
-               goto cleanup_descq;
-       }
-       dd->ipath_sdma_head_dma[0] = 0;
-
-       setup_timer(&dd->ipath_sdma_vl15_timer, vl15_watchdog_timeout,
-                       (unsigned long)dd);
-
-       atomic_set(&dd->ipath_sdma_vl15_count, 0);
-
-       goto done;
-
-cleanup_descq:
-       dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-               (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
-       dd->ipath_sdma_descq = NULL;
-       dd->ipath_sdma_descq_phys = 0;
-done:
-       return ret;
-}
-
-int setup_sdma(struct ipath_devdata *dd)
-{
-       int ret = 0;
-       unsigned i, n;
-       u64 tmp64;
-       u64 senddmabufmask[3] = { 0 };
-       unsigned long flags;
-
-       ret = alloc_sdma(dd);
-       if (ret)
-               goto done;
-
-       if (!dd->ipath_sdma_descq) {
-               ipath_dev_err(dd, "SendDMA memory not allocated\n");
-               goto done;
-       }
-
-       /*
-        * Set initial status as if we had been up, then gone down.
-        * This lets initial start on transition to ACTIVE be the
-        * same as restart after link flap.
-        */
-       dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
-       dd->ipath_sdma_abort_jiffies = 0;
-       dd->ipath_sdma_generation = 0;
-       dd->ipath_sdma_descq_tail = 0;
-       dd->ipath_sdma_descq_head = 0;
-       dd->ipath_sdma_descq_removed = 0;
-       dd->ipath_sdma_descq_added = 0;
-
-       /* Set SendDmaBase */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
-                        dd->ipath_sdma_descq_phys);
-       /* Set SendDmaLenGen */
-       tmp64 = dd->ipath_sdma_descq_cnt;
-       tmp64 |= 1<<18; /* enable generation checking */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
-       /* Set SendDmaTail */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
-                        dd->ipath_sdma_descq_tail);
-       /* Set SendDmaHeadAddr */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
-                        dd->ipath_sdma_head_phys);
-
-       /*
-        * Reserve all the former "kernel" piobufs, using high number range
-        * so we get as many 4K buffers as possible
-        */
-       n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
-       ipath_chg_pioavailkernel(dd, i, n - i, 0);
-       for (; i < n; ++i) {
-               unsigned word = i / 64;
-               unsigned bit = i & 63;
-               BUG_ON(word >= 3);
-               senddmabufmask[word] |= 1ULL << bit;
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
-                        senddmabufmask[0]);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
-                        senddmabufmask[1]);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
-                        senddmabufmask[2]);
-
-       INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
-       INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
-
-       tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
-                    (unsigned long) dd);
-       tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
-                    (unsigned long) dd);
-
-       /*
-        * No use turning on SDMA here, as the link is probably not
-        * ACTIVE.  Just mark it RUNNING and enable the interrupt, and
-        * let ipath_restart_sdma(), on the link transition to ACTIVE,
-        * actually enable it.
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-done:
-       return ret;
-}
-
-void teardown_sdma(struct ipath_devdata *dd)
-{
-       struct ipath_sdma_txreq *txp, *txpnext;
-       unsigned long flags;
-       dma_addr_t sdma_head_phys = 0;
-       dma_addr_t sdma_descq_phys = 0;
-       void *sdma_descq = NULL;
-       void *sdma_head_dma = NULL;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-       __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       tasklet_kill(&dd->ipath_sdma_abort_task);
-       tasklet_kill(&dd->ipath_sdma_notify_task);
-
-       /* turn off sdma */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-               dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       /* dequeue all "sent" requests */
-       list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
-                                list) {
-               txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
-               if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                       vl15_watchdog_deq(dd);
-               list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       sdma_notify_taskbody(dd);
-
-       del_timer_sync(&dd->ipath_sdma_vl15_timer);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       dd->ipath_sdma_abort_jiffies = 0;
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
-
-       if (dd->ipath_sdma_head_dma) {
-               sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
-               sdma_head_phys = dd->ipath_sdma_head_phys;
-               dd->ipath_sdma_head_dma = NULL;
-               dd->ipath_sdma_head_phys = 0;
-       }
-
-       if (dd->ipath_sdma_descq) {
-               sdma_descq = dd->ipath_sdma_descq;
-               sdma_descq_phys = dd->ipath_sdma_descq_phys;
-               dd->ipath_sdma_descq = NULL;
-               dd->ipath_sdma_descq_phys = 0;
-       }
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       if (sdma_head_dma)
-               dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                 sdma_head_dma, sdma_head_phys);
-
-       if (sdma_descq)
-               dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-                                 sdma_descq, sdma_descq_phys);
-}
-
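The free path above follows the usual shape for DMA memory guarded by a spinlock: detach the pointers under the lock so concurrent users see them as gone, then call dma_free_coherent() only after the lock is dropped. The same shape in miniature, using a mutex and free() as stand-ins:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *shared_buf;        /* stands in for dd->ipath_sdma_descq */

static void teardown(void)
{
        void *snapshot;

        pthread_mutex_lock(&lock);
        snapshot = shared_buf;  /* take ownership under the lock */
        shared_buf = NULL;      /* concurrent users now see it gone */
        pthread_mutex_unlock(&lock);

        free(snapshot);         /* safe unlocked; free(NULL) is a no-op */
}
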
-/*
- * [Re]start SDMA, if we use it, and it's not already OK.
- * This is called on transition to link ACTIVE, either the first or
- * subsequent times.
- */
-void ipath_restart_sdma(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int needed = 1;
-
-       if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-               goto bail;
-
-       /*
-        * First, make sure we actually should: check that we are
-        * "RUNNING" (not in teardown) and not "SHUTDOWN".
-        */
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
-               || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       needed = 0;
-       else {
-               __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-               __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!needed) {
-               ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
-                       dd->ipath_sdma_status);
-               goto bail;
-       }
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       /*
-        * First clear, just to be safe; the chip acts on the enable
-        * only on a 0->1 transition.
-        */
-       dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /* notify upper layers */
-       ipath_ib_piobufavail(dd->verbs_dev);
-
-bail:
-       return;
-}
-
-static inline void make_sdma_desc(struct ipath_devdata *dd,
-       u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
-{
-       WARN_ON(addr & 3);
-       /* SDmaPhyAddr[47:32] */
-       sdmadesc[1] = addr >> 32;
-       /* SDmaPhyAddr[31:0] */
-       sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
-       /* SDmaGeneration[1:0] */
-       sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
-       /* SDmaDwordCount[10:0] */
-       sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
-       /* SDmaBufOffset[12:2] */
-       sdmadesc[0] |= dwoffset & 0x7ffULL;
-}
-
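make_sdma_desc() above and unmap_desc() earlier in this file are inverse views of one 128-bit descriptor: qword 1 holds the high address bits, and qword 0 packs the low address bits in its upper half with generation, dword count, and offset below. A round-trip sketch of the address and length fields (generation and offset omitted; illustrative only):

#include <assert.h>
#include <stdint.h>

/* Pack as in make_sdma_desc(), address and dword count only. */
static void pack(uint64_t desc[2], uint64_t addr, uint64_t dwlen)
{
        desc[1] = addr >> 32;                   /* SDmaPhyAddr[47:32] */
        desc[0] = (addr & 0xfffffffcULL) << 32; /* SDmaPhyAddr[31:0] */
        desc[0] |= (dwlen & 0x7ffULL) << 16;    /* SDmaDwordCount[10:0] */
}

/* Unpack as in unmap_desc().  The dword count sits at bits 26:16, so
 * shifting right by 14 leaves it pre-multiplied by 4: a byte length. */
static void unpack(const uint64_t desc[2], uint64_t *addr, uint64_t *len)
{
        *addr = (desc[1] << 32) | (desc[0] >> 32);
        *len = (desc[0] >> 14) & (0x7ffULL << 2);
}

int main(void)
{
        uint64_t d[2], addr, len;

        pack(d, 0x123456789ab8ULL, 0x1f3);
        unpack(d, &addr, &len);
        assert(addr == 0x123456789ab8ULL && len == (0x1f3 << 2));
        return 0;
}
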
-/*
- * This function queues one IB packet onto the send DMA queue per call.
- * The caller is responsible for checking:
- * 1) The number of send DMA descriptor entries is less than the size of
- *    the descriptor queue.
- * 2) The IB SGE addresses and lengths are 32-bit aligned
- *    (except possibly the last SGE's length)
- * 3) The SGE addresses are suitable for passing to dma_map_single().
- */
-int ipath_sdma_verbs_send(struct ipath_devdata *dd,
-       struct ipath_sge_state *ss, u32 dwords,
-       struct ipath_verbs_txreq *tx)
-{
-
-       unsigned long flags;
-       struct ipath_sge *sge;
-       int ret = 0;
-       u16 tail;
-       __le64 *descqp;
-       u64 sdmadesc[2];
-       u32 dwoffset;
-       dma_addr_t addr;
-
-       if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
-               ipath_dbg("packet size %X > ibmax %X, fail\n",
-                       tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
-               ret = -EMSGSIZE;
-               goto fail;
-       }
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-retry:
-       if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
-               ret = -EBUSY;
-               goto unlock;
-       }
-
-       if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
-               if (ipath_sdma_make_progress(dd))
-                       goto retry;
-               ret = -ENOBUFS;
-               goto unlock;
-       }
-
-       addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
-                             tx->map_len, DMA_TO_DEVICE);
-       if (dma_mapping_error(&dd->pcidev->dev, addr))
-               goto ioerr;
-
-       dwoffset = tx->map_len >> 2;
-       make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
-
-       /* SDmaFirstDesc */
-       sdmadesc[0] |= 1ULL << 12;
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-               sdmadesc[0] |= 1ULL << 14;      /* SDmaUseLargeBuf */
-
-       /* write to the descq */
-       tail = dd->ipath_sdma_descq_tail;
-       descqp = &dd->ipath_sdma_descq[tail].qw[0];
-       *descqp++ = cpu_to_le64(sdmadesc[0]);
-       *descqp++ = cpu_to_le64(sdmadesc[1]);
-
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
-               tx->txreq.start_idx = tail;
-
-       /* increment the tail */
-       if (++tail == dd->ipath_sdma_descq_cnt) {
-               tail = 0;
-               descqp = &dd->ipath_sdma_descq[0].qw[0];
-               ++dd->ipath_sdma_generation;
-       }
-
-       sge = &ss->sge;
-       while (dwords) {
-               u32 dw;
-               u32 len;
-
-               len = dwords << 2;
-               if (len > sge->length)
-                       len = sge->length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               dw = (len + 3) >> 2;
-               addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
-                                     DMA_TO_DEVICE);
-               if (dma_mapping_error(&dd->pcidev->dev, addr))
-                       goto unmap;
-               make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
-               /* SDmaUseLargeBuf has to be set in every descriptor */
-               if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-                       sdmadesc[0] |= 1ULL << 14;
-               /* write to the descq */
-               *descqp++ = cpu_to_le64(sdmadesc[0]);
-               *descqp++ = cpu_to_le64(sdmadesc[1]);
-
-               /* increment the tail */
-               if (++tail == dd->ipath_sdma_descq_cnt) {
-                       tail = 0;
-                       descqp = &dd->ipath_sdma_descq[0].qw[0];
-                       ++dd->ipath_sdma_generation;
-               }
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-
-               dwoffset += dw;
-               dwords -= dw;
-       }
-
-       if (!tail)
-               descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
-       descqp -= 2;
-       /* SDmaLastDesc */
-       descqp[0] |= cpu_to_le64(1ULL << 11);
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
-               /* SDmaIntReq */
-               descqp[0] |= cpu_to_le64(1ULL << 15);
-       }
-
-       /* Commit writes to memory and advance the tail on the chip */
-       wmb();
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-
-       tx->txreq.next_descq_idx = tail;
-       tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
-       dd->ipath_sdma_descq_tail = tail;
-       dd->ipath_sdma_descq_added += tx->txreq.sg_count;
-       list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
-               vl15_watchdog_enq(dd);
-       goto unlock;
-
-unmap:
-       while (tail != dd->ipath_sdma_descq_tail) {
-               if (!tail)
-                       tail = dd->ipath_sdma_descq_cnt - 1;
-               else
-                       tail--;
-               unmap_desc(dd, tail);
-       }
-ioerr:
-       ret = -EIO;
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-fail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_srq.c b/drivers/staging/rdma/ipath/ipath_srq.c
deleted file mode 100644 (file)
index 2627198..0000000
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-                          struct ib_recv_wr **bad_wr)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_rwq *wq;
-       unsigned long flags;
-       int ret;
-
-       for (; wr; wr = wr->next) {
-               struct ipath_rwqe *wqe;
-               u32 next;
-               int i;
-
-               if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-                       *bad_wr = wr;
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               spin_lock_irqsave(&srq->rq.lock, flags);
-               wq = srq->rq.wq;
-               next = wq->head + 1;
-               if (next >= srq->rq.size)
-                       next = 0;
-               if (next == wq->tail) {
-                       spin_unlock_irqrestore(&srq->rq.lock, flags);
-                       *bad_wr = wr;
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               wqe = get_rwqe_ptr(&srq->rq, wq->head);
-               wqe->wr_id = wr->wr_id;
-               wqe->num_sge = wr->num_sge;
-               for (i = 0; i < wr->num_sge; i++)
-                       wqe->sg_list[i] = wr->sg_list[i];
-               /* Make sure queue entry is written before the head index. */
-               smp_wmb();
-               wq->head = next;
-               spin_unlock_irqrestore(&srq->rq.lock, flags);
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
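Posting is the producer half of the ring consumed by ipath_get_rwqe(): compute the next head, treat next == tail as full (one slot is always left unused so full and empty are distinguishable), fill the entry, then publish the new head behind a write barrier. Detached from the driver types, a sketch:

#include <stdint.h>

struct slot { uint64_t wr_id; };

struct rring {
        uint32_t size;          /* slots; usable capacity is size - 1 */
        uint32_t head, tail;
        struct slot *slots;
};

/* Returns 0 on success, -1 if the ring is full. */
static int ring_post(struct rring *r, const struct slot *s)
{
        uint32_t next = r->head + 1;

        if (next >= r->size)
                next = 0;
        if (next == r->tail)    /* full: head must never catch tail */
                return -1;

        r->slots[r->head] = *s;
        /* The driver issues smp_wmb() here so the consumer can never
         * observe the new head before the entry it indexes. */
        r->head = next;
        return 0;
}
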
-/**
- * ipath_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libipathverbs when creating a user SRQ
- */
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-                               struct ib_srq_init_attr *srq_init_attr,
-                               struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibpd->device);
-       struct ipath_srq *srq;
-       u32 sz;
-       struct ib_srq *ret;
-
-       if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-               ret = ERR_PTR(-ENOSYS);
-               goto done;
-       }
-
-       if (srq_init_attr->attr.max_wr == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
-           (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-       if (!srq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto done;
-       }
-
-       /*
-        * Need to use vmalloc() if we want to support large numbers of entries.
-        */
-       srq->rq.size = srq_init_attr->attr.max_wr + 1;
-       srq->rq.max_sge = srq_init_attr->attr.max_sge;
-       sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-               sizeof(struct ipath_rwqe);
-       srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
-       if (!srq->rq.wq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_srq;
-       }
-
-       /*
-        * Return the address of the RWQ as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               int err;
-               u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
-
-               srq->ip =
-                   ipath_create_mmap_info(dev, s,
-                                          ibpd->uobject->context,
-                                          srq->rq.wq);
-               if (!srq->ip) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_wq;
-               }
-
-               err = ib_copy_to_udata(udata, &srq->ip->offset,
-                                      sizeof(srq->ip->offset));
-               if (err) {
-                       ret = ERR_PTR(err);
-                       goto bail_ip;
-               }
-       } else
-               srq->ip = NULL;
-
-       /*
-        * ib_create_srq() will initialize srq->ibsrq.
-        */
-       spin_lock_init(&srq->rq.lock);
-       srq->rq.wq->head = 0;
-       srq->rq.wq->tail = 0;
-       srq->limit = srq_init_attr->attr.srq_limit;
-
-       spin_lock(&dev->n_srqs_lock);
-       if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
-               spin_unlock(&dev->n_srqs_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_srqs_allocated++;
-       spin_unlock(&dev->n_srqs_lock);
-
-       if (srq->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = &srq->ibsrq;
-       goto done;
-
-bail_ip:
-       kfree(srq->ip);
-bail_wq:
-       vfree(srq->rq.wq);
-bail_srq:
-       kfree(srq);
-done:
-       return ret;
-}
-
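Each slot in the queue just allocated is a variable-size record: a fixed ipath_rwqe header followed by max_sge scatter/gather entries, so slots are addressed with a byte stride rather than plain array indexing (this is what get_rwqe_ptr() does). The stride arithmetic, sketched with simplified stand-in types:

#include <stddef.h>
#include <stdint.h>

struct sge  { uint64_t addr; uint32_t length; uint32_t lkey; };
struct rwqe { uint64_t wr_id; uint8_t num_sge; /* sge sg_list[] follows */ };

/* Byte stride of one slot holding up to max_sge SGEs. */
static size_t slot_stride(uint32_t max_sge)
{
        return sizeof(struct rwqe) + max_sge * sizeof(struct sge);
}

/* Address of slot n in a queue laid out as size * stride bytes. */
static struct rwqe *slot_ptr(void *base, uint32_t n, uint32_t max_sge)
{
        return (struct rwqe *)((char *)base + (size_t)n * slot_stride(max_sge));
}
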
-/**
- * ipath_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for ipathverbs.so
- */
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask,
-                    struct ib_udata *udata)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_rwq *wq;
-       int ret = 0;
-
-       if (attr_mask & IB_SRQ_MAX_WR) {
-               struct ipath_rwq *owq;
-               struct ipath_rwqe *p;
-               u32 sz, size, n, head, tail;
-
-               /* Check that the requested sizes are below the limits. */
-               if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-                   ((attr_mask & IB_SRQ_LIMIT) ?
-                    attr->srq_limit : srq->limit) > attr->max_wr) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               sz = sizeof(struct ipath_rwqe) +
-                       srq->rq.max_sge * sizeof(struct ib_sge);
-               size = attr->max_wr + 1;
-               wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
-               if (!wq) {
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               /* Check that we can write the offset to mmap. */
-               if (udata && udata->inlen >= sizeof(__u64)) {
-                       __u64 offset_addr;
-                       __u64 offset = 0;
-
-                       ret = ib_copy_from_udata(&offset_addr, udata,
-                                                sizeof(offset_addr));
-                       if (ret)
-                               goto bail_free;
-                       udata->outbuf =
-                               (void __user *) (unsigned long) offset_addr;
-                       ret = ib_copy_to_udata(udata, &offset,
-                                              sizeof(offset));
-                       if (ret)
-                               goto bail_free;
-               }
-
-               spin_lock_irq(&srq->rq.lock);
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               owq = srq->rq.wq;
-               head = owq->head;
-               if (head >= srq->rq.size)
-                       head = 0;
-               tail = owq->tail;
-               if (tail >= srq->rq.size)
-                       tail = 0;
-               n = head;
-               if (n < tail)
-                       n += srq->rq.size - tail;
-               else
-                       n -= tail;
-               if (size <= n) {
-                       ret = -EINVAL;
-                       goto bail_unlock;
-               }
-               n = 0;
-               p = wq->wq;
-               while (tail != head) {
-                       struct ipath_rwqe *wqe;
-                       int i;
-
-                       wqe = get_rwqe_ptr(&srq->rq, tail);
-                       p->wr_id = wqe->wr_id;
-                       p->num_sge = wqe->num_sge;
-                       for (i = 0; i < wqe->num_sge; i++)
-                               p->sg_list[i] = wqe->sg_list[i];
-                       n++;
-                       p = (struct ipath_rwqe *)((char *) p + sz);
-                       if (++tail >= srq->rq.size)
-                               tail = 0;
-               }
-               srq->rq.wq = wq;
-               srq->rq.size = size;
-               wq->head = n;
-               wq->tail = 0;
-               if (attr_mask & IB_SRQ_LIMIT)
-                       srq->limit = attr->srq_limit;
-               spin_unlock_irq(&srq->rq.lock);
-
-               vfree(owq);
-
-               if (srq->ip) {
-                       struct ipath_mmap_info *ip = srq->ip;
-                       struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
-                       u32 s = sizeof(struct ipath_rwq) + size * sz;
-
-                       ipath_update_mmap_info(dev, ip, s, wq);
-
-                       /*
-                        * Return the offset to mmap.
-                        * See ipath_mmap() for details.
-                        */
-                       if (udata && udata->inlen >= sizeof(__u64)) {
-                               ret = ib_copy_to_udata(udata, &ip->offset,
-                                                      sizeof(ip->offset));
-                               if (ret)
-                                       goto bail;
-                       }
-
-                       spin_lock_irq(&dev->pending_lock);
-                       if (list_empty(&ip->pending_mmaps))
-                               list_add(&ip->pending_mmaps,
-                                        &dev->pending_mmaps);
-                       spin_unlock_irq(&dev->pending_lock);
-               }
-       } else if (attr_mask & IB_SRQ_LIMIT) {
-               spin_lock_irq(&srq->rq.lock);
-               if (attr->srq_limit >= srq->rq.size)
-                       ret = -EINVAL;
-               else
-                       srq->limit = attr->srq_limit;
-               spin_unlock_irq(&srq->rq.lock);
-       }
-       goto bail;
-
-bail_unlock:
-       spin_unlock_irq(&srq->rq.lock);
-bail_free:
-       vfree(wq);
-bail:
-       return ret;
-}
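
The resize path above must count how many receive WQEs are still outstanding in the old circular queue before copying them into the new one. Below is a minimal standalone sketch of that head/tail arithmetic; the names are invented for illustration and this is not the driver's own code.

#include <assert.h>

/*
 * Number of valid entries in a circular queue of `size` slots, given
 * producer (head) and consumer (tail) indices that wrap at `size`.
 * Mirrors the "n = head; if (n < tail) n += size - tail; else n -= tail"
 * computation in ipath_modify_srq() above.
 */
static unsigned int ring_count(unsigned int head, unsigned int tail,
                               unsigned int size)
{
        unsigned int n = head;

        if (n < tail)
                n += size - tail;       /* head has wrapped past the end */
        else
                n -= tail;
        return n;
}

int main(void)
{
        assert(ring_count(5, 2, 8) == 3);       /* no wrap: slots 2,3,4 */
        assert(ring_count(1, 6, 8) == 3);       /* wrapped: slots 6,7,0 */
        assert(ring_count(4, 4, 8) == 0);       /* empty queue */
        return 0;
}

The requested new size must strictly exceed this count (the "size <= n" check above fails with -EINVAL), so the copy loop always fits with at least one free slot.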
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-
-       attr->max_wr = srq->rq.size - 1;
-       attr->max_sge = srq->rq.max_sge;
-       attr->srq_limit = srq->limit;
-       return 0;
-}
-
-/**
- * ipath_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int ipath_destroy_srq(struct ib_srq *ibsrq)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_ibdev *dev = to_idev(ibsrq->device);
-
-       spin_lock(&dev->n_srqs_lock);
-       dev->n_srqs_allocated--;
-       spin_unlock(&dev->n_srqs_lock);
-       if (srq->ip)
-               kref_put(&srq->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(srq->rq.wq);
-       kfree(srq);
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_stats.c b/drivers/staging/rdma/ipath/ipath_stats.c
deleted file mode 100644 (file)
index f63e143..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_kernel.h"
-
-struct infinipath_stats ipath_stats;
-
-/**
- * ipath_snap_cntr - snapshot a chip counter
- * @dd: the infinipath device
- * @creg: the counter to snapshot
- *
- * called from add_timer and user counter read calls, to deal with
- * counters that wrap in "human time".  The words sent and received, and
- * the packets sent and received are all that we worry about.  For now,
- * at least, we don't worry about error counters, because if they wrap
- * that quickly, we probably don't care.  We may eventually just make this
- * handle all the counters.  word counters can wrap in about 20 seconds
- * of full bandwidth traffic, packet counters in a few hours.
- */
-
-u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
-{
-       u32 val, reg64 = 0;
-       u64 val64;
-       unsigned long t0, t1;
-       u64 ret;
-
-       t0 = jiffies;
-       /* If fast increment counters are only 32 bits, snapshot them,
-        * and maintain them as 64-bit values in the driver */
-       if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
-           (creg == dd->ipath_cregs->cr_wordsendcnt ||
-            creg == dd->ipath_cregs->cr_wordrcvcnt ||
-            creg == dd->ipath_cregs->cr_pktsendcnt ||
-            creg == dd->ipath_cregs->cr_pktrcvcnt)) {
-               val64 = ipath_read_creg(dd, creg);
-               val = val64 == ~0ULL ? ~0U : 0;
-               reg64 = 1;
-       } else                  /* val64 just to keep gcc quiet... */
-               val64 = val = ipath_read_creg32(dd, creg);
-       /*
-        * See if a second has passed.  This is just a way to detect things
-        * that are quite broken.  Normally this should take just a few
-        * cycles (the check is for long enough that we don't care if we get
-        * pre-empted.)  An Opteron HT O read timeout is 4 seconds with
-        * normal NB values
-        */
-       t1 = jiffies;
-       if (time_before(t0 + HZ, t1) && val == -1) {
-               ipath_dev_err(dd, "Error!  Read counter 0x%x timed out\n",
-                             creg);
-               ret = 0ULL;
-               goto bail;
-       }
-       if (reg64) {
-               ret = val64;
-               goto bail;
-       }
-
-       if (creg == dd->ipath_cregs->cr_wordsendcnt) {
-               if (val != dd->ipath_lastsword) {
-                       dd->ipath_sword += val - dd->ipath_lastsword;
-                       dd->ipath_lastsword = val;
-               }
-               val64 = dd->ipath_sword;
-       } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
-               if (val != dd->ipath_lastrword) {
-                       dd->ipath_rword += val - dd->ipath_lastrword;
-                       dd->ipath_lastrword = val;
-               }
-               val64 = dd->ipath_rword;
-       } else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
-               if (val != dd->ipath_lastspkts) {
-                       dd->ipath_spkts += val - dd->ipath_lastspkts;
-                       dd->ipath_lastspkts = val;
-               }
-               val64 = dd->ipath_spkts;
-       } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
-               if (val != dd->ipath_lastrpkts) {
-                       dd->ipath_rpkts += val - dd->ipath_lastrpkts;
-                       dd->ipath_lastrpkts = val;
-               }
-               val64 = dd->ipath_rpkts;
-       } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
-               if (dd->ibdeltainprog)
-                       val64 -= val64 - dd->ibsymsnap;
-               val64 -= dd->ibsymdelta;
-       } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
-               if (dd->ibdeltainprog)
-                       val64 -= val64 - dd->iblnkerrsnap;
-               val64 -= dd->iblnkerrdelta;
-       } else
-               val64 = (u64) val;
-
-       ret = val64;
-
-bail:
-       return ret;
-}
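
The ipath_sword/ipath_lastsword bookkeeping above is the usual software extension of a wrapping 32-bit hardware counter to 64 bits: each snapshot adds the unsigned difference since the previous read, which stays correct across a single wrap. Here is a self-contained illustration of that technique; the struct and function names are invented for the example.

#include <assert.h>
#include <stdint.h>

struct soft_cntr {
        uint64_t total; /* 64-bit running value kept in software */
        uint32_t last;  /* last raw 32-bit snapshot from the chip */
};

/*
 * Fold a fresh 32-bit hardware reading into the 64-bit total.
 * (val - c->last) is computed modulo 2^32, so a single wrap
 * between snapshots is accounted for correctly.
 */
static uint64_t soft_cntr_snap(struct soft_cntr *c, uint32_t val)
{
        c->total += (uint32_t)(val - c->last);
        c->last = val;
        return c->total;
}

int main(void)
{
        struct soft_cntr c = { 0xfffffff0ull, 0xfffffff0u };

        /* hardware wrapped from 0xfffffff0 to 0x10: delta is 0x20 */
        assert(soft_cntr_snap(&c, 0x10) == 0x100000010ull);
        return 0;
}

This only works if the counter is sampled at least once per wrap period, which is why the timer below re-arms every 5 seconds while the comment above notes that word counters can wrap in roughly 20 seconds of full-bandwidth traffic.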
-
-/**
- * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
- * @dd: the infinipath device
- *
- * print the delta of egrfull/hdrqfull errors for kernel ports no more than
- * every 5 seconds.  User processes are printed at close, but the kernel doesn't
- * close, so...  Separate routine so it may be called from other places someday,
- * and so the function name is meaningful when printed by _IPATH_INFO
- */
-static void ipath_qcheck(struct ipath_devdata *dd)
-{
-       static u64 last_tot_hdrqfull;
-       struct ipath_portdata *pd = dd->ipath_pd[0];
-       size_t blen = 0;
-       char buf[128];
-       u32 hdrqtail;
-
-       *buf = 0;
-       if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
-               blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
-                               pd->port_hdrqfull -
-                               dd->ipath_p0_hdrqfull);
-               dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
-       }
-       if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
-               blen += snprintf(buf + blen, sizeof buf - blen,
-                                "%srcvegrfull %llu",
-                                blen ? ", " : "",
-                                (unsigned long long)
-                                (ipath_stats.sps_etidfull -
-                                 dd->ipath_last_tidfull));
-               dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
-       }
-
-       /*
-        * this is actually the number of hdrq full interrupts, not actual
-        * events, but at the moment that's mostly what I'm interested in.
-        * Actual count, etc. is in the counters, if needed.  For production
-        * users this won't ordinarily be printed.
-        */
-
-       if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
-           ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
-               blen += snprintf(buf + blen, sizeof buf - blen,
-                                "%shdrqfull %llu (all ports)",
-                                blen ? ", " : "",
-                                (unsigned long long)
-                                (ipath_stats.sps_hdrqfull -
-                                 last_tot_hdrqfull));
-               last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
-       }
-       if (blen)
-               ipath_dbg("%s\n", buf);
-
-       hdrqtail = ipath_get_hdrqtail(pd);
-       if (pd->port_head != hdrqtail) {
-               if (dd->ipath_lastport0rcv_cnt ==
-                   ipath_stats.sps_port0pkts) {
-                       ipath_cdbg(PKT, "missing rcv interrupts? "
-                                  "port0 hd=%x tl=%x; port0pkts %llx; write"
-                                  " hd (w/intr)\n",
-                                  pd->port_head, hdrqtail,
-                                  (unsigned long long)
-                                  ipath_stats.sps_port0pkts);
-                       ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
-                               dd->ipath_rhdrhead_intr_off, pd->port_port);
-               }
-               dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
-       }
-}
-
-static void ipath_chk_errormask(struct ipath_devdata *dd)
-{
-       static u32 fixed;
-       u32 ctrl;
-       unsigned long errormask;
-       unsigned long hwerrs;
-
-       if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
-               return;
-
-       errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-
-       if (errormask == dd->ipath_errormask)
-               return;
-       fixed++;
-
-       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-
-       if ((hwerrs & dd->ipath_hwerrmask) ||
-               (ctrl & INFINIPATH_C_FREEZEMODE)) {
-               /* force re-interrupt of pending events, just in case */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-               dev_info(&dd->pcidev->dev,
-                       "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
-                       fixed, errormask, (unsigned long)dd->ipath_errormask,
-                       ctrl, hwerrs);
-       } else
-               ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
-                       fixed, errormask,
-                       (unsigned long)dd->ipath_errormask);
-}
-
-
-/**
- * ipath_get_faststats - get word counters from chip before they overflow
- * @opaque: contains a pointer to the infinipath device ipath_devdata
- *
- * called from add_timer
- */
-void ipath_get_faststats(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-       int i;
-       static unsigned cnt;
-       unsigned long flags;
-       u64 traffic_wds;
-
-       /*
-        * don't access the chip while running diags, or memory diags can
-        * fail
-        */
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
-           ipath_diag_inuse)
-               /* but re-arm the timer, for the diags case; won't hurt otherwise */
-               goto done;
-
-       /*
-        * We now try to maintain an "active timer", based on traffic
-        * exceeding a threshold, so we need to check the word-counts
-        * even if they are 64-bit.
-        */
-       traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       traffic_wds -= dd->ipath_traffic_wds;
-       dd->ipath_traffic_wds += traffic_wds;
-       if (traffic_wds  >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
-               atomic_add(5, &dd->ipath_active_time); /* should be a #define */
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-
-       if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-       }
-
-       ipath_qcheck(dd);
-
-       /*
-        * deal with repeat error suppression.  Doesn't really matter if
-        * last error was almost a full interval ago, or just a few usecs
-        * ago; still won't get more than 2 per interval.  We may want
-        * longer intervals for this eventually, could do with mod, counter
-        * or separate timer.  Also see code in ipath_handle_errors() and
-        * ipath_handle_hwerrors().
-        */
-
-       if (dd->ipath_lasterror)
-               dd->ipath_lasterror = 0;
-       if (dd->ipath_lasthwerror)
-               dd->ipath_lasthwerror = 0;
-       if (dd->ipath_maskederrs
-           && time_after(jiffies, dd->ipath_unmasktime)) {
-               char ebuf[256];
-               int iserr;
-               iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
-                                        dd->ipath_maskederrs);
-               if (dd->ipath_maskederrs &
-                   ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                     INFINIPATH_E_PKTERRS))
-                       ipath_dev_err(dd, "Re-enabling masked errors "
-                                     "(%s)\n", ebuf);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal", for some
-                        * types of processes (mostly benchmarks) that send
-                        * huge numbers of messages, while not processing
-                        * them.  So only complain about these at debug
-                        * level.
-                        */
-                       if (iserr)
-                               ipath_dbg(
-                                       "Re-enabling queue full errors (%s)\n",
-                                       ebuf);
-                       else
-                               ipath_cdbg(ERRPKT, "Re-enabling packet"
-                                       " problem interrupt (%s)\n", ebuf);
-               }
-
-               /* re-enable masked errors */
-               dd->ipath_errormask |= dd->ipath_maskederrs;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                                dd->ipath_errormask);
-               dd->ipath_maskederrs = 0;
-       }
-
-       /* limit qfull messages to ~one per minute per port */
-       if ((++cnt & 0x10)) {
-               for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
-                       struct ipath_portdata *pd = dd->ipath_pd[i];
-
-                       if (pd && pd->port_lastrcvhdrqtail != -1)
-                               pd->port_lastrcvhdrqtail = -1;
-               }
-       }
-
-       ipath_chk_errormask(dd);
-done:
-       mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
-}
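
The active-time accounting in ipath_get_faststats() reduces to: on each 5-second tick, diff the cumulative send+receive word count against the previous total and credit 5 seconds of activity if the interval was busy. A hedged sketch of just that bookkeeping follows, with a placeholder threshold rather than the driver's constant.

#include <assert.h>
#include <stdint.h>

#define TRAFFIC_ACTIVE_THRESHOLD 1000ull /* placeholder, not the driver's value */

struct traffic_state {
        uint64_t last_wds;    /* cumulative word count at previous tick */
        uint64_t active_secs; /* accumulated "link active" time */
};

/*
 * Called once per 5-second timer tick with the current cumulative
 * send+receive word count; credits 5 s when the interval was busy.
 */
static void traffic_tick(struct traffic_state *t, uint64_t total_wds)
{
        uint64_t delta = total_wds - t->last_wds;

        t->last_wds = total_wds;
        if (delta >= TRAFFIC_ACTIVE_THRESHOLD)
                t->active_secs += 5;
}

int main(void)
{
        struct traffic_state t = { 0, 0 };

        traffic_tick(&t, 5000); /* busy interval: credited */
        traffic_tick(&t, 5100); /* quiet interval: not credited */
        assert(t.active_secs == 5);
        return 0;
}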
diff --git a/drivers/staging/rdma/ipath/ipath_sysfs.c b/drivers/staging/rdma/ipath/ipath_sysfs.c
deleted file mode 100644 (file)
index b12b1f6..0000000
+++ /dev/null
@@ -1,1237 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/ctype.h>
-#include <linux/stat.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-/**
- * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
- * @str: the string containing the number
- * @valp: where to put the result
- *
- * returns the number of bytes consumed, or a negative value on error
- */
-int ipath_parse_ushort(const char *str, unsigned short *valp)
-{
-       unsigned long val;
-       char *end;
-       int ret;
-
-       if (!isdigit(str[0])) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       val = simple_strtoul(str, &end, 0);
-
-       if (val > 0xffff) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       *valp = val;
-
-       ret = end + 1 - str;
-       if (ret == 0)
-               ret = -EINVAL;
-
-bail:
-       return ret;
-}
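
This helper fixes the return convention used by every store_*() handler below: a positive count of consumed bytes on success, a negative errno on failure, so callers test ret < 0 and can hand ret straight back as the sysfs write result. Note that "end + 1 - str" counts one byte past the last digit, which in practice swallows the trailing newline a sysfs write usually carries. A hypothetical userspace analogue, for illustration only:

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>

/*
 * Userspace analogue of ipath_parse_ushort(): returns bytes consumed
 * (> 0) on success, or a negative errno-style value on error.  Base 0
 * accepts decimal, octal ("0...") and hex ("0x...") input.
 */
static int parse_ushort(const char *str, unsigned short *valp)
{
        unsigned long val;
        char *end;

        if (!isdigit((unsigned char)str[0]))
                return -EINVAL;
        val = strtoul(str, &end, 0);
        if (val > 0xffff)
                return -EINVAL;
        *valp = (unsigned short)val;
        /* count one past the last digit, matching the driver's quirk */
        return (int)(end + 1 - str);
}

int main(void)
{
        unsigned short v = 0;

        assert(parse_ushort("7\n", &v) == 2 && v == 7);
        assert(parse_ushort("0x10\n", &v) == 5 && v == 16);
        assert(parse_ushort("x", &v) == -EINVAL);
        return 0;
}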
-
-static ssize_t show_version(struct device_driver *dev, char *buf)
-{
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
-}
-
-static ssize_t show_num_units(struct device_driver *dev, char *buf)
-{
-       return scnprintf(buf, PAGE_SIZE, "%d\n",
-                        ipath_count_units(NULL, NULL, NULL));
-}
-
-static ssize_t show_status(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-
-       if (!dd->ipath_statusp) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
-                       (unsigned long long) *(dd->ipath_statusp));
-
-bail:
-       return ret;
-}
-
-static const char *ipath_status_str[] = {
-       "Initted",
-       "Disabled",
-       "Admin_Disabled",
-       "", /* This used to be the old "OIB_SMA" status. */
-       "", /* This used to be the old "SMA" status. */
-       "Present",
-       "IB_link_up",
-       "IB_configured",
-       "NoIBcable",
-       "Fatal_Hardware_Error",
-       NULL,
-};
-
-static ssize_t show_status_str(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int i, any;
-       u64 s;
-       ssize_t ret;
-
-       if (!dd->ipath_statusp) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       s = *(dd->ipath_statusp);
-       *buf = '\0';
-       for (any = i = 0; s && ipath_status_str[i]; i++) {
-               if (s & 1) {
-                       if (any && strlcat(buf, " ", PAGE_SIZE) >=
-                           PAGE_SIZE)
-                               /* overflow */
-                               break;
-                       if (strlcat(buf, ipath_status_str[i],
-                                   PAGE_SIZE) >= PAGE_SIZE)
-                               break;
-                       any = 1;
-               }
-               s >>= 1;
-       }
-       if (any)
-               strlcat(buf, "\n", PAGE_SIZE);
-
-       ret = strlen(buf);
-
-bail:
-       return ret;
-}
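
show_status_str() above decodes the status word by walking it one bit at a time, shifting right and appending the matching name; the empty strings keep the retired bit positions from misaligning the later names. Here is a slightly tidied standalone analogue of the same decode, using snprintf where the kernel code uses strlcat:

#include <stdint.h>
#include <stdio.h>

static const char *status_str[] = {
        "Initted", "Disabled", "Admin_Disabled", "", "",
        "Present", "IB_link_up", "IB_configured",
        "NoIBcable", "Fatal_Hardware_Error", NULL,
};

/* Append the name of each set bit, skipping the retired (empty) slots. */
static void decode_status(uint64_t s, char *buf, size_t len)
{
        size_t used = 0;
        int i, any = 0;

        buf[0] = '\0';
        for (i = 0; s && status_str[i]; i++, s >>= 1) {
                if (!(s & 1) || !status_str[i][0])
                        continue;
                used += snprintf(buf + used, len - used, "%s%s",
                                 any ? " " : "", status_str[i]);
                any = 1;
                if (used >= len)        /* buffer full: stop early */
                        break;
        }
}

int main(void)
{
        char buf[128];

        decode_status((1u << 0) | (1u << 5) | (1u << 6), buf, sizeof(buf));
        puts(buf);      /* prints: Initted Present IB_link_up */
        return 0;
}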
-
-static ssize_t show_boardversion(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
-}
-
-static ssize_t show_localbus_info(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
-}
-
-static ssize_t show_lmc(struct device *dev,
-                       struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
-}
-
-static ssize_t store_lmc(struct device *dev,
-                        struct device_attribute *attr,
-                        const char *buf,
-                        size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 lmc = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &lmc);
-       if (ret < 0)
-               goto invalid;
-
-       if (lmc > 7) {
-               ret = -EINVAL;
-               goto invalid;
-       }
-
-       ipath_set_lid(dd, dd->ipath_lid, lmc);
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
-bail:
-       return ret;
-}
-
-static ssize_t show_lid(struct device *dev,
-                       struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
-}
-
-static ssize_t store_lid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 lid = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &lid);
-       if (ret < 0)
-               goto invalid;
-
-       if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) {
-               ret = -EINVAL;
-               goto invalid;
-       }
-
-       ipath_set_lid(dd, lid, dd->ipath_lmc);
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid);
-bail:
-       return ret;
-}
-
-static ssize_t show_mlid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
-}
-
-static ssize_t store_mlid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 mlid;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &mlid);
-       if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
-               goto invalid;
-
-       dd->ipath_mlid = mlid;
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid MLID\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_guid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u8 *guid;
-
-       guid = (u8 *) & (dd->ipath_guid);
-
-       return scnprintf(buf, PAGE_SIZE,
-                        "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-                        guid[0], guid[1], guid[2], guid[3],
-                        guid[4], guid[5], guid[6], guid[7]);
-}
-
-static ssize_t store_guid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       unsigned short guid[8];
-       __be64 new_guid;
-       u8 *ng;
-       int i;
-
-       if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
-                  &guid[0], &guid[1], &guid[2], &guid[3],
-                  &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
-               goto invalid;
-
-       ng = (u8 *) &new_guid;
-
-       for (i = 0; i < 8; i++) {
-               if (guid[i] > 0xff)
-                       goto invalid;
-               ng[i] = guid[i];
-       }
-
-       if (new_guid == 0)
-               goto invalid;
-
-       dd->ipath_guid = new_guid;
-       dd->ipath_nguid = 1;
-       if (dd->verbs_dev)
-               dd->verbs_dev->ibdev.node_guid = new_guid;
-
-       ret = strlen(buf);
-       goto bail;
-
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid GUID\n");
-       ret = -EINVAL;
-
-bail:
-       return ret;
-}
-
-static ssize_t show_nguid(struct device *dev,
-                         struct device_attribute *attr,
-                         char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
-}
-
-static ssize_t show_nports(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       /* Return the number of user ports available. */
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
-}
-
-static ssize_t show_serial(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       buf[sizeof dd->ipath_serial] = '\0';
-       memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
-       strcat(buf, "\n");
-       return strlen(buf);
-}
-
-static ssize_t show_unit(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
-}
-
-static ssize_t show_jint_max_packets(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
-}
-
-static ssize_t store_jint_max_packets(struct device *dev,
-                                     struct device_attribute *attr,
-                                     const char *buf,
-                                     size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 v = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &v);
-       if (ret < 0)
-               ipath_dev_err(dd, "invalid jint_max_packets.\n");
-       else
-               dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
-
-       return ret;
-}
-
-static ssize_t show_jint_idle_ticks(struct device *dev,
-                                   struct device_attribute *attr,
-                                   char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
-}
-
-static ssize_t store_jint_idle_ticks(struct device *dev,
-                                    struct device_attribute *attr,
-                                    const char *buf,
-                                    size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 v = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &v);
-       if (ret < 0)
-               ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
-       else
-               dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
-
-       return ret;
-}
-
-#define DEVICE_COUNTER(name, attr) \
-       static ssize_t show_counter_##name(struct device *dev, \
-                                          struct device_attribute *attr, \
-                                          char *buf) \
-       { \
-               struct ipath_devdata *dd = dev_get_drvdata(dev); \
-               return scnprintf(\
-                       buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
-                       ipath_snap_cntr( \
-                               dd, offsetof(struct infinipath_counters, \
-                                            attr) / sizeof(u64)));     \
-       } \
-       static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);
-
-DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
-DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
-DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
-DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
-DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
-DEVICE_COUNTER(lb_ints, LBIntCnt);
-DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
-DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
-DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
-DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
-DEVICE_COUNTER(rx_dwords, RxDwordCnt);
-DEVICE_COUNTER(rx_ebps, RxEBPCnt);
-DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
-DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
-DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
-DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
-DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
-DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
-DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
-DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
-DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
-DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
-DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
-DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
-DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
-DEVICE_COUNTER(tx_dwords, TxDwordCnt);
-DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
-DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
-DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
-DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
-DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
-DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
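
Each invocation above stamps out a read-only counter attribute whose show routine snapshots one hardware counter by its offset within struct infinipath_counters. Written out by hand, DEVICE_COUNTER(lb_ints, LBIntCnt) expands to roughly the following (note that textual replacement of the macro's `attr` parameter also renames the unused function argument):

static ssize_t show_counter_lb_ints(struct device *dev,
                                    struct device_attribute *LBIntCnt,
                                    char *buf)
{
        struct ipath_devdata *dd = dev_get_drvdata(dev);
        return scnprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)
                         ipath_snap_cntr(dd,
                                         offsetof(struct infinipath_counters,
                                                  LBIntCnt) / sizeof(u64)));
}
static DEVICE_ATTR(lb_ints, S_IRUGO, show_counter_lb_ints, NULL);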
-
-static struct attribute *dev_counter_attributes[] = {
-       &dev_attr_ib_link_downeds.attr,
-       &dev_attr_ib_link_err_recoveries.attr,
-       &dev_attr_ib_status_changes.attr,
-       &dev_attr_ib_symbol_errs.attr,
-       &dev_attr_lb_flow_stalls.attr,
-       &dev_attr_lb_ints.attr,
-       &dev_attr_rx_bad_formats.attr,
-       &dev_attr_rx_buf_ovfls.attr,
-       &dev_attr_rx_data_pkts.attr,
-       &dev_attr_rx_dropped_pkts.attr,
-       &dev_attr_rx_dwords.attr,
-       &dev_attr_rx_ebps.attr,
-       &dev_attr_rx_flow_ctrl_errs.attr,
-       &dev_attr_rx_flow_pkts.attr,
-       &dev_attr_rx_icrc_errs.attr,
-       &dev_attr_rx_len_errs.attr,
-       &dev_attr_rx_link_problems.attr,
-       &dev_attr_rx_lpcrc_errs.attr,
-       &dev_attr_rx_max_min_len_errs.attr,
-       &dev_attr_rx_p0_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p1_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p2_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p3_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p4_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p5_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p6_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p7_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p8_hdr_egr_ovfls.attr,
-       &dev_attr_rx_pkey_mismatches.attr,
-       &dev_attr_rx_tid_full_errs.attr,
-       &dev_attr_rx_tid_valid_errs.attr,
-       &dev_attr_rx_vcrc_errs.attr,
-       &dev_attr_tx_data_pkts.attr,
-       &dev_attr_tx_dropped_pkts.attr,
-       &dev_attr_tx_dwords.attr,
-       &dev_attr_tx_flow_pkts.attr,
-       &dev_attr_tx_flow_stalls.attr,
-       &dev_attr_tx_len_errs.attr,
-       &dev_attr_tx_max_min_len_errs.attr,
-       &dev_attr_tx_underruns.attr,
-       &dev_attr_tx_unsup_vl_errs.attr,
-       NULL
-};
-
-static struct attribute_group dev_counter_attr_group = {
-       .name = "counters",
-       .attrs = dev_counter_attributes
-};
-
-static ssize_t store_reset(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       if (count < 5 || memcmp(buf, "reset", 5)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (dd->ipath_flags & IPATH_DISABLED) {
-               /*
-                * post-reset init would re-enable interrupts, etc.
-                * so don't allow reset on disabled devices.  Not a
-                * perfect error code, but about the best choice.
-                */
-               dev_info(dev,"Unit %d is disabled, can't reset\n",
-                        dd->ipath_unit);
-               ret = -EINVAL;
-               goto bail;
-       }
-       ret = ipath_reset_device(dd->ipath_unit);
-bail:
-       return ret<0 ? ret : count;
-}
-
-static ssize_t store_link_state(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 state;
-
-       ret = ipath_parse_ushort(buf, &state);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_linkstate(dd, state);
-       if (r < 0) {
-               ret = r;
-               goto bail;
-       }
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid link state\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_mtu(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
-}
-
-static ssize_t store_mtu(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       u16 mtu = 0;
-       int r;
-
-       ret = ipath_parse_ushort(buf, &mtu);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_mtu(dd, mtu);
-       if (r < 0)
-               ret = r;
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid MTU\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_enabled(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       return scnprintf(buf, PAGE_SIZE, "%u\n",
-                        (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
-}
-
-static ssize_t store_enabled(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       u16 enable = 0;
-
-       ret = ipath_parse_ushort(buf, &enable);
-       if (ret < 0) {
-               ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
-               goto bail;
-       }
-
-       if (enable) {
-               if (!(dd->ipath_flags & IPATH_DISABLED))
-                       goto bail;
-
-               dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
-               /* same as post-reset */
-               ret = ipath_init_chip(dd, 1);
-               if (ret)
-                       ipath_dev_err(dd, "Failed to enable unit %d\n",
-                                     dd->ipath_unit);
-               else {
-                       dd->ipath_flags &= ~IPATH_DISABLED;
-                       *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
-               }
-       } else if (!(dd->ipath_flags & IPATH_DISABLED)) {
-               dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
-               ipath_shutdown_device(dd);
-               dd->ipath_flags |= IPATH_DISABLED;
-               *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
-       }
-
-bail:
-       return ret;
-}
-
-static ssize_t store_rx_pol_inv(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_rx_pol_inv(dd, val);
-       if (r < 0) {
-               ret = r;
-               goto bail;
-       }
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
-bail:
-       return ret;
-}
-
-static ssize_t store_led_override(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret > 0)
-               ipath_set_led_override(dd, val);
-       else
-               ipath_dev_err(dd, "attempt to set invalid LED override\n");
-       return ret;
-}
-
-static ssize_t show_logged_errs(struct device *dev,
-                               struct device_attribute *attr,
-                               char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int idx, count;
-
-       /* force consistency with actual EEPROM */
-       if (ipath_update_eeprom_log(dd) != 0)
-               return -ENXIO;
-
-       count = 0;
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
-                       dd->ipath_eep_st_errs[idx],
-                       idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
-       }
-
-       return count;
-}
-
-/*
- * New sysfs entries to control various IB config. These all turn into
- * accesses via ipath_f_get/set_ib_cfg.
- *
- * Get/Set heartbeat enable. OR of 1=enabled, 2=auto.
- */
-static ssize_t show_hrtbt_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_hrtbt_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 3)
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
-               goto bail;
-       }
-
-       /*
-        * Set the "intentional" heartbeat enable per either of
-        * "Enable" and "Auto", as these are normally set together.
-        * This bit is consulted when leaving loopback mode,
-        * because entering loopback mode overrides it and automatically
-        * disables heartbeat.
-        */
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
-       if (r < 0)
-               ret = r;
-       else if (val == IPATH_IB_HRTBT_OFF)
-               dd->ipath_flags |= IPATH_NO_HRTBT;
-       else
-               dd->ipath_flags &= ~IPATH_NO_HRTBT;
-
-bail:
-       return ret;
-}
-
-/*
- * Get/Set Link-widths enabled. OR of 1=1x, 2=4x (this is human/IB centric,
- * _not_ the particular encoding of any given chip)
- */
-static ssize_t show_lwid_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_lwid_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && (val == 0 || val > 3))
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Link Width (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/* Get current link width */
-static ssize_t show_lwid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-/*
- * Get/Set Link-speeds enabled. OR of 1=SDR, 2=DDR.
- */
-static ssize_t show_spd_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_spd_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Link Speed (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/* Get current link speed */
-static ssize_t show_spd(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-/*
- * Get/Set RX polarity-invert enable. 0=no, 1=yes.
- */
-static ssize_t show_rx_polinv_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_rx_polinv_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 1) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Rx Polarity (enable)\n");
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/*
- * Get/Set RX lane-reversal enable. 0=no, 1=yes.
- */
-static ssize_t show_lanerev_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_lanerev_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 1) {
-               ret = -EINVAL;
-               ipath_dev_err(dd,
-                       "attempt to set invalid Lane reversal (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
-static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
-
-static struct attribute *driver_attributes[] = {
-       &driver_attr_num_units.attr,
-       &driver_attr_version.attr,
-       NULL
-};
-
-static struct attribute_group driver_attr_group = {
-       .attrs = driver_attributes
-};
-
-static ssize_t store_tempsense(struct device *dev,
-                              struct device_attribute *attr,
-                              const char *buf,
-                              size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, stat;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret <= 0) {
-               ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
-               goto bail;
-       }
-       /* If anything but the highest limit, enable T_CRIT_A "interrupt" */
-       stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set tempsense config\n");
-               ret = -1;
-               goto bail;
-       }
-       stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set local Tcrit\n");
-               ret = -1;
-               goto bail;
-       }
-       stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set remote Tcrit\n");
-               ret = -1;
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
-/*
- * Dump tempsense regs in decimal, to ease shell scripts.
- */
-static ssize_t show_tempsense(struct device *dev,
-                             struct device_attribute *attr,
-                             char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-       int idx;
-       u8 regvals[8];
-
-       ret = -ENXIO;
-       for (idx = 0; idx < 8; ++idx) {
-               if (idx == 6)
-                       continue;
-               ret = ipath_tempsense_read(dd, idx);
-               if (ret < 0)
-                       break;
-               regvals[idx] = ret;
-       }
-       if (idx == 8)
-               ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
-                       *(signed char *)(regvals),
-                       *(signed char *)(regvals + 1),
-                       regvals[2], regvals[3],
-                       *(signed char *)(regvals + 5),
-                       *(signed char *)(regvals + 7));
-       return ret;
-}
-
-const struct attribute_group *ipath_driver_attr_groups[] = {
-       &driver_attr_group,
-       NULL,
-};
-
-static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
-static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
-static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
-static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
-static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
-static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
-static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
-static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
-static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
-static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
-static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
-static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
-static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
-static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
-static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
-static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
-                  show_jint_max_packets, store_jint_max_packets);
-static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
-                  show_jint_idle_ticks, store_jint_idle_ticks);
-static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
-                  show_tempsense, store_tempsense);
-
-static struct attribute *dev_attributes[] = {
-       &dev_attr_guid.attr,
-       &dev_attr_lmc.attr,
-       &dev_attr_lid.attr,
-       &dev_attr_link_state.attr,
-       &dev_attr_mlid.attr,
-       &dev_attr_mtu.attr,
-       &dev_attr_nguid.attr,
-       &dev_attr_nports.attr,
-       &dev_attr_serial.attr,
-       &dev_attr_status.attr,
-       &dev_attr_status_str.attr,
-       &dev_attr_boardversion.attr,
-       &dev_attr_unit.attr,
-       &dev_attr_enabled.attr,
-       &dev_attr_rx_pol_inv.attr,
-       &dev_attr_led_override.attr,
-       &dev_attr_logged_errors.attr,
-       &dev_attr_tempsense.attr,
-       &dev_attr_localbus_info.attr,
-       NULL
-};
-
-static struct attribute_group dev_attr_group = {
-       .attrs = dev_attributes
-};
-
-static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
-                  store_hrtbt_enb);
-static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
-                  store_lwid_enb);
-static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
-static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
-                  store_spd_enb);
-static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
-static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
-                  store_rx_polinv_enb);
-static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
-                  store_lanerev_enb);
-
-static struct attribute *dev_ibcfg_attributes[] = {
-       &dev_attr_hrtbt_enable.attr,
-       &dev_attr_link_width_enable.attr,
-       &dev_attr_link_width.attr,
-       &dev_attr_link_speed_enable.attr,
-       &dev_attr_link_speed.attr,
-       &dev_attr_rx_pol_inv_enable.attr,
-       &dev_attr_rx_lane_rev_enable.attr,
-       NULL
-};
-
-static struct attribute_group dev_ibcfg_attr_group = {
-       .attrs = dev_ibcfg_attributes
-};
-
-/**
- * ipath_expose_reset - create a device reset file
- * @dev: the device structure
- *
- * Only expose a file that lets us reset the device after someone
- * enters diag mode.  A device reset is quite likely to crash the
- * machine entirely, so we don't normally want to make it
- * available.
- *
- * Called with ipath_mutex held.
- */
-int ipath_expose_reset(struct device *dev)
-{
-       static int exposed;
-       int ret;
-
-       if (!exposed) {
-               ret = device_create_file(dev, &dev_attr_reset);
-               exposed = 1;
-       } else {
-               ret = 0;
-       }
-
-       return ret;
-}
-
-int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
-{
-       int ret;
-
-       ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
-       if (ret)
-               goto bail;
-
-       ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
-       if (ret)
-               goto bail_attrs;
-
-       if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-               ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
-               if (ret)
-                       goto bail_counter;
-               ret = device_create_file(dev, &dev_attr_jint_max_packets);
-               if (ret)
-                       goto bail_idle;
-
-               ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
-               if (ret)
-                       goto bail_max;
-       }
-
-       return 0;
-
-bail_max:
-       device_remove_file(dev, &dev_attr_jint_max_packets);
-bail_idle:
-       device_remove_file(dev, &dev_attr_jint_idle_ticks);
-bail_counter:
-       sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-bail_attrs:
-       sysfs_remove_group(&dev->kobj, &dev_attr_group);
-bail:
-       return ret;
-}
-
-void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
-{
-       sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-
-       if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-               sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
-               device_remove_file(dev, &dev_attr_jint_idle_ticks);
-               device_remove_file(dev, &dev_attr_jint_max_packets);
-       }
-
-       sysfs_remove_group(&dev->kobj, &dev_attr_group);
-
-       device_remove_file(dev, &dev_attr_reset);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
deleted file mode 100644 (file)
index 0246b30..0000000
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/**
- * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_uc_req(struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       struct ipath_swqe *wqe;
-       unsigned long flags;
-       u32 hwords;
-       u32 bth0;
-       u32 len;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       int ret = 0;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       ohdr = &qp->s_hdr.u.oth;
-       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-               ohdr = &qp->s_hdr.u.l.oth;
-
-       /* Header size in 32-bit words: LRH+BTH = (8+12)/4. */
-       hwords = 5;
-       bth0 = 1 << 22; /* Set M bit */
-
-       /* Get the next send request. */
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       qp->s_wqe = NULL;
-       switch (qp->s_state) {
-       default:
-               if (!(ib_ipath_state_ops[qp->state] &
-                   IPATH_PROCESS_NEXT_SEND_OK))
-                       goto bail;
-               /* Check if send work queue is empty. */
-               if (qp->s_cur == qp->s_head)
-                       goto bail;
-               /*
-                * Start a new request.
-                */
-               qp->s_psn = wqe->psn = qp->s_next_psn;
-               qp->s_sge.sge = wqe->sg_list[0];
-               qp->s_sge.sg_list = wqe->sg_list + 1;
-               qp->s_sge.num_sge = wqe->wr.num_sge;
-               qp->s_len = len = wqe->length;
-               switch (wqe->wr.opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_IMM:
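-                       /* A message longer than one PMTU must be fragmented. */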
-                       if (len > pmtu) {
-                               qp->s_state = OP(SEND_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_SEND)
-                               qp->s_state = OP(SEND_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(SEND_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                       }
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-                       qp->s_wqe = wqe;
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_WRITE:
-               case IB_WR_RDMA_WRITE_WITH_IMM:
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       hwords += sizeof(struct ib_reth) / 4;
-                       if (len > pmtu) {
-                               qp->s_state = OP(RDMA_WRITE_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                               qp->s_state = OP(RDMA_WRITE_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                                       bth0 |= 1 << 23;
-                       }
-                       qp->s_wqe = wqe;
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               default:
-                       goto bail;
-               }
-               break;
-
-       case OP(SEND_FIRST):
-               qp->s_state = OP(SEND_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_SEND)
-                       qp->s_state = OP(SEND_LAST);
-               else {
-                       qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-               }
-               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                       bth0 |= 1 << 23;
-               qp->s_wqe = wqe;
-               if (++qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-               qp->s_state = OP(RDMA_WRITE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                       qp->s_state = OP(RDMA_WRITE_LAST);
-               else {
-                       qp->s_state =
-                               OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-               }
-               qp->s_wqe = wqe;
-               if (++qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-       }
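-       /* Account for the payload consumed and build the packet header. */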
-       qp->s_len -= len;
-       qp->s_hdrwords = hwords;
-       qp->s_cur_sge = &qp->s_sge;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(to_idev(qp->ibqp.device),
-                             qp, ohdr, bth0 | (qp->s_state << 24),
-                             qp->s_next_psn++ & IPATH_PSN_MASK);
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * ipath_uc_rcv - handle an incoming UC packet
- * @dev: the device the packet came in on
- * @hdr: the header of the packet
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the length of the packet
- * @qp: the QP for this packet.
- *
- * This is called from ipath_qp_rcv() to process an incoming UC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       int opcode;
-       u32 hdrsize;
-       u32 psn;
-       u32 pad;
-       struct ib_wc wc;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       struct ib_reth *reth;
-       int header_in_data;
-
-       /* Validate the SLID. See Ch. 9.6.1.5 */
-       if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-               goto done;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12;       /* LRH + BTH */
-               psn = be32_to_cpu(ohdr->bth[2]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
-               /*
-                * The header with GRH is 60 bytes and the
-                * core driver sets the eager header buffer
-                * size to 56 bytes, so the last 4 bytes of
-                * the BTH header (PSN) are in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       psn = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               } else
-                       psn = be32_to_cpu(ohdr->bth[2]);
-       }
-       /*
-        * The opcode is in the low byte when it's in network order
-        * (top byte when in host order).
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-
-       memset(&wc, 0, sizeof wc);
-
-       /* Compare the PSN against the expected PSN. */
-       if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
-               /*
-                * Handle a sequence error.
-                * Silently drop any current message.
-                */
-               qp->r_psn = psn;
-       inv:
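-       /* Reset r_state so only "first" or "only" opcodes are accepted next. */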
-               qp->r_state = OP(SEND_LAST);
-               switch (opcode) {
-               case OP(SEND_FIRST):
-               case OP(SEND_ONLY):
-               case OP(SEND_ONLY_WITH_IMMEDIATE):
-                       goto send_first;
-
-               case OP(RDMA_WRITE_FIRST):
-               case OP(RDMA_WRITE_ONLY):
-               case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-                       goto rdma_first;
-
-               default:
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-       }
-
-       /* Check for opcode sequence errors. */
-       switch (qp->r_state) {
-       case OP(SEND_FIRST):
-       case OP(SEND_MIDDLE):
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_MIDDLE):
-               if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-
-       default:
-               if (opcode == OP(SEND_FIRST) ||
-                   opcode == OP(SEND_ONLY) ||
-                   opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_FIRST) ||
-                   opcode == OP(RDMA_WRITE_ONLY) ||
-                   opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-       }
-
-       /* OK, process the packet. */
-       switch (opcode) {
-       case OP(SEND_FIRST):
-       case OP(SEND_ONLY):
-       case OP(SEND_ONLY_WITH_IMMEDIATE):
-       send_first:
-               if (qp->r_flags & IPATH_R_REUSE_SGE) {
-                       qp->r_flags &= ~IPATH_R_REUSE_SGE;
-                       qp->r_sge = qp->s_rdma_read_sge;
-               } else if (!ipath_get_rwqe(qp, 0)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Save the WQE so we can reuse it in case of an error. */
-               qp->s_rdma_read_sge = qp->r_sge;
-               qp->r_rcv_len = 0;
-               if (opcode == OP(SEND_ONLY))
-                       goto send_last;
-               else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
-                       goto send_last_imm;
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               /* Check the packet length (one PMTU) and the posted rwqe len. */
-               if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len)) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(SEND_LAST_WITH_IMMEDIATE):
-       send_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST):
-       send_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               wc.byte_len = tlen + qp->r_rcv_len;
-               if (unlikely(wc.byte_len > qp->r_len)) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               wc.opcode = IB_WC_RECV;
-       last_imm:
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               wc.wr_id = qp->r_wr_id;
-               wc.status = IB_WC_SUCCESS;
-               wc.qp = &qp->ibqp;
-               wc.src_qp = qp->remote_qpn;
-               wc.slid = qp->remote_ah_attr.dlid;
-               wc.sl = qp->remote_ah_attr.sl;
-               /* Signal completion event if the solicited bit is set. */
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                              (ohdr->bth[0] &
-                               cpu_to_be32(1 << 23)) != 0);
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_ONLY):
-       case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
-       rdma_first:
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               hdrsize += sizeof(*reth);
-               qp->r_len = be32_to_cpu(reth->length);
-               qp->r_rcv_len = 0;
-               if (qp->r_len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey */
-                       ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
-                                          vaddr, rkey,
-                                          IB_ACCESS_REMOTE_WRITE);
-                       if (unlikely(!ok)) {
-                               dev->n_pkt_drops++;
-                               goto done;
-                       }
-               } else {
-                       qp->r_sge.sg_list = NULL;
-                       qp->r_sge.sge.mr = NULL;
-                       qp->r_sge.sge.vaddr = NULL;
-                       qp->r_sge.sge.length = 0;
-                       qp->r_sge.sge.sge_length = 0;
-               }
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_WRITE))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               if (opcode == OP(RDMA_WRITE_ONLY))
-                       goto rdma_last;
-               else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       goto rdma_last_imm;
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               /* Check the packet length (one PMTU) and the posted rwqe len. */
-               if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-       rdma_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
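-               /* The total RDMA write length must exactly match the RETH length. */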
-               if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               if (qp->r_flags & IPATH_R_REUSE_SGE)
-                       qp->r_flags &= ~IPATH_R_REUSE_SGE;
-               else if (!ipath_get_rwqe(qp, 1)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               wc.byte_len = qp->r_len;
-               wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-               goto last_imm;
-
-       case OP(RDMA_WRITE_LAST):
-       rdma_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               break;
-
-       default:
-               /* Drop packets with unknown opcodes. */
-               dev->n_pkt_drops++;
-               goto done;
-       }
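-       /* Packet accepted: advance the expected PSN and remember the opcode. */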
-       qp->r_psn++;
-       qp->r_state = opcode;
-done:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
deleted file mode 100644 (file)
index 385d941..0000000
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_ud_loopback - handle send on loopback QPs
- * @sqp: the sending QP
- * @swqe: the send work request
- *
- * This is called from ipath_make_ud_req() to forward a WQE addressed
- * to the same HCA.
- * Note that the receive interrupt handler may be calling ipath_ud_rcv()
- * while this is being called.
- */
-static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
-{
-       struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-       struct ipath_qp *qp;
-       struct ib_ah_attr *ah_attr;
-       unsigned long flags;
-       struct ipath_rq *rq;
-       struct ipath_srq *srq;
-       struct ipath_sge_state rsge;
-       struct ipath_sge *sge;
-       struct ipath_rwq *wq;
-       struct ipath_rwqe *wqe;
-       void (*handler)(struct ib_event *, void *);
-       struct ib_wc wc;
-       u32 tail;
-       u32 rlen;
-       u32 length;
-
-       qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
-       if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               goto done;
-       }
-
-       /*
-        * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
-        * Qkeys with the high order bit set mean use the
-        * qkey from the QP context instead of the WR (see 10.2.5).
-        */
-       if (unlikely(qp->ibqp.qp_num &&
-                    ((int) swqe->ud_wr.remote_qkey < 0 ?
-                     sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
-               /* XXX OK to lose a count once in a while. */
-               dev->qkey_violations++;
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-
-       /*
-        * A GRH is expected to precede the data even if not
-        * present on the wire.
-        */
-       length = swqe->length;
-       memset(&wc, 0, sizeof wc);
-       wc.byte_len = length + sizeof(struct ib_grh);
-
-       if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = swqe->wr.ex.imm_data;
-       }
-
-       /*
-        * This would be a lot simpler if we could call ipath_get_rwqe()
-        * but that relies on state the receive interrupt handler also uses,
-        * so we would need to lock out receive interrupts while doing
-        * local loopback.
-        */
-       if (qp->ibqp.srq) {
-               srq = to_isrq(qp->ibqp.srq);
-               handler = srq->ibsrq.event_handler;
-               rq = &srq->rq;
-       } else {
-               srq = NULL;
-               handler = NULL;
-               rq = &qp->r_rq;
-       }
-
-       /*
-        * Get the next work request entry to find where to put the data.
-        * Note that it is safe to drop the lock after changing rq->tail
-        * since ipath_post_receive() won't fill the empty slot.
-        */
-       spin_lock_irqsave(&rq->lock, flags);
-       wq = rq->wq;
-       tail = wq->tail;
-       /* Validate tail before using it since it is user writable. */
-       if (tail >= rq->size)
-               tail = 0;
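-       /* tail == head means there are no posted receive WQEs. */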
-       if (unlikely(tail == wq->head)) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       wqe = get_rwqe_ptr(rq, tail);
-       rsge.sg_list = qp->r_ud_sg_list;
-       if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       /* Silently drop packets which are too big. */
-       if (wc.byte_len > rlen) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       if (++tail >= rq->size)
-               tail = 0;
-       wq->tail = tail;
-       wc.wr_id = wqe->wr_id;
-       if (handler) {
-               u32 n;
-
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               n = wq->head;
-               if (n >= rq->size)
-                       n = 0;
-               if (n < tail)
-                       n += rq->size - tail;
-               else
-                       n -= tail;
-               if (n < srq->limit) {
-                       struct ib_event ev;
-
-                       srq->limit = 0;
-                       spin_unlock_irqrestore(&rq->lock, flags);
-                       ev.device = qp->ibqp.device;
-                       ev.element.srq = qp->ibqp.srq;
-                       ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       handler(&ev, srq->ibsrq.srq_context);
-               } else
-                       spin_unlock_irqrestore(&rq->lock, flags);
-       } else
-               spin_unlock_irqrestore(&rq->lock, flags);
-
-       ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
-               wc.wc_flags |= IB_WC_GRH;
-       } else
-               ipath_skip_sge(&rsge, sizeof(struct ib_grh));
-       sge = swqe->sg_list;
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               ipath_copy_sge(&rsge, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--swqe->wr.num_sge)
-                               sge++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               length -= len;
-       }
-       wc.status = IB_WC_SUCCESS;
-       wc.opcode = IB_WC_RECV;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = sqp->ibqp.qp_num;
-       /* XXX do we know which pkey matched? Only needed for GSI. */
-       wc.pkey_index = 0;
-       wc.slid = dev->dd->ipath_lid |
-               (ah_attr->src_path_bits &
-                ((1 << dev->dd->ipath_lmc) - 1));
-       wc.sl = ah_attr->sl;
-       wc.dlid_path_bits =
-               ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
-drop:
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-done:;
-}
-
-/**
- * ipath_make_ud_req - construct a UD request packet
- * @qp: the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_ud_req(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_other_headers *ohdr;
-       struct ib_ah_attr *ah_attr;
-       struct ipath_swqe *wqe;
-       unsigned long flags;
-       u32 nwords;
-       u32 extra_bytes;
-       u32 bth0;
-       u16 lrh0;
-       u16 lid;
-       int ret = 0;
-       int next_cur;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       if (qp->s_cur == qp->s_head)
-               goto bail;
-
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       next_cur = qp->s_cur + 1;
-       if (next_cur >= qp->s_size)
-               next_cur = 0;
-
-       /* Construct the header. */
-       ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-       if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
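-               /* The permissive LID falls in the multicast range but is unicast. */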
-               if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
-                       dev->n_multicast_xmit++;
-               else
-                       dev->n_unicast_xmit++;
-       } else {
-               dev->n_unicast_xmit++;
-               lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
-               if (unlikely(lid == dev->dd->ipath_lid)) {
-                       /*
-                        * If DMAs are in progress, we can't generate
-                        * a completion for the loopback packet since
-                        * it would be out of order.
-                        * XXX Instead of waiting, we could queue a
-                        * zero length descriptor so we get a callback.
-                        */
-                       if (atomic_read(&qp->s_dma_busy)) {
-                               qp->s_flags |= IPATH_S_WAIT_DMA;
-                               goto bail;
-                       }
-                       qp->s_cur = next_cur;
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       ipath_ud_loopback(qp, wqe);
-                       spin_lock_irqsave(&qp->s_lock, flags);
-                       ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
-                       goto done;
-               }
-       }
-
-       qp->s_cur = next_cur;
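-       /* Round the payload up to a multiple of 4 bytes, counted in 32-bit words. */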
-       extra_bytes = -wqe->length & 3;
-       nwords = (wqe->length + extra_bytes) >> 2;
-
-       /* Header size in 32-bit words: LRH+BTH+DETH = (8+12+8)/4. */
-       qp->s_hdrwords = 7;
-       qp->s_cur_size = wqe->length;
-       qp->s_cur_sge = &qp->s_sge;
-       qp->s_dmult = ah_attr->static_rate;
-       qp->s_wqe = wqe;
-       qp->s_sge.sge = wqe->sg_list[0];
-       qp->s_sge.sg_list = wqe->sg_list + 1;
-       qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
-
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               /* Header size in 32-bit words. */
-               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-                                                &ah_attr->grh,
-                                                qp->s_hdrwords, nwords);
-               lrh0 = IPATH_LRH_GRH;
-               ohdr = &qp->s_hdr.u.l.oth;
-               /*
-                * Don't worry about sending to locally attached multicast
-                * QPs.  The spec leaves that behavior unspecified.
-                */
-       } else {
-               /* Header size in 32-bit words. */
-               lrh0 = IPATH_LRH_BTH;
-               ohdr = &qp->s_hdr.u.oth;
-       }
-       if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
-               qp->s_hdrwords++;
-               ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
-               bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-       } else
-               bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
-       lrh0 |= ah_attr->sl << 4;
-       if (qp->ibqp.qp_type == IB_QPT_SMI)
-               lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
-       qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-       qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
-                                          SIZE_OF_CRC);
-       lid = dev->dd->ipath_lid;
-       if (lid) {
-               lid |= ah_attr->src_path_bits &
-                       ((1 << dev->dd->ipath_lmc) - 1);
-               qp->s_hdr.lrh[3] = cpu_to_be16(lid);
-       } else
-               qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
-       if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
-               bth0 |= 1 << 23;
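-       /* The pad count occupies BTH bits 20-21; the receiver reads it back with ">> 20". */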
-       bth0 |= extra_bytes << 20;
-       bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-               ipath_get_pkey(dev->dd, qp->s_pkey_index);
-       ohdr->bth[0] = cpu_to_be32(bth0);
-       /*
-        * Use the multicast QP if the destination LID is a multicast LID.
-        */
-       ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-               ah_attr->dlid != IPATH_PERMISSIVE_LID ?
-               cpu_to_be32(IPATH_MULTICAST_QPN) :
-               cpu_to_be32(wqe->ud_wr.remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
-       /*
-        * Qkeys with the high order bit set mean use the
-        * qkey from the QP context instead of the WR (see 10.2.5).
-        */
-       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
-                                        qp->qkey : wqe->ud_wr.remote_qkey);
-       ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
-
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * ipath_ud_rcv - receive an incoming UD packet
- * @dev: the device the packet came in on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_qp_rcv() to process an incoming UD packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       int opcode;
-       u32 hdrsize;
-       u32 pad;
-       struct ib_wc wc;
-       u32 qkey;
-       u32 src_qp;
-       u16 dlid;
-       int header_in_data;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12 + 8;   /* LRH + BTH + DETH */
-               qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-               src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
-               /*
-                * The header with GRH is 68 bytes and the core driver sets
-                * the eager header buffer size to 56 bytes, so the last 12
-                * bytes of the IB header are in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       qkey = be32_to_cpu(((__be32 *) data)[1]);
-                       src_qp = be32_to_cpu(((__be32 *) data)[2]);
-                       data += 12;
-               } else {
-                       qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-                       src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-               }
-       }
-       src_qp &= IPATH_QPN_MASK;
-
-       /*
-        * Check that the permissive LID is only used on QP0
-        * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
-        */
-       if (qp->ibqp.qp_num) {
-               if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
-                            hdr->lrh[3] == IB_LID_PERMISSIVE)) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               if (unlikely(qkey != qp->qkey)) {
-                       /* XXX OK to lose a count once in a while. */
-                       dev->qkey_violations++;
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-       } else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
-                  hdr->lrh[3] == IB_LID_PERMISSIVE) {
-               struct ib_smp *smp = (struct ib_smp *) data;
-
-               if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-       }
-
-       /*
-        * The opcode is in the low byte when it's in network order
-        * (top byte when in host order).
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-       if (qp->ibqp.qp_num > 1 &&
-           opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else
-                       wc.ex.imm_data = ohdr->u.ud.imm_data;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               hdrsize += sizeof(u32);
-       } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
-               wc.ex.imm_data = 0;
-               wc.wc_flags = 0;
-       } else {
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-
-       /* Get the number of bytes the message was padded by. */
-       pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-       if (unlikely(tlen < (hdrsize + pad + 4))) {
-               /* Drop incomplete packets. */
-               dev->n_pkt_drops++;
-               goto bail;
-       }
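-       /* Strip the header, padding, and 4-byte CRC to leave the payload length. */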
-       tlen -= hdrsize + pad + 4;
-
-       /* Drop invalid MAD packets (see 13.5.3.1). */
-       if (unlikely((qp->ibqp.qp_num == 0 &&
-                     (tlen != 256 ||
-                      (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
-                    (qp->ibqp.qp_num == 1 &&
-                     (tlen != 256 ||
-                      (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-
-       /*
-        * A GRH is expected to precede the data even if not
-        * present on the wire.
-        */
-       wc.byte_len = tlen + sizeof(struct ib_grh);
-
-       /*
-        * Get the next work request entry to find where to put the data.
-        */
-       if (qp->r_flags & IPATH_R_REUSE_SGE)
-               qp->r_flags &= ~IPATH_R_REUSE_SGE;
-       else if (!ipath_get_rwqe(qp, 0)) {
-               /*
-                * Count VL15 packets dropped due to no receive buffer.
-                * Otherwise, count them as buffer overruns, since the HW
-                * will usually be able to receive packets even if there are
-                * no QPs with posted receive buffers.
-                */
-               if (qp->ibqp.qp_num == 0)
-                       dev->n_vl15_dropped++;
-               else
-                       dev->rcv_errors++;
-               goto bail;
-       }
-       /* Silently drop packets which are too big. */
-       if (wc.byte_len > qp->r_len) {
-               qp->r_flags |= IPATH_R_REUSE_SGE;
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-       if (has_grh) {
-               ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-                              sizeof(struct ib_grh));
-               wc.wc_flags |= IB_WC_GRH;
-       } else
-               ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
-       ipath_copy_sge(&qp->r_sge, data,
-                      wc.byte_len - sizeof(struct ib_grh));
-       if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-               goto bail;
-       wc.wr_id = qp->r_wr_id;
-       wc.status = IB_WC_SUCCESS;
-       wc.opcode = IB_WC_RECV;
-       wc.vendor_err = 0;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = src_qp;
-       /* XXX do we know which pkey matched? Only needed for GSI. */
-       wc.pkey_index = 0;
-       wc.slid = be16_to_cpu(hdr->lrh[3]);
-       wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
-       dlid = be16_to_cpu(hdr->lrh[1]);
-       /*
-        * Save the LMC lower bits if the destination LID is a unicast LID.
-        */
-       wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
-               dlid & ((1 << dev->dd->ipath_lmc) - 1);
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      (ohdr->bth[0] &
-                       cpu_to_be32(1 << 23)) != 0);
-
-bail:;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_pages.c b/drivers/staging/rdma/ipath/ipath_user_pages.c
deleted file mode 100644 (file)
index d29b4da..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/mm.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-static void __ipath_release_user_pages(struct page **p, size_t num_pages,
-                                  int dirty)
-{
-       size_t i;
-
-       for (i = 0; i < num_pages; i++) {
-               ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
-                          (unsigned long) num_pages, p[i]);
-               if (dirty)
-                       set_page_dirty_lock(p[i]);
-               put_page(p[i]);
-       }
-}
-
-/* call with current->mm->mmap_sem held */
-static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-                                 struct page **p)
-{
-       unsigned long lock_limit;
-       size_t got;
-       int ret;
-
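-       /* RLIMIT_MEMLOCK is in bytes; convert it to a page count. */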
-       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-       if (num_pages > lock_limit) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
-                  (unsigned long) num_pages, start_page);
-
-       for (got = 0; got < num_pages; got += ret) {
-               ret = get_user_pages(current, current->mm,
-                                    start_page + got * PAGE_SIZE,
-                                    num_pages - got, 1, 1,
-                                    p + got, NULL);
-               if (ret < 0)
-                       goto bail_release;
-       }
-
-       current->mm->pinned_vm += num_pages;
-
-       ret = 0;
-       goto bail;
-
-bail_release:
-       __ipath_release_user_pages(p, got, 0);
-bail:
-       return ret;
-}
-
-/**
- * ipath_map_page - a safety wrapper around pci_map_page()
- *
- * A dma_addr of all 0's is interpreted by the chip as "disabled".
- * Unfortunately, it can also be a valid dma_addr returned on some
- * architectures.
- *
- * The powerpc iommu assigns dma_addrs in ascending order, so we don't
- * have to bother with retries or mapping a dummy page to ensure we
- * don't just get the same mapping again.
- *
- * I'm sure we won't be so lucky with other IOMMUs, so FIXME.
- */
-dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
-       unsigned long offset, size_t size, int direction)
-{
-       dma_addr_t phys;
-
-       phys = pci_map_page(hwdev, page, offset, size, direction);
-
-       if (phys == 0) {
-               pci_unmap_page(hwdev, phys, size, direction);
-               phys = pci_map_page(hwdev, page, offset, size, direction);
-               /*
-                * FIXME: If we get 0 again, we should keep this page,
-                * map another, then free the 0 page.
-                */
-       }
-
-       return phys;
-}
-
-/**
- * ipath_map_single - a safety wrapper around pci_map_single()
- *
- * Same idea as ipath_map_page().
- */
-dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
-       int direction)
-{
-       dma_addr_t phys;
-
-       phys = pci_map_single(hwdev, ptr, size, direction);
-
-       if (phys == 0) {
-               pci_unmap_single(hwdev, phys, size, direction);
-               phys = pci_map_single(hwdev, ptr, size, direction);
-               /*
-                * FIXME: If we get 0 again, we should keep this page,
-                * map another, then free the 0 page.
-                */
-       }
-
-       return phys;
-}
-
-/**
- * ipath_get_user_pages - lock user pages into memory
- * @start_page: the start page
- * @num_pages: the number of pages
- * @p: the output page structures
- *
- * This function takes a given start page (page-aligned user virtual
- * address) and pins it and the following specified number of pages.  For
- * now, num_pages is always 1, but that will probably change at some point
- * (because the caller is doing expected sends on a single virtually contiguous
- * buffer, so we can do all pages at once).
- */
-int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-                        struct page **p)
-{
-       int ret;
-
-       down_write(&current->mm->mmap_sem);
-
-       ret = __ipath_get_user_pages(start_page, num_pages, p);
-
-       up_write(&current->mm->mmap_sem);
-
-       return ret;
-}
-
-void ipath_release_user_pages(struct page **p, size_t num_pages)
-{
-       down_write(&current->mm->mmap_sem);
-
-       __ipath_release_user_pages(p, num_pages, 1);
-
-       current->mm->pinned_vm -= num_pages;
-
-       up_write(&current->mm->mmap_sem);
-}
-
-struct ipath_user_pages_work {
-       struct work_struct work;
-       struct mm_struct *mm;
-       unsigned long num_pages;
-};
-
-static void user_pages_account(struct work_struct *_work)
-{
-       struct ipath_user_pages_work *work =
-               container_of(_work, struct ipath_user_pages_work, work);
-
-       down_write(&work->mm->mmap_sem);
-       work->mm->pinned_vm -= work->num_pages;
-       up_write(&work->mm->mmap_sem);
-       mmput(work->mm);
-       kfree(work);
-}
-
-void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
-{
-       struct ipath_user_pages_work *work;
-       struct mm_struct *mm;
-
-       __ipath_release_user_pages(p, num_pages, 1);
-
-       mm = get_task_mm(current);
-       if (!mm)
-               return;
-
-       work = kmalloc(sizeof(*work), GFP_KERNEL);
-       if (!work)
-               goto bail_mm;
-
-       INIT_WORK(&work->work, user_pages_account);
-       work->mm = mm;
-       work->num_pages = num_pages;
-
-       queue_work(ib_wq, &work->work);
-       return;
-
-bail_mm:
-       mmput(mm);
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.c b/drivers/staging/rdma/ipath/ipath_user_sdma.c
deleted file mode 100644 (file)
index 8c12e3c..0000000
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/dmapool.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/uio.h>
-#include <linux/rbtree.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_user_sdma.h"
-
-/* minimum size of header */
-#define IPATH_USER_SDMA_MIN_HEADER_LENGTH      64
-/* expected size of headers (for dma_pool) */
-#define IPATH_USER_SDMA_EXP_HEADER_LENGTH      64
-/* length mask in PBC (lower 11 bits) */
-#define IPATH_PBC_LENGTH_MASK                  ((1 << 11) - 1)
-
-struct ipath_user_sdma_pkt {
-       u8 naddr;               /* dimension of addr (1..4) ... */
-       u32 counter;            /* sdma pkts queued counter for this entry */
-       u64 added;              /* global descq number of entries */
-
-       struct {
-               u32 offset;                     /* offset for kvaddr, addr */
-               u32 length;                     /* length in page */
-               u8  put_page;                   /* should we put_page? */
-               u8  dma_mapped;                 /* is page dma_mapped? */
-               struct page *page;              /* may be NULL (coherent mem) */
-               void *kvaddr;                   /* FIXME: only for pio hack */
-               dma_addr_t addr;
-       } addr[4];   /* max pages, any more and we coalesce */
-       struct list_head list;  /* list element */
-};
-
-struct ipath_user_sdma_queue {
-       /*
-        * pkts sent to the dma engine are queued on this
-        * list head.  the elements of this list are
-        * struct ipath_user_sdma_pkt...
-        */
-       struct list_head sent;
-
-       /* headers with expected length are allocated from here... */
-       char header_cache_name[64];
-       struct dma_pool *header_cache;
-
-       /* packets are allocated from the slab cache... */
-       char pkt_slab_name[64];
-       struct kmem_cache *pkt_slab;
-
-       /* packets are counted as they are queued... */
-       u32 counter;
-       u32 sent_counter;
-
-       /* dma page table */
-       struct rb_root dma_pages_root;
-
-       /* protect everything above... */
-       struct mutex lock;
-};
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
-{
-       struct ipath_user_sdma_queue *pq =
-               kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
-
-       if (!pq)
-               goto done;
-
-       pq->counter = 0;
-       pq->sent_counter = 0;
-       INIT_LIST_HEAD(&pq->sent);
-
-       mutex_init(&pq->lock);
-
-       snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
-                "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
-       pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
-                                        sizeof(struct ipath_user_sdma_pkt),
-                                        0, 0, NULL);
-
-       if (!pq->pkt_slab)
-               goto err_kfree;
-
-       snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
-                "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
-       pq->header_cache = dma_pool_create(pq->header_cache_name,
-                                          dev,
-                                          IPATH_USER_SDMA_EXP_HEADER_LENGTH,
-                                          4, 0);
-       if (!pq->header_cache)
-               goto err_slab;
-
-       pq->dma_pages_root = RB_ROOT;
-
-       goto done;
-
-err_slab:
-       kmem_cache_destroy(pq->pkt_slab);
-err_kfree:
-       kfree(pq);
-       pq = NULL;
-
-done:
-       return pq;
-}
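
The constructor above unwinds partial initialization with the kernel's layered-goto idiom: each failure label releases exactly what was acquired before the failing step, in reverse order. A minimal user-space sketch of the same shape (malloc/free stand in for the slab and dma_pool allocators):

#include <stdio.h>
#include <stdlib.h>

struct queue { void *slab; void *pool; };

static struct queue *queue_create(void)
{
        struct queue *q = malloc(sizeof(*q));

        if (!q)
                goto done;
        q->slab = malloc(64);
        if (!q->slab)
                goto err_kfree;
        q->pool = malloc(64);
        if (!q->pool)
                goto err_slab;
        goto done;

err_slab:                       /* pool alloc failed: drop the slab... */
        free(q->slab);
err_kfree:                      /* ...then fall through to drop q itself */
        free(q);
        q = NULL;
done:
        return q;
}

int main(void)
{
        struct queue *q = queue_create();

        if (!q)
                return 1;
        free(q->pool);
        free(q->slab);
        free(q);
        return 0;
}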
-
-static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
-                                     int i, size_t offset, size_t len,
-                                     int put_page, int dma_mapped,
-                                     struct page *page,
-                                     void *kvaddr, dma_addr_t dma_addr)
-{
-       pkt->addr[i].offset = offset;
-       pkt->addr[i].length = len;
-       pkt->addr[i].put_page = put_page;
-       pkt->addr[i].dma_mapped = dma_mapped;
-       pkt->addr[i].page = page;
-       pkt->addr[i].kvaddr = kvaddr;
-       pkt->addr[i].addr = dma_addr;
-}
-
-static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
-                                       u32 counter, size_t offset,
-                                       size_t len, int dma_mapped,
-                                       struct page *page,
-                                       void *kvaddr, dma_addr_t dma_addr)
-{
-       pkt->naddr = 1;
-       pkt->counter = counter;
-       ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
-                                 kvaddr, dma_addr);
-}
-
-/* we have too many pages in the iovec; coalesce into a single page */
-static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
-                                   struct ipath_user_sdma_pkt *pkt,
-                                   const struct iovec *iov,
-                                   unsigned long niov)
-{
-       int ret = 0;
-       struct page *page = alloc_page(GFP_KERNEL);
-       void *mpage_save;
-       char *mpage;
-       int i;
-       int len = 0;
-       dma_addr_t dma_addr;
-
-       if (!page) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       mpage = kmap(page);
-       mpage_save = mpage;
-       for (i = 0; i < niov; i++) {
-               int cfur;
-
-               cfur = copy_from_user(mpage,
-                                     iov[i].iov_base, iov[i].iov_len);
-               if (cfur) {
-                       ret = -EFAULT;
-                       goto free_unmap;
-               }
-
-               mpage += iov[i].iov_len;
-               len += iov[i].iov_len;
-       }
-
-       dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
-                               DMA_TO_DEVICE);
-       if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-               ret = -ENOMEM;
-               goto free_unmap;
-       }
-
-       ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
-                                 dma_addr);
-       pkt->naddr = 2;
-
-       goto done;
-
-free_unmap:
-       kunmap(page);
-       __free_page(page);
-done:
-       return ret;
-}
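
When an iovec carries more pages than a packet descriptor can hold, the routine above flattens it into one page-sized bounce buffer before DMA-mapping it. A standalone sketch of just the flattening step (memcpy stands in for copy_from_user, and the buffer contents are made up):

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

int main(void)
{
        char a[] = "hdr!", b[] = "payload";
        struct iovec iov[2] = {
                { .iov_base = a, .iov_len = 4 },
                { .iov_base = b, .iov_len = 7 },
        };
        char page[4096];
        size_t len = 0;

        /* copy each fragment back-to-back into the bounce page */
        for (int i = 0; i < 2; i++) {
                memcpy(page + len, iov[i].iov_base, iov[i].iov_len);
                len += iov[i].iov_len;
        }
        printf("coalesced %zu bytes: %.11s\n", len, page);
        return 0;
}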
-
-/* how many pages in this iovec element? */
-static int ipath_user_sdma_num_pages(const struct iovec *iov)
-{
-       const unsigned long addr  = (unsigned long) iov->iov_base;
-       const unsigned long  len  = iov->iov_len;
-       const unsigned long spage = addr & PAGE_MASK;
-       const unsigned long epage = (addr + len - 1) & PAGE_MASK;
-
-       return 1 + ((epage - spage) >> PAGE_SHIFT);
-}
-
-/* truncate length to page boundary */
-static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
-       const unsigned long offset = offset_in_page(addr);
-
-       return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
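
Together these two helpers decide how each iovec element is chopped at page boundaries. A standalone check of the arithmetic, assuming 4 KiB pages (PAGE_SHIFT = 12):

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE - 1))

static int num_pages(unsigned long addr, unsigned long len)
{
        const unsigned long spage = addr & PAGE_MASK;
        const unsigned long epage = (addr + len - 1) & PAGE_MASK;

        return 1 + ((epage - spage) >> PAGE_SHIFT);
}

static unsigned long page_length(unsigned long addr, unsigned long len)
{
        const unsigned long offset = addr & (PAGE_SIZE - 1);

        return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
}

int main(void)
{
        /* 8 bytes starting 4 bytes before a page boundary span 2 pages */
        printf("pages=%d\n", num_pages(0xFFC, 8));      /* 2 */
        /* only the first 4 of those bytes fit in the current page */
        printf("len=%lu\n", page_length(0xFFC, 8));     /* 4 */
        return 0;
}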
-
-static void ipath_user_sdma_free_pkt_frag(struct device *dev,
-                                         struct ipath_user_sdma_queue *pq,
-                                         struct ipath_user_sdma_pkt *pkt,
-                                         int frag)
-{
-       const int i = frag;
-
-       if (pkt->addr[i].page) {
-               if (pkt->addr[i].dma_mapped)
-                       dma_unmap_page(dev,
-                                      pkt->addr[i].addr,
-                                      pkt->addr[i].length,
-                                      DMA_TO_DEVICE);
-
-               if (pkt->addr[i].kvaddr)
-                       kunmap(pkt->addr[i].page);
-
-               if (pkt->addr[i].put_page)
-                       put_page(pkt->addr[i].page);
-               else
-                       __free_page(pkt->addr[i].page);
-       } else if (pkt->addr[i].kvaddr)
-               /* free coherent mem from cache... */
-               dma_pool_free(pq->header_cache,
-                             pkt->addr[i].kvaddr, pkt->addr[i].addr);
-}
-
-/* return number of pages pinned... */
-static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
-                                    struct ipath_user_sdma_pkt *pkt,
-                                    unsigned long addr, int tlen, int npages)
-{
-       struct page *pages[2];
-       int j;
-       int ret;
-
-       ret = get_user_pages_fast(addr, npages, 0, pages);
-       if (ret != npages) {
-               int i;
-
-               for (i = 0; i < ret; i++)
-                       put_page(pages[i]);
-
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       for (j = 0; j < npages; j++) {
-               /* map the pages... */
-               const int flen =
-                       ipath_user_sdma_page_length(addr, tlen);
-               dma_addr_t dma_addr =
-                       dma_map_page(&dd->pcidev->dev,
-                                    pages[j], 0, flen, DMA_TO_DEVICE);
-               unsigned long fofs = offset_in_page(addr);
-
-               if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-                       ret = -ENOMEM;
-                       goto done;
-               }
-
-               ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
-                                         pages[j], kmap(pages[j]),
-                                         dma_addr);
-
-               pkt->naddr++;
-               addr += flen;
-               tlen -= flen;
-       }
-
-done:
-       return ret;
-}
-
-static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
-                                  struct ipath_user_sdma_queue *pq,
-                                  struct ipath_user_sdma_pkt *pkt,
-                                  const struct iovec *iov,
-                                  unsigned long niov)
-{
-       int ret = 0;
-       unsigned long idx;
-
-       for (idx = 0; idx < niov; idx++) {
-               const int npages = ipath_user_sdma_num_pages(iov + idx);
-               const unsigned long addr = (unsigned long) iov[idx].iov_base;
-
-               ret = ipath_user_sdma_pin_pages(dd, pkt,
-                                               addr, iov[idx].iov_len,
-                                               npages);
-               if (ret < 0)
-                       goto free_pkt;
-       }
-
-       goto done;
-
-free_pkt:
-       for (idx = 0; idx < pkt->naddr; idx++)
-               ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
-
-done:
-       return ret;
-}
-
-static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
-                                       struct ipath_user_sdma_queue *pq,
-                                       struct ipath_user_sdma_pkt *pkt,
-                                       const struct iovec *iov,
-                                       unsigned long niov, int npages)
-{
-       int ret = 0;
-
-       if (npages >= ARRAY_SIZE(pkt->addr))
-               ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
-       else
-               ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
-
-       return ret;
-}
-
-/* free a packet list */
-static void ipath_user_sdma_free_pkt_list(struct device *dev,
-                                         struct ipath_user_sdma_queue *pq,
-                                         struct list_head *list)
-{
-       struct ipath_user_sdma_pkt *pkt, *pkt_next;
-
-       list_for_each_entry_safe(pkt, pkt_next, list, list) {
-               int i;
-
-               for (i = 0; i < pkt->naddr; i++)
-                       ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
-
-               kmem_cache_free(pq->pkt_slab, pkt);
-       }
-}
-
-/*
- * copy headers, coalesce etc -- pq->lock must be held
- *
- * we queue all the packets to list, returning the number
- * of iovec elements consumed.  list must be empty initially,
- * since we clean it on any error...
- */
-static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
-                                     struct ipath_user_sdma_queue *pq,
-                                     struct list_head *list,
-                                     const struct iovec *iov,
-                                     unsigned long niov,
-                                     int maxpkts)
-{
-       unsigned long idx = 0;
-       int ret = 0;
-       int npkts = 0;
-       struct page *page = NULL;
-       __le32 *pbc;
-       dma_addr_t dma_addr;
-       struct ipath_user_sdma_pkt *pkt = NULL;
-       size_t len;
-       size_t nw;
-       u32 counter = pq->counter;
-       int dma_mapped = 0;
-
-       while (idx < niov && npkts < maxpkts) {
-               const unsigned long addr = (unsigned long) iov[idx].iov_base;
-               const unsigned long idx_save = idx;
-               unsigned pktnw;
-               unsigned pktnwc;
-               int nfrags = 0;
-               int npages = 0;
-               int cfur;
-
-               dma_mapped = 0;
-               len = iov[idx].iov_len;
-               nw = len >> 2;
-               page = NULL;
-
-               pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
-               if (!pkt) {
-                       ret = -ENOMEM;
-                       goto free_list;
-               }
-
-               if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
-                   len > PAGE_SIZE || len & 3 || addr & 3) {
-                       ret = -EINVAL;
-                       goto free_pkt;
-               }
-
-               if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
-                       pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
-                                            &dma_addr);
-               else
-                       pbc = NULL;
-
-               if (!pbc) {
-                       page = alloc_page(GFP_KERNEL);
-                       if (!page) {
-                               ret = -ENOMEM;
-                               goto free_pkt;
-                       }
-                       pbc = kmap(page);
-               }
-
-               cfur = copy_from_user(pbc, iov[idx].iov_base, len);
-               if (cfur) {
-                       ret = -EFAULT;
-                       goto free_pbc;
-               }
-
-               /*
-                * this assignment is a bit strange.  it's because
-                * the pbc counts the number of 32 bit words in the full
-                * packet _except_ the first word of the pbc itself...
-                */
-               pktnwc = nw - 1;
-
-               /*
-                * pktnw computation yields the number of 32 bit words
-                * that the caller has indicated in the PBC.  note that
-                * this is one less than the total number of words that
-                * goes to the send DMA engine as the first 32 bit word
-                * of the PBC itself is not counted.  Armed with this count,
-                * we can verify that the packet is consistent with the
-                * iovec lengths.
-                */
-               pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
-               if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
-                       ret = -EINVAL;
-                       goto free_pbc;
-               }
-
-               idx++;
-               while (pktnwc < pktnw && idx < niov) {
-                       const size_t slen = iov[idx].iov_len;
-                       const unsigned long faddr =
-                               (unsigned long) iov[idx].iov_base;
-
-                       if (slen & 3 || faddr & 3 || !slen ||
-                           slen > PAGE_SIZE) {
-                               ret = -EINVAL;
-                               goto free_pbc;
-                       }
-
-                       npages++;
-                       if ((faddr & PAGE_MASK) !=
-                           ((faddr + slen - 1) & PAGE_MASK))
-                               npages++;
-
-                       pktnwc += slen >> 2;
-                       idx++;
-                       nfrags++;
-               }
-
-               if (pktnwc != pktnw) {
-                       ret = -EINVAL;
-                       goto free_pbc;
-               }
-
-               if (page) {
-                       dma_addr = dma_map_page(&dd->pcidev->dev,
-                                               page, 0, len, DMA_TO_DEVICE);
-                       if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-                               ret = -ENOMEM;
-                               goto free_pbc;
-                       }
-
-                       dma_mapped = 1;
-               }
-
-               ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
-                                           page, pbc, dma_addr);
-
-               if (nfrags) {
-                       ret = ipath_user_sdma_init_payload(dd, pq, pkt,
-                                                          iov + idx_save + 1,
-                                                          nfrags, npages);
-                       if (ret < 0)
-                               goto free_pbc_dma;
-               }
-
-               counter++;
-               npkts++;
-
-               list_add_tail(&pkt->list, list);
-       }
-
-       ret = idx;
-       goto done;
-
-free_pbc_dma:
-       if (dma_mapped)
-               dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
-free_pbc:
-       if (page) {
-               kunmap(page);
-               __free_page(page);
-       } else
-               dma_pool_free(pq->header_cache, pbc, dma_addr);
-free_pkt:
-       kmem_cache_free(pq->pkt_slab, pkt);
-free_list:
-       ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
-done:
-       return ret;
-}
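
The core consistency check above compares the word count a caller wrote into its PBC against the iovec lengths actually supplied. A standalone sketch of that check with made-up values (the upper PBC bits, flags and the like, are left opaque here):

#include <stdio.h>
#include <stdint.h>

#define PBC_LENGTH_MASK ((1u << 11) - 1)        /* low 11 bits: 0x7ff */

int main(void)
{
        size_t hdr_bytes = 64, payload_bytes = 40;
        /* all 32-bit words in the packet except the first PBC word */
        unsigned expect = (hdr_bytes + payload_bytes) / 4 - 1;  /* 25 */
        uint32_t pbc0 = 0xdead0000u | expect;   /* upper bits: opaque flags */

        unsigned pktnw = pbc0 & PBC_LENGTH_MASK;
        printf("pktnw=%u expect=%u -> %s\n", pktnw, expect,
               pktnw == expect ? "consistent" : "-EINVAL");
        return 0;
}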
-
-static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
-                                                u32 c)
-{
-       pq->sent_counter = c;
-}
-
-/* try to clean out queue -- needs pq->lock */
-static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
-                                      struct ipath_user_sdma_queue *pq)
-{
-       struct list_head free_list;
-       struct ipath_user_sdma_pkt *pkt;
-       struct ipath_user_sdma_pkt *pkt_prev;
-       int ret = 0;
-
-       INIT_LIST_HEAD(&free_list);
-
-       list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
-               s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
-
-               if (descd < 0)
-                       break;
-
-               list_move_tail(&pkt->list, &free_list);
-
-               /* one more packet cleaned */
-               ret++;
-       }
-
-       if (!list_empty(&free_list)) {
-               u32 counter;
-
-               pkt = list_entry(free_list.prev,
-                                struct ipath_user_sdma_pkt, list);
-               counter = pkt->counter;
-
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-               ipath_user_sdma_set_complete_counter(pq, counter);
-       }
-
-       return ret;
-}
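
The completion test above relies on both counters increasing monotonically: a packet is done exactly when the signed difference between the hardware's removed count and the packet's queue position is non-negative, which stays correct across u64 wraparound. In isolation:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t removed = 105;         /* dd->ipath_sdma_descq_removed */
        uint64_t pkt_added = 103;       /* pkt->added at submit time */

        /* signed difference, as in: s64 descd = removed - added */
        if ((int64_t)(removed - pkt_added) >= 0)
                printf("packet complete, move to free list\n");
        return 0;
}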
-
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
-{
-       if (!pq)
-               return;
-
-       kmem_cache_destroy(pq->pkt_slab);
-       dma_pool_destroy(pq->header_cache);
-       kfree(pq);
-}
-
-/* clean descriptor queue, returns > 0 if some elements cleaned */
-static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
-{
-       int ret;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       ret = ipath_sdma_make_progress(dd);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       return ret;
-}
-
-/* we're in close; drain packets so that we can clean up successfully... */
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-                                struct ipath_user_sdma_queue *pq)
-{
-       int i;
-
-       if (!pq)
-               return;
-
-       for (i = 0; i < 100; i++) {
-               mutex_lock(&pq->lock);
-               if (list_empty(&pq->sent)) {
-                       mutex_unlock(&pq->lock);
-                       break;
-               }
-               ipath_user_sdma_hwqueue_clean(dd);
-               ipath_user_sdma_queue_clean(dd, pq);
-               mutex_unlock(&pq->lock);
-               msleep(10);
-       }
-
-       if (!list_empty(&pq->sent)) {
-               struct list_head free_list;
-
-               printk(KERN_INFO "drain: lists not empty: forcing!\n");
-               INIT_LIST_HEAD(&free_list);
-               mutex_lock(&pq->lock);
-               list_splice_init(&pq->sent, &free_list);
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-               mutex_unlock(&pq->lock);
-       }
-}
-
-static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
-                                          u64 addr, u64 dwlen, u64 dwoffset)
-{
-       return cpu_to_le64(/* SDmaPhyAddr[31:0] */
-                          ((addr & 0xfffffffcULL) << 32) |
-                          /* SDmaGeneration[1:0] */
-                          ((dd->ipath_sdma_generation & 3ULL) << 30) |
-                          /* SDmaDwordCount[10:0] */
-                          ((dwlen & 0x7ffULL) << 16) |
-                          /* SDmaBufOffset[12:2] */
-                          (dwoffset & 0x7ffULL));
-}
-
-static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
-{
-       return descq | cpu_to_le64(1ULL << 12);
-}
-
-static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
-{
-                                             /* last */  /* dma head */
-       return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
-}
-
-static inline __le64 ipath_sdma_make_desc1(u64 addr)
-{
-       /* SDmaPhyAddr[47:32] */
-       return cpu_to_le64(addr >> 32);
-}
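
The four helpers above pack a 128-bit SDMA descriptor; the field positions are spelled out in their comments. A standalone sketch of the same packing with a sample 36-bit DMA address (host-endian here, where the driver converts with cpu_to_le64):

#include <stdio.h>
#include <stdint.h>

static uint64_t make_desc0(unsigned gen, uint64_t addr,
                           uint64_t dwlen, uint64_t dwoffset)
{
        return ((addr & 0xfffffffcULL) << 32) | /* SDmaPhyAddr[31:0] */
               ((gen & 3ULL) << 30) |           /* SDmaGeneration[1:0] */
               ((dwlen & 0x7ffULL) << 16) |     /* SDmaDwordCount[10:0] */
               (dwoffset & 0x7ffULL);           /* SDmaBufOffset[12:2] */
}

int main(void)
{
        uint64_t addr = 0x1234abcd0ULL;         /* sample DMA address */
        uint64_t d0 = make_desc0(1, addr, 16, 0);

        d0 |= 1ULL << 12;                       /* first descriptor */
        d0 |= (1ULL << 11) | (1ULL << 13);      /* last + dma-head update */

        printf("desc0=%016llx desc1=%016llx\n",
               (unsigned long long)d0,
               (unsigned long long)(addr >> 32)); /* SDmaPhyAddr[47:32] */
        return 0;
}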
-
-static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
-                                     struct ipath_user_sdma_pkt *pkt, int idx,
-                                     unsigned ofs, u16 tail)
-{
-       const u64 addr = (u64) pkt->addr[idx].addr +
-               (u64) pkt->addr[idx].offset;
-       const u64 dwlen = (u64) pkt->addr[idx].length / 4;
-       __le64 *descqp;
-       __le64 descq0;
-
-       descqp = &dd->ipath_sdma_descq[tail].qw[0];
-
-       descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
-       if (idx == 0)
-               descq0 = ipath_sdma_make_first_desc0(descq0);
-       if (idx == pkt->naddr - 1)
-               descq0 = ipath_sdma_make_last_desc0(descq0);
-
-       descqp[0] = descq0;
-       descqp[1] = ipath_sdma_make_desc1(addr);
-}
-
-/* pq->lock must be held, get packets on the wire... */
-static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
-                                    struct ipath_user_sdma_queue *pq,
-                                    struct list_head *pktlist)
-{
-       int ret = 0;
-       unsigned long flags;
-       u16 tail;
-
-       if (list_empty(pktlist))
-               return 0;
-
-       if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
-               return -ECOMM;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
-               ret = -ECOMM;
-               goto unlock;
-       }
-
-       tail = dd->ipath_sdma_descq_tail;
-       while (!list_empty(pktlist)) {
-               struct ipath_user_sdma_pkt *pkt =
-                       list_entry(pktlist->next, struct ipath_user_sdma_pkt,
-                                  list);
-               int i;
-               unsigned ofs = 0;
-               u16 dtail = tail;
-
-               if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
-                       goto unlock_check_tail;
-
-               for (i = 0; i < pkt->naddr; i++) {
-                       ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
-                       ofs += pkt->addr[i].length >> 2;
-
-                       if (++tail == dd->ipath_sdma_descq_cnt) {
-                               tail = 0;
-                               ++dd->ipath_sdma_generation;
-                       }
-               }
-
-               if ((ofs<<2) > dd->ipath_ibmaxlen) {
-                       ipath_dbg("packet size %X > ibmax %X, fail\n",
-                               ofs<<2, dd->ipath_ibmaxlen);
-                       ret = -EMSGSIZE;
-                       goto unlock;
-               }
-
-               /*
-                * if the packet is >= 2KB mtu equivalent, we have to use
-                * the large buffers, and have to mark each descriptor as
-                * part of a large buffer packet.
-                */
-               if (ofs >= IPATH_SMALLBUF_DWORDS) {
-                       for (i = 0; i < pkt->naddr; i++) {
-                               dd->ipath_sdma_descq[dtail].qw[0] |=
-                                       cpu_to_le64(1ULL << 14);
-                               if (++dtail == dd->ipath_sdma_descq_cnt)
-                                       dtail = 0;
-                       }
-               }
-
-               dd->ipath_sdma_descq_added += pkt->naddr;
-               pkt->added = dd->ipath_sdma_descq_added;
-               list_move_tail(&pkt->list, &pq->sent);
-               ret++;
-       }
-
-unlock_check_tail:
-       /* advance the tail on the chip if necessary */
-       if (dd->ipath_sdma_descq_tail != tail) {
-               wmb();
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-               dd->ipath_sdma_descq_tail = tail;
-       }
-
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       return ret;
-}
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-                          struct ipath_user_sdma_queue *pq,
-                          const struct iovec *iov,
-                          unsigned long dim)
-{
-       int ret = 0;
-       struct list_head list;
-       int npkts = 0;
-
-       INIT_LIST_HEAD(&list);
-
-       mutex_lock(&pq->lock);
-
-       if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
-               ipath_user_sdma_hwqueue_clean(dd);
-               ipath_user_sdma_queue_clean(dd, pq);
-       }
-
-       while (dim) {
-               const int mxp = 8;
-
-               ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
-               if (ret <= 0)
-                       goto done_unlock;
-               else {
-                       dim -= ret;
-                       iov += ret;
-               }
-
-               /* force packets onto the sdma hw queue... */
-               if (!list_empty(&list)) {
-                       /*
-                        * lazily clean the hw queue.  the 4 is a rough
-                        * guess of how many sdma descriptors a packet
-                        * will take (it doesn't have to be exact).
-                        */
-                       if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
-                               ipath_user_sdma_hwqueue_clean(dd);
-                               ipath_user_sdma_queue_clean(dd, pq);
-                       }
-
-                       ret = ipath_user_sdma_push_pkts(dd, pq, &list);
-                       if (ret < 0)
-                               goto done_unlock;
-                       else {
-                               npkts += ret;
-                               pq->counter += ret;
-
-                               if (!list_empty(&list))
-                                       goto done_unlock;
-                       }
-               }
-       }
-
-done_unlock:
-       if (!list_empty(&list))
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
-       mutex_unlock(&pq->lock);
-
-       return (ret < 0) ? ret : npkts;
-}
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-                                 struct ipath_user_sdma_queue *pq)
-{
-       int ret = 0;
-
-       mutex_lock(&pq->lock);
-       ipath_user_sdma_hwqueue_clean(dd);
-       ret = ipath_user_sdma_queue_clean(dd, pq);
-       mutex_unlock(&pq->lock);
-
-       return ret;
-}
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
-{
-       return pq->sent_counter;
-}
-
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
-{
-       return pq->counter;
-}
-
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.h b/drivers/staging/rdma/ipath/ipath_user_sdma.h
deleted file mode 100644 (file)
index fc76316..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/device.h>
-
-struct ipath_user_sdma_queue;
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-                          struct ipath_user_sdma_queue *pq,
-                          const struct iovec *iov,
-                          unsigned long dim);
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-                                 struct ipath_user_sdma_queue *pq);
-
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-                                struct ipath_user_sdma_queue *pq);
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
deleted file mode 100644 (file)
index 1778dee..0000000
+++ /dev/null
@@ -1,2377 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_mad.h>
-#include <rdma/ib_user_verbs.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/utsname.h>
-#include <linux/rculist.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-static unsigned int ib_ipath_qp_table_size = 251;
-module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
-MODULE_PARM_DESC(qp_table_size, "QP table size");
-
-unsigned int ib_ipath_lkey_table_size = 12;
-module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
-                  S_IRUGO);
-MODULE_PARM_DESC(lkey_table_size,
-                "LKEY table size in bits (2^n, 1 <= n <= 23)");
-
-static unsigned int ib_ipath_max_pds = 0xFFFF;
-module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_pds,
-                "Maximum number of protection domains to support");
-
-static unsigned int ib_ipath_max_ahs = 0xFFFF;
-module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-
-unsigned int ib_ipath_max_cqes = 0x2FFFF;
-module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqes,
-                "Maximum number of completion queue entries to support");
-
-unsigned int ib_ipath_max_cqs = 0x1FFFF;
-module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
-
-unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
-module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
-                  S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-
-unsigned int ib_ipath_max_qps = 16384;
-module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
-
-unsigned int ib_ipath_max_sges = 0x60;
-module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
-
-unsigned int ib_ipath_max_mcast_grps = 16384;
-module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
-                  S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_grps,
-                "Maximum number of multicast groups to support");
-
-unsigned int ib_ipath_max_mcast_qp_attached = 16;
-module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_qp_attached,
-                "Maximum number of attached QPs to support");
-
-unsigned int ib_ipath_max_srqs = 1024;
-module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
-
-unsigned int ib_ipath_max_srq_sges = 128;
-module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
-
-unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
-module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
-
-static unsigned int ib_ipath_disable_sma;
-module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(disable_sma, "Disable the SMA");
-
-/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; ipath_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
-       [IB_QPS_RESET] = 0,
-       [IB_QPS_INIT] = IPATH_POST_RECV_OK,
-       [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
-       [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
-           IPATH_PROCESS_NEXT_SEND_OK,
-       [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
-       [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-       [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
-           IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-};
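
Callers consult this table with a single AND, e.g. ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK in ipath_post_one_send() below. A standalone sketch of the lookup pattern, with illustrative flag values and a reduced state set:

#include <stdio.h>

enum { QPS_RESET, QPS_INIT, QPS_RTS, QPS_ERR, QPS_MAX };

#define POST_RECV_OK    0x01
#define POST_SEND_OK    0x02

static const int state_ops[QPS_MAX] = {
        [QPS_RESET] = 0,
        [QPS_INIT]  = POST_RECV_OK,
        [QPS_RTS]   = POST_RECV_OK | POST_SEND_OK,
        [QPS_ERR]   = POST_RECV_OK,
};

int main(void)
{
        int state = QPS_INIT;

        /* legality of an operation is one AND against the state's flags */
        if (!(state_ops[state] & POST_SEND_OK))
                printf("post-send rejected in state %d\n", state);
        return 0;
}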
-
-struct ipath_ucontext {
-       struct ib_ucontext ibucontext;
-};
-
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-                                                 *ibucontext)
-{
-       return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
-/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
-       [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
-       [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
-       [IB_WR_SEND] = IB_WC_SEND,
-       [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
-       [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
-       [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
-       [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
- * System image GUID.
- */
-static __be64 sys_image_guid;
-
-/**
- * ipath_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- */
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               memcpy(sge->vaddr, data, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               data += len;
-               length -= len;
-       }
-}
-
-/**
- * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               length -= len;
-       }
-}
-
-/*
- * Count the number of DMA descriptors needed to send length bytes of data.
- * Don't modify the ipath_sge_state to get the count.
- * Return zero if any of the segments is not aligned.
- */
-static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sg_list = ss->sg_list;
-       struct ipath_sge sge = ss->sge;
-       u8 num_sge = ss->num_sge;
-       u32 ndesc = 1;  /* count the header */
-
-       while (length) {
-               u32 len = sge.length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge.sge_length)
-                       len = sge.sge_length;
-               BUG_ON(len == 0);
-               if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
-                   (len != length && (len & (sizeof(u32) - 1)))) {
-                       ndesc = 0;
-                       break;
-               }
-               ndesc++;
-               sge.vaddr += len;
-               sge.length -= len;
-               sge.sge_length -= len;
-               if (sge.sge_length == 0) {
-                       if (--num_sge)
-                               sge = *sg_list++;
-               } else if (sge.length == 0 && sge.mr != NULL) {
-                       if (++sge.n >= IPATH_SEGSZ) {
-                               if (++sge.m >= sge.mr->mapsz)
-                                       break;
-                               sge.n = 0;
-                       }
-                       sge.vaddr =
-                               sge.mr->map[sge.m]->segs[sge.n].vaddr;
-                       sge.length =
-                               sge.mr->map[sge.m]->segs[sge.n].length;
-               }
-               length -= len;
-       }
-       return ndesc;
-}
-
-/*
- * Copy from the SGEs to the data buffer.
- */
-static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
-                               u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               memcpy(data, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               data += len;
-               length -= len;
-       }
-}
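
The four routines above (copy, skip, count, copy-from) all share one gather-walk: consume min(remaining, current segment) bytes, advance within the segment, and pull the next SGE when one drains. A standalone sketch of that walk, with plain pointers standing in for the driver's MR segment maps:

#include <stdio.h>
#include <string.h>

struct sge { char *vaddr; unsigned length; };

static void copy_sge(struct sge *sg, unsigned nsge, char *data, unsigned length)
{
        while (length && nsge) {
                unsigned len = sg->length < length ? sg->length : length;

                memcpy(data, sg->vaddr, len);
                sg->vaddr += len;
                sg->length -= len;
                if (sg->length == 0) {  /* segment drained: next SGE */
                        sg++;
                        nsge--;
                }
                data += len;
                length -= len;
        }
}

int main(void)
{
        char a[] = "scatter/", b[] = "gather";
        struct sge list[2] = { { a, 8 }, { b, 6 } };
        char out[16] = { 0 };

        copy_sge(list, 2, out, 14);
        printf("%s\n", out);    /* scatter/gather */
        return 0;
}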
-
-/**
- * ipath_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
-       struct ipath_swqe *wqe;
-       u32 next;
-       int i;
-       int j;
-       int acc;
-       int ret;
-       unsigned long flags;
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (qp->ibqp.qp_type != IB_QPT_SMI &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               ret = -ENETDOWN;
-               goto bail;
-       }
-
-       /* Check that state is OK to post send. */
-       if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
-               goto bail_inval;
-
-       /* IB spec says that num_sge == 0 is OK. */
-       if (wr->num_sge > qp->s_max_sge)
-               goto bail_inval;
-
-       /*
-        * Don't allow RDMA reads or atomic operations on UC QPs,
-        * and reject undefined opcodes.  Make sure the buffer is
-        * large enough to hold the result for atomics.
-        */
-       if (qp->ibqp.qp_type == IB_QPT_UC) {
-               if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-                       goto bail_inval;
-       } else if (qp->ibqp.qp_type == IB_QPT_UD) {
-               /* Check UD opcode */
-               if (wr->opcode != IB_WR_SEND &&
-                   wr->opcode != IB_WR_SEND_WITH_IMM)
-                       goto bail_inval;
-               /* Check UD destination address PD */
-               if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-                       goto bail_inval;
-       } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-               goto bail_inval;
-       else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-                  (wr->num_sge == 0 ||
-                   wr->sg_list[0].length < sizeof(u64) ||
-                   wr->sg_list[0].addr & (sizeof(u64) - 1)))
-               goto bail_inval;
-       else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-               goto bail_inval;
-
-       next = qp->s_head + 1;
-       if (next >= qp->s_size)
-               next = 0;
-       if (next == qp->s_last) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       wqe = get_swqe_ptr(qp, qp->s_head);
-
-       if (qp->ibqp.qp_type != IB_QPT_UC &&
-           qp->ibqp.qp_type != IB_QPT_RC)
-               memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-       else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-                wr->opcode == IB_WR_RDMA_WRITE ||
-                wr->opcode == IB_WR_RDMA_READ)
-               memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-       else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-               memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-       else
-               memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-       wqe->length = 0;
-       if (wr->num_sge) {
-               acc = wr->opcode >= IB_WR_RDMA_READ ?
-                       IB_ACCESS_LOCAL_WRITE : 0;
-               for (i = 0, j = 0; i < wr->num_sge; i++) {
-                       u32 length = wr->sg_list[i].length;
-                       int ok;
-
-                       if (length == 0)
-                               continue;
-                       ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
-                                          &wr->sg_list[i], acc);
-                       if (!ok)
-                               goto bail_inval;
-                       wqe->length += length;
-                       j++;
-               }
-               wqe->wr.num_sge = j;
-       }
-       if (qp->ibqp.qp_type == IB_QPT_UC ||
-           qp->ibqp.qp_type == IB_QPT_RC) {
-               if (wqe->length > 0x80000000U)
-                       goto bail_inval;
-       } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
-               goto bail_inval;
-       wqe->ssn = qp->s_ssn++;
-       qp->s_head = next;
-
-       ret = 0;
-       goto bail;
-
-bail_inval:
-       ret = -EINVAL;
-bail:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
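
The queue-full test above (next == qp->s_last) is the classic one-slot-open ring idiom: one slot is deliberately never filled, so head == tail can mean empty without being ambiguous with full. In isolation:

#include <stdio.h>

#define RING_SIZE 4

int main(void)
{
        unsigned head = 3, tail = 0;    /* three entries queued */
        unsigned next = head + 1;

        if (next >= RING_SIZE)
                next = 0;
        if (next == tail)               /* advancing would hit tail */
                printf("ring full: would return -ENOMEM\n");
        return 0;
}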
-
-/**
- * ipath_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-                          struct ib_send_wr **bad_wr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       int err = 0;
-
-       for (; wr; wr = wr->next) {
-               err = ipath_post_one_send(qp, wr);
-               if (err) {
-                       *bad_wr = wr;
-                       goto bail;
-               }
-       }
-
-       /* Try to do the send work in the caller's context. */
-       ipath_do_send((unsigned long) qp);
-
-bail:
-       return err;
-}
-
-/**
- * ipath_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-                             struct ib_recv_wr **bad_wr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_rwq *wq = qp->r_rq.wq;
-       unsigned long flags;
-       int ret;
-
-       /* Check that state is OK to post receive. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
-               *bad_wr = wr;
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       for (; wr; wr = wr->next) {
-               struct ipath_rwqe *wqe;
-               u32 next;
-               int i;
-
-               if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-                       *bad_wr = wr;
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               spin_lock_irqsave(&qp->r_rq.lock, flags);
-               next = wq->head + 1;
-               if (next >= qp->r_rq.size)
-                       next = 0;
-               if (next == wq->tail) {
-                       spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-                       *bad_wr = wr;
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-               wqe->wr_id = wr->wr_id;
-               wqe->num_sge = wr->num_sge;
-               for (i = 0; i < wr->num_sge; i++)
-                       wqe->sg_list[i] = wr->sg_list[i];
-               /* Make sure queue entry is written before the head index. */
-               smp_wmb();
-               wq->head = next;
-               spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
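
The smp_wmb() above orders the two stores that matter: the work-queue entry must be visible before the head index that publishes it. A standalone sketch of the same publication order, using C11 release semantics in place of the kernel barrier:

#include <stdatomic.h>
#include <stdio.h>

struct rwq {
        int entries[8];
        atomic_uint head;
};

static void producer(struct rwq *wq, int wr_id)
{
        unsigned head = atomic_load_explicit(&wq->head, memory_order_relaxed);

        wq->entries[head] = wr_id;      /* write the entry... */
        atomic_store_explicit(&wq->head, (head + 1) % 8,
                              memory_order_release); /* ...then publish it */
}

int main(void)
{
        struct rwq wq = { .head = 0 };

        producer(&wq, 42);
        printf("head=%u entry=%d\n", atomic_load(&wq.head), wq.entries[0]);
        return 0;
}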
-
-/**
- * ipath_qp_rcv - process an incoming packet on a QP
- * @dev: the device the packet came on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_ib_rcv() to process an incoming packet
- * for the given QP.
- * Called at interrupt level.
- */
-static void ipath_qp_rcv(struct ipath_ibdev *dev,
-                        struct ipath_ib_header *hdr, int has_grh,
-                        void *data, u32 tlen, struct ipath_qp *qp)
-{
-       /* Check for valid receive state. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               return;
-       }
-
-       switch (qp->ibqp.qp_type) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               if (ib_ipath_disable_sma)
-                       break;
-               /* FALLTHROUGH */
-       case IB_QPT_UD:
-               ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       case IB_QPT_RC:
-               ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       case IB_QPT_UC:
-               ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       default:
-               break;
-       }
-}
-
-/**
- * ipath_ib_rcv - process an incoming packet
- * @arg: the device pointer
- * @rhdr: the header of the packet
- * @data: the packet data
- * @tlen: the packet length
- *
- * This is called from ipath_kreceive() to process an incoming packet at
- * interrupt level. Tlen is the length of the header + data + CRC in bytes.
- */
-void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
-                 u32 tlen)
-{
-       struct ipath_ib_header *hdr = rhdr;
-       struct ipath_other_headers *ohdr;
-       struct ipath_qp *qp;
-       u32 qp_num;
-       int lnh;
-       u8 opcode;
-       u16 lid;
-
-       if (unlikely(dev == NULL))
-               goto bail;
-
-       if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
-               dev->rcv_errors++;
-               goto bail;
-       }
-
-       /* Check for a valid destination LID (see ch. 7.11.1). */
-       lid = be16_to_cpu(hdr->lrh[1]);
-       if (lid < IPATH_MULTICAST_LID_BASE) {
-               lid &= ~((1 << dev->dd->ipath_lmc) - 1);
-               if (unlikely(lid != dev->dd->ipath_lid)) {
-                       dev->rcv_errors++;
-                       goto bail;
-               }
-       }
-
-       /* Check for GRH */
-       lnh = be16_to_cpu(hdr->lrh[0]) & 3;
-       if (lnh == IPATH_LRH_BTH)
-               ohdr = &hdr->u.oth;
-       else if (lnh == IPATH_LRH_GRH)
-               ohdr = &hdr->u.l.oth;
-       else {
-               dev->rcv_errors++;
-               goto bail;
-       }
-
-       opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
-       dev->opstats[opcode].n_bytes += tlen;
-       dev->opstats[opcode].n_packets++;
-
-       /* Get the destination QP number. */
-       qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
-       if (qp_num == IPATH_MULTICAST_QPN) {
-               struct ipath_mcast *mcast;
-               struct ipath_mcast_qp *p;
-
-               if (lnh != IPATH_LRH_GRH) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
-               if (mcast == NULL) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               dev->n_multicast_rcv++;
-               list_for_each_entry_rcu(p, &mcast->qp_list, list)
-                       ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
-               /*
-                * Notify ipath_multicast_detach() if it is waiting for us
-                * to finish.
-                */
-               if (atomic_dec_return(&mcast->refcount) <= 1)
-                       wake_up(&mcast->wait);
-       } else {
-               qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
-               if (qp) {
-                       dev->n_unicast_rcv++;
-                       ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
-                                    tlen, qp);
-                       /*
-                        * Notify ipath_destroy_qp() if it is waiting
-                        * for us to finish.
-                        */
-                       if (atomic_dec_and_test(&qp->refcount))
-                               wake_up(&qp->wait);
-               } else
-                       dev->n_pkt_drops++;
-       }
-
-bail:;
-}
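
The receive path above pulls the opcode from bits 31:24 of bth[0] and the destination QPN from the low bits of bth[1]. A standalone sketch on a sample big-endian BTH (the 24-bit QPN mask is the conventional IB width and stands in for IPATH_QPN_MASK; ntohl stands in for be32_to_cpu):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
        uint32_t bth[3];

        bth[0] = htonl(0x64000000);     /* sample opcode byte: 0x64 */
        bth[1] = htonl(0x00000007);     /* destination QPN 7 */

        unsigned opcode = (ntohl(bth[0]) >> 24) & 0x7f;
        unsigned qpn = ntohl(bth[1]) & 0xffffff;

        printf("opcode=0x%02x qpn=%u\n", opcode, qpn);
        return 0;
}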
-
-/**
- * ipath_ib_timer - verbs timer
- * @arg: the device pointer
- *
- * This is called from ipath_do_rcv_timer() at interrupt level to check for
- * QPs which need retransmits and to collect performance numbers.
- */
-static void ipath_ib_timer(struct ipath_ibdev *dev)
-{
-       struct ipath_qp *resend = NULL;
-       struct ipath_qp *rnr = NULL;
-       struct list_head *last;
-       struct ipath_qp *qp;
-       unsigned long flags;
-
-       if (dev == NULL)
-               return;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       /* Start filling the next pending queue. */
-       if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
-               dev->pending_index = 0;
-       /* Save any requests still in the new queue, they have timed out. */
-       last = &dev->pending[dev->pending_index];
-       while (!list_empty(last)) {
-               qp = list_entry(last->next, struct ipath_qp, timerwait);
-               list_del_init(&qp->timerwait);
-               qp->timer_next = resend;
-               resend = qp;
-               atomic_inc(&qp->refcount);
-       }
-       last = &dev->rnrwait;
-       if (!list_empty(last)) {
-               qp = list_entry(last->next, struct ipath_qp, timerwait);
-               if (--qp->s_rnr_timeout == 0) {
-                       do {
-                               list_del_init(&qp->timerwait);
-                               qp->timer_next = rnr;
-                               rnr = qp;
-                               atomic_inc(&qp->refcount);
-                               if (list_empty(last))
-                                       break;
-                               qp = list_entry(last->next, struct ipath_qp,
-                                               timerwait);
-                       } while (qp->s_rnr_timeout == 0);
-               }
-       }
-       /*
-        * We should only be in the started state if pma_sample_start != 0
-        */
-       if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
-           --dev->pma_sample_start == 0) {
-               dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-               ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
-                                       &dev->ipath_rword,
-                                       &dev->ipath_spkts,
-                                       &dev->ipath_rpkts,
-                                       &dev->ipath_xmit_wait);
-       }
-       if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
-               if (dev->pma_sample_interval == 0) {
-                       u64 ta, tb, tc, td, te;
-
-                       dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-                       ipath_snapshot_counters(dev->dd, &ta, &tb,
-                                               &tc, &td, &te);
-
-                       dev->ipath_sword = ta - dev->ipath_sword;
-                       dev->ipath_rword = tb - dev->ipath_rword;
-                       dev->ipath_spkts = tc - dev->ipath_spkts;
-                       dev->ipath_rpkts = td - dev->ipath_rpkts;
-                       dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
-               } else {
-                       dev->pma_sample_interval--;
-               }
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       /* XXX What if timer fires again while this is running? */
-       while (resend != NULL) {
-               qp = resend;
-               resend = qp->timer_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (qp->s_last != qp->s_tail &&
-                   ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-                       dev->n_timeouts++;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-               }
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-       while (rnr != NULL) {
-               qp = rnr;
-               rnr = qp->timer_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-}
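
The pending[] array above behaves as a small timer wheel. A minimal standalone sketch of just the slot rotation (the slot count mirrors the three pending lists initialized later in this file; the tick loop and printf are illustrative only):

#include <stdio.h>

#define PENDING_SLOTS 3 /* mirrors ARRAY_SIZE(dev->pending) */

int main(void)
{
        unsigned pending_index = 0;
        int tick;

        /*
         * Each timer tick advances the index; whatever is still queued
         * in the slot being advanced into has waited a full revolution
         * and is treated as timed out, just as ipath_ib_timer() drains
         * 'last' above.
         */
        for (tick = 0; tick < 5; tick++) {
                if (++pending_index >= PENDING_SLOTS)
                        pending_index = 0;
                printf("tick %d: drain slot %u\n", tick, pending_index);
        }
        return 0;
}
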
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       sge->vaddr += length;
-       sge->length -= length;
-       sge->sge_length -= length;
-       if (sge->sge_length == 0) {
-               if (--ss->num_sge)
-                       *sge = *ss->sg_list++;
-       } else if (sge->length == 0 && sge->mr != NULL) {
-               if (++sge->n >= IPATH_SEGSZ) {
-                       if (++sge->m >= sge->mr->mapsz)
-                               return;
-                       sge->n = 0;
-               }
-               sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-               sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-       }
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-       return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-       return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-       data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-       data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-       return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-       return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-       return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-       data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-       data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-       return data;
-}
-#endif
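
A standalone illustration of the little-endian clear_upper_bytes() above; BITS_PER_BYTE is assumed to be 8 as in the kernel, and the input value is made up:

#include <stdio.h>
#include <stdint.h>

#define BITS_PER_BYTE 8

/*
 * Same shift trick as the little-endian version above: keep the low
 * n bytes of data and place them at byte offset off within the word.
 */
static uint32_t clear_upper_bytes(uint32_t data, uint32_t n, uint32_t off)
{
        data <<= ((sizeof(uint32_t) - n) * BITS_PER_BYTE);
        data >>= ((sizeof(uint32_t) - n - off) * BITS_PER_BYTE);
        return data;
}

int main(void)
{
        /*
         * Keep 2 bytes of 0xaabbccdd at byte offset 1:
         * << 16 gives 0xccdd0000, then >> 8 gives 0x00ccdd00.
         */
        printf("0x%08x\n", (unsigned) clear_upper_bytes(0xaabbccddu, 2, 1));
        return 0;
}
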
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-                   u32 length, unsigned flush_wc)
-{
-       u32 extra = 0;
-       u32 data = 0;
-       u32 last;
-
-       while (1) {
-               u32 len = ss->sge.length;
-               u32 off;
-
-               if (len > length)
-                       len = length;
-               if (len > ss->sge.sge_length)
-                       len = ss->sge.sge_length;
-               BUG_ON(len == 0);
-               /* If the source address is not aligned, try to align it. */
-               off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-               if (off) {
-                       u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-                                           ~(sizeof(u32) - 1));
-                       u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-                       u32 y;
-
-                       y = sizeof(u32) - off;
-                       if (len > y)
-                               len = y;
-                       if (len + extra >= sizeof(u32)) {
-                               data |= set_upper_bits(v, extra *
-                                                      BITS_PER_BYTE);
-                               len = sizeof(u32) - extra;
-                               if (len == length) {
-                                       last = data;
-                                       break;
-                               }
-                               __raw_writel(data, piobuf);
-                               piobuf++;
-                               extra = 0;
-                               data = 0;
-                       } else {
-                               /* Clear unused upper bytes */
-                               data |= clear_upper_bytes(v, len, extra);
-                               if (len == length) {
-                                       last = data;
-                                       break;
-                               }
-                               extra += len;
-                       }
-               } else if (extra) {
-                       /* Source address is aligned. */
-                       u32 *addr = (u32 *) ss->sge.vaddr;
-                       int shift = extra * BITS_PER_BYTE;
-                       int ushift = 32 - shift;
-                       u32 l = len;
-
-                       while (l >= sizeof(u32)) {
-                               u32 v = *addr;
-
-                               data |= set_upper_bits(v, shift);
-                               __raw_writel(data, piobuf);
-                               data = get_upper_bits(v, ushift);
-                               piobuf++;
-                               addr++;
-                               l -= sizeof(u32);
-                       }
-                       /*
-                        * We still have 'l' bytes left over.
-                        */
-                       if (l) {
-                               u32 v = *addr;
-
-                               if (l + extra >= sizeof(u32)) {
-                                       data |= set_upper_bits(v, shift);
-                                       len -= l + extra - sizeof(u32);
-                                       if (len == length) {
-                                               last = data;
-                                               break;
-                                       }
-                                       __raw_writel(data, piobuf);
-                                       piobuf++;
-                                       extra = 0;
-                                       data = 0;
-                               } else {
-                                       /* Clear unused upper bytes */
-                                       data |= clear_upper_bytes(v, l,
-                                                                 extra);
-                                       if (len == length) {
-                                               last = data;
-                                               break;
-                                       }
-                                       extra += l;
-                               }
-                       } else if (len == length) {
-                               last = data;
-                               break;
-                       }
-               } else if (len == length) {
-                       u32 w;
-
-                       /*
-                        * Need to round up for the last dword in the
-                        * packet.
-                        */
-                       w = (len + 3) >> 2;
-                       __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-                       piobuf += w - 1;
-                       last = ((u32 *) ss->sge.vaddr)[w - 1];
-                       break;
-               } else {
-                       u32 w = len >> 2;
-
-                       __iowrite32_copy(piobuf, ss->sge.vaddr, w);
-                       piobuf += w;
-
-                       extra = len & (sizeof(u32) - 1);
-                       if (extra) {
-                               u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-                               /* Clear unused upper bytes */
-                               data = clear_upper_bytes(v, extra, 0);
-                       }
-               }
-               update_sge(ss, len);
-               length -= len;
-       }
-       /* Update address before sending packet. */
-       update_sge(ss, length);
-       if (flush_wc) {
-               /* must flush everything early, before the trigger word */
-               ipath_flush_wc();
-               __raw_writel(last, piobuf);
-               /* be sure trigger word is written */
-               ipath_flush_wc();
-       } else
-               __raw_writel(last, piobuf);
-}
-
-/*
- * Convert IB rate to delay multiplier.
- */
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
-{
-       switch (rate) {
-       case IB_RATE_2_5_GBPS: return 8;
-       case IB_RATE_5_GBPS:   return 4;
-       case IB_RATE_10_GBPS:  return 2;
-       case IB_RATE_20_GBPS:  return 1;
-       default:               return 0;
-       }
-}
-
-/*
- * Convert delay multiplier to IB rate
- */
-static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
-{
-       switch (mult) {
-       case 8:  return IB_RATE_2_5_GBPS;
-       case 4:  return IB_RATE_5_GBPS;
-       case 2:  return IB_RATE_10_GBPS;
-       case 1:  return IB_RATE_20_GBPS;
-       default: return IB_RATE_PORT_CURRENT;
-       }
-}
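
A small standalone check of the mapping above; the enum below is a local stand-in for the kernel's ib_rate values, since only the rate-to-multiplier relationship matters here:

#include <stdio.h>

/* Local stand-in for the kernel's enum ib_rate; values are illustrative. */
enum ib_rate { IB_RATE_PORT_CURRENT, IB_RATE_2_5_GBPS, IB_RATE_5_GBPS,
               IB_RATE_10_GBPS, IB_RATE_20_GBPS };

static unsigned rate_to_mult(enum ib_rate rate)
{
        switch (rate) {
        case IB_RATE_2_5_GBPS: return 8;
        case IB_RATE_5_GBPS:   return 4;
        case IB_RATE_10_GBPS:  return 2;
        case IB_RATE_20_GBPS:  return 1;
        default:               return 0;
        }
}

int main(void)
{
        /*
         * 2.5 Gbps (1xSDR) is the slowest link, so it gets the largest
         * delay multiplier: one word time at 2.5 Gbps spans eight word
         * times at 20 Gbps.
         */
        printf("mult(2.5G) = %u, mult(20G) = %u\n",
               rate_to_mult(IB_RATE_2_5_GBPS), rate_to_mult(IB_RATE_20_GBPS));
        return 0;
}
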
-
-static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
-{
-       struct ipath_verbs_txreq *tx = NULL;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (!list_empty(&dev->txreq_free)) {
-               struct list_head *l = dev->txreq_free.next;
-
-               list_del(l);
-               tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-       return tx;
-}
-
-static inline void put_txreq(struct ipath_ibdev *dev,
-                            struct ipath_verbs_txreq *tx)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       list_add(&tx->txreq.list, &dev->txreq_free);
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-}
-
-static void sdma_complete(void *cookie, int status)
-{
-       struct ipath_verbs_txreq *tx = cookie;
-       struct ipath_qp *qp = tx->qp;
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       unsigned long flags;
-       enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
-               IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
-
-       if (atomic_dec_and_test(&qp->s_dma_busy)) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (tx->wqe)
-                       ipath_send_complete(qp, tx->wqe, ibs);
-               if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-                    qp->s_last != qp->s_head) ||
-                   (qp->s_flags & IPATH_S_WAIT_DMA))
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               wake_up(&qp->wait_dma);
-       } else if (tx->wqe) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               ipath_send_complete(qp, tx->wqe, ibs);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-       }
-
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-               kfree(tx->txreq.map_addr);
-       put_txreq(dev, tx);
-
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
-static void decrement_dma_busy(struct ipath_qp *qp)
-{
-       unsigned long flags;
-
-       if (atomic_dec_and_test(&qp->s_dma_busy)) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-                    qp->s_last != qp->s_head) ||
-                   (qp->s_flags & IPATH_S_WAIT_DMA))
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               wake_up(&qp->wait_dma);
-       }
-}
-
-/*
- * Compute the number of clock cycles of delay before sending the next packet.
- * The multipliers are expressed in clock ticks of the fastest rate, so
- * one tick at 1xSDR equals 8 ticks at 4xDDR.
- * If the destination port will take longer to receive a packet than
- * the outgoing link can send it, we need to delay sending the next packet
- * by the difference in time it takes the receiver to receive and the sender
- * to send this packet.
- * Note that this delay is always correct for UC and RC but not always
- * optimal for UD. For UD, the destination HCA can be different for each
- * packet, in which case, we could send packets to a different destination
- * while "waiting" for the delay. The overhead for doing this without
- * HW support is more than just paying the cost of delaying some packets
- * unnecessarily.
- */
-static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
-{
-       return (rcv_mult > snd_mult) ?
-               (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
-}
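
A quick numeric check of the delay formula, with plain C types standing in for the kernel's u32/u8 and a made-up packet length:

#include <stdio.h>

/* Same expression as ipath_pkt_delay() above, with plain C types. */
static unsigned pkt_delay(unsigned plen, unsigned snd_mult, unsigned rcv_mult)
{
        return (rcv_mult > snd_mult) ?
                (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
}

int main(void)
{
        /*
         * A 100-dword packet from a 4xDDR sender (mult 1) to a 1xSDR
         * receiver (mult 8) needs (100 * 7 + 1) >> 1 = 350 ticks of
         * delay; a receiver at least as fast as the sender needs none.
         */
        printf("%u %u\n", pkt_delay(100, 1, 8), pkt_delay(100, 8, 1));
        return 0;
}
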
-
-static int ipath_verbs_send_dma(struct ipath_qp *qp,
-                               struct ipath_ib_header *hdr, u32 hdrwords,
-                               struct ipath_sge_state *ss, u32 len,
-                               u32 plen, u32 dwords)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_devdata *dd = dev->dd;
-       struct ipath_verbs_txreq *tx;
-       u32 *piobuf;
-       u32 control;
-       u32 ndesc;
-       int ret;
-
-       tx = qp->s_tx;
-       if (tx) {
-               qp->s_tx = NULL;
-               /* resend previously constructed packet */
-               atomic_inc(&qp->s_dma_busy);
-               ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
-               if (ret) {
-                       qp->s_tx = tx;
-                       decrement_dma_busy(qp);
-               }
-               goto bail;
-       }
-
-       tx = get_txreq(dev);
-       if (!tx) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       /*
-        * Get the saved delay count we computed for the previous packet
-        * and save the delay count for this packet to be used next time
-        * we get here.
-        */
-       control = qp->s_pkt_delay;
-       qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-       tx->qp = qp;
-       atomic_inc(&qp->refcount);
-       tx->wqe = qp->s_wqe;
-       tx->txreq.callback = sdma_complete;
-       tx->txreq.callback_cookie = tx;
-       tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
-               IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
-       if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-               tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
-
-       /* VL15 packets bypass credit check */
-       if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
-               control |= 1ULL << 31;
-               tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
-       }
-
-       if (len) {
-               /*
-                * Don't try to DMA if it takes more descriptors than
-                * the queue holds.
-                */
-               ndesc = ipath_count_sge(ss, len);
-               if (ndesc >= dd->ipath_sdma_descq_cnt)
-                       ndesc = 0;
-       } else
-               ndesc = 1;
-       if (ndesc) {
-               tx->hdr.pbc[0] = cpu_to_le32(plen);
-               tx->hdr.pbc[1] = cpu_to_le32(control);
-               memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
-               tx->txreq.sg_count = ndesc;
-               tx->map_len = (hdrwords + 2) << 2;
-               tx->txreq.map_addr = &tx->hdr;
-               atomic_inc(&qp->s_dma_busy);
-               ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
-               if (ret) {
-                       /* save ss and length in dwords */
-                       tx->ss = ss;
-                       tx->len = dwords;
-                       qp->s_tx = tx;
-                       decrement_dma_busy(qp);
-               }
-               goto bail;
-       }
-
-       /* Allocate a buffer and copy the header and payload to it. */
-       tx->map_len = (plen + 1) << 2;
-       piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
-       if (unlikely(piobuf == NULL)) {
-               ret = -EBUSY;
-               goto err_tx;
-       }
-       tx->txreq.map_addr = piobuf;
-       tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
-       tx->txreq.sg_count = 1;
-
-       *piobuf++ = (__force u32) cpu_to_le32(plen);
-       *piobuf++ = (__force u32) cpu_to_le32(control);
-       memcpy(piobuf, hdr, hdrwords << 2);
-       ipath_copy_from_sge(piobuf + hdrwords, ss, len);
-
-       atomic_inc(&qp->s_dma_busy);
-       ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
-       /*
-        * If we couldn't queue the DMA request, save the info
-        * and try again later rather than destroying the
-        * buffer and undoing the side effects of the copy.
-        */
-       if (ret) {
-               tx->ss = NULL;
-               tx->len = 0;
-               qp->s_tx = tx;
-               decrement_dma_busy(qp);
-       }
-       dev->n_unaligned++;
-       goto bail;
-
-err_tx:
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-       put_txreq(dev, tx);
-bail:
-       return ret;
-}
-
-static int ipath_verbs_send_pio(struct ipath_qp *qp,
-                               struct ipath_ib_header *ibhdr, u32 hdrwords,
-                               struct ipath_sge_state *ss, u32 len,
-                               u32 plen, u32 dwords)
-{
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-       u32 *hdr = (u32 *) ibhdr;
-       u32 __iomem *piobuf;
-       unsigned flush_wc;
-       u32 control;
-       int ret;
-       unsigned long flags;
-
-       piobuf = ipath_getpiobuf(dd, plen, NULL);
-       if (unlikely(piobuf == NULL)) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       /*
-        * Get the saved delay count we computed for the previous packet
-        * and save the delay count for this packet to be used next time
-        * we get here.
-        */
-       control = qp->s_pkt_delay;
-       qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-       /* VL15 packets bypass credit check */
-       if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
-               control |= 1ULL << 31;
-
-       /*
-        * Write the length to the control qword plus any needed flags.
-        * We have to flush after the PBC for correctness on some cpus
-        * or WC buffer can be written out of order.
-        */
-       writeq(((u64) control << 32) | plen, piobuf);
-       piobuf += 2;
-
-       flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
-       if (len == 0) {
-               /*
-                * If there is just the header portion, we must flush before
-                * writing the last word of the header for correctness, and
-                * again after the last header word (the trigger word).
-                */
-               if (flush_wc) {
-                       ipath_flush_wc();
-                       __iowrite32_copy(piobuf, hdr, hdrwords - 1);
-                       ipath_flush_wc();
-                       __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-                       ipath_flush_wc();
-               } else
-                       __iowrite32_copy(piobuf, hdr, hdrwords);
-               goto done;
-       }
-
-       if (flush_wc)
-               ipath_flush_wc();
-       __iowrite32_copy(piobuf, hdr, hdrwords);
-       piobuf += hdrwords;
-
-       /* The common case is aligned and contained in one segment. */
-       if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-                  !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-               u32 *addr = (u32 *) ss->sge.vaddr;
-
-               /* Update address before sending packet. */
-               update_sge(ss, len);
-               if (flush_wc) {
-                       __iowrite32_copy(piobuf, addr, dwords - 1);
-                       /* must flush everything early, before the trigger word */
-                       ipath_flush_wc();
-                       __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
-                       /* be sure trigger word is written */
-                       ipath_flush_wc();
-               } else
-                       __iowrite32_copy(piobuf, addr, dwords);
-               goto done;
-       }
-       copy_io(piobuf, ss, len, flush_wc);
-done:
-       if (qp->s_wqe) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
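
To make the PBC write near the top of ipath_verbs_send_pio() concrete, a minimal sketch of the control qword layout; the control and plen values below are made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /*
         * The PBC packs the control word (including the VL15
         * credit-bypass bit 31) into the high 32 bits and the packet
         * length in dwords into the low 32 bits, matching
         * writeq(((u64) control << 32) | plen, piobuf) above.
         */
        uint32_t control = 1u << 31;    /* VL15: bypass credit check */
        uint32_t plen = 77;             /* header + payload + PBC dword */
        uint64_t pbc = ((uint64_t) control << 32) | plen;

        printf("PBC qword: 0x%016llx\n", (unsigned long long) pbc);
        return 0;
}
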
-
-/**
- * ipath_verbs_send - send a packet
- * @qp: the QP to send on
- * @hdr: the packet header
- * @hdrwords: the number of 32-bit words in the header
- * @ss: the SGE to send
- * @len: the length of the packet in bytes
- */
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-                    u32 hdrwords, struct ipath_sge_state *ss, u32 len)
-{
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-       u32 plen;
-       int ret;
-       u32 dwords = (len + 3) >> 2;
-
-       /*
-        * Calculate the send buffer trigger address.
-        * The +1 accounts for the pbc control dword that follows the pbc length.
-        */
-       plen = hdrwords + dwords + 1;
-
-       /*
-        * VL15 packets (IB_QPT_SMI) will always use PIO, so we
-        * can defer SDMA restart until link goes ACTIVE without
-        * worrying about just how we got there.
-        */
-       if (qp->ibqp.qp_type == IB_QPT_SMI ||
-           !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-               ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
-                                          plen, dwords);
-       else
-               ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
-                                          plen, dwords);
-
-       return ret;
-}
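
For reference, the trigger-length arithmetic above rounds the payload up to whole dwords and adds one dword for the PBC control word: with a hypothetical 13-word header and a 250-byte payload, dwords = (250 + 3) >> 2 = 63 and plen = 13 + 63 + 1 = 77.
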
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-                           u64 *rwords, u64 *spkts, u64 *rpkts,
-                           u64 *xmit_wait)
-{
-       int ret;
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ret = -EINVAL;
-               goto bail;
-       }
-       *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-       *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-       *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-       *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_get_counters(struct ipath_devdata *dd,
-                      struct ipath_verbs_counters *cntrs)
-{
-       struct ipath_cregs const *crp = dd->ipath_cregs;
-       int ret;
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ret = -EINVAL;
-               goto bail;
-       }
-       cntrs->symbol_error_counter =
-               ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
-       cntrs->link_error_recovery_counter =
-               ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
-       /*
-        * The link downed counter counts when the other side downs the
-        * connection.  We add in the number of times we downed the link
-        * due to local link integrity errors to compensate.
-        */
-       cntrs->link_downed_counter =
-               ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
-       cntrs->port_rcv_errors =
-               ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
-               ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
-               ipath_snap_cntr(dd, crp->cr_portovflcnt) +
-               ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
-               ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
-               ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
-               ipath_snap_cntr(dd, crp->cr_erricrccnt) +
-               ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
-               ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
-               ipath_snap_cntr(dd, crp->cr_badformatcnt) +
-               dd->ipath_rxfc_unsupvl_errs;
-       if (crp->cr_rxotherlocalphyerrcnt)
-               cntrs->port_rcv_errors +=
-                       ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
-       if (crp->cr_rxvlerrcnt)
-               cntrs->port_rcv_errors +=
-                       ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
-       cntrs->port_rcv_remphys_errors =
-               ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
-       cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
-       cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
-       cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
-       cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
-       cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
-       cntrs->local_link_integrity_errors =
-               crp->cr_locallinkintegrityerrcnt ?
-               ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
-               ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-                dd->ipath_lli_errs : dd->ipath_lli_errors);
-       cntrs->excessive_buffer_overrun_errors =
-               crp->cr_excessbufferovflcnt ?
-               ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
-               dd->ipath_overrun_thresh_errs;
-       cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
-               ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_ib_piobufavail - callback when a PIO buffer is available
- * @dev: the device pointer
- *
- * This is called from ipath_intr() at interrupt level when a PIO buffer is
- * available after ipath_verbs_send() returned an error that no buffers were
- * available.  Return 1 if we consumed all the PIO buffers and still have
- * QPs waiting for buffers (for now, we just restart the send tasklet and
- * return zero).
- */
-int ipath_ib_piobufavail(struct ipath_ibdev *dev)
-{
-       struct list_head *list;
-       struct ipath_qp *qplist;
-       struct ipath_qp *qp;
-       unsigned long flags;
-
-       if (dev == NULL)
-               goto bail;
-
-       list = &dev->piowait;
-       qplist = NULL;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       while (!list_empty(list)) {
-               qp = list_entry(list->next, struct ipath_qp, piowait);
-               list_del_init(&qp->piowait);
-               qp->pio_next = qplist;
-               qplist = qp;
-               atomic_inc(&qp->refcount);
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       while (qplist != NULL) {
-               qp = qplist;
-               qplist = qp->pio_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-
-bail:
-       return 0;
-}
-
-static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                             struct ib_udata *uhw)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       memset(props, 0, sizeof(*props));
-
-       props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-               IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-               IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-               IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-       props->page_size_cap = PAGE_SIZE;
-       props->vendor_id =
-               IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
-       props->vendor_part_id = dev->dd->ipath_deviceid;
-       props->hw_ver = dev->dd->ipath_pcirev;
-
-       props->sys_image_guid = dev->sys_image_guid;
-
-       props->max_mr_size = ~0ull;
-       props->max_qp = ib_ipath_max_qps;
-       props->max_qp_wr = ib_ipath_max_qp_wrs;
-       props->max_sge = ib_ipath_max_sges;
-       props->max_sge_rd = ib_ipath_max_sges;
-       props->max_cq = ib_ipath_max_cqs;
-       props->max_ah = ib_ipath_max_ahs;
-       props->max_cqe = ib_ipath_max_cqes;
-       props->max_mr = dev->lk_table.max;
-       props->max_fmr = dev->lk_table.max;
-       props->max_map_per_fmr = 32767;
-       props->max_pd = ib_ipath_max_pds;
-       props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
-       props->max_qp_init_rd_atom = 255;
-       /* props->max_res_rd_atom */
-       props->max_srq = ib_ipath_max_srqs;
-       props->max_srq_wr = ib_ipath_max_srq_wrs;
-       props->max_srq_sge = ib_ipath_max_srq_sges;
-       /* props->local_ca_ack_delay */
-       props->atomic_cap = IB_ATOMIC_GLOB;
-       props->max_pkeys = ipath_get_npkeys(dev->dd);
-       props->max_mcast_grp = ib_ipath_max_mcast_grps;
-       props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
-       props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-               props->max_mcast_grp;
-
-       return 0;
-}
-
-const u8 ipath_cvt_physportstate[32] = {
-       [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
-       [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
-       [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
-       [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
-       [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
-       [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
-       [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
-};
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
-{
-       return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-static int ipath_query_port(struct ib_device *ibdev,
-                           u8 port, struct ib_port_attr *props)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_devdata *dd = dev->dd;
-       enum ib_mtu mtu;
-       u16 lid = dd->ipath_lid;
-       u64 ibcstat;
-
-       memset(props, 0, sizeof(*props));
-       props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
-       props->lmc = dd->ipath_lmc;
-       props->sm_lid = dev->sm_lid;
-       props->sm_sl = dev->sm_sl;
-       ibcstat = dd->ipath_lastibcstat;
-       /* Map LinkState to IB portinfo values. */
-       props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
-
-       /* See phys_state_show() */
-       props->phys_state = /* MEA: assumes shift == 0 */
-               ipath_cvt_physportstate[dd->ipath_lastibcstat &
-               dd->ibcs_lts_mask];
-       props->port_cap_flags = dev->port_cap_flags;
-       props->gid_tbl_len = 1;
-       props->max_msg_sz = 0x80000000;
-       props->pkey_tbl_len = ipath_get_npkeys(dd);
-       props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
-               dev->z_pkey_violations;
-       props->qkey_viol_cntr = dev->qkey_violations;
-       props->active_width = dd->ipath_link_width_active;
-       /* See rate_show() */
-       props->active_speed = dd->ipath_link_speed_active;
-       props->max_vl_num = 1;          /* VLCap = VL0 */
-       props->init_type_reply = 0;
-
-       props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-       switch (dd->ipath_ibmtu) {
-       case 4096:
-               mtu = IB_MTU_4096;
-               break;
-       case 2048:
-               mtu = IB_MTU_2048;
-               break;
-       case 1024:
-               mtu = IB_MTU_1024;
-               break;
-       case 512:
-               mtu = IB_MTU_512;
-               break;
-       case 256:
-               mtu = IB_MTU_256;
-               break;
-       default:
-               mtu = IB_MTU_2048;
-       }
-       props->active_mtu = mtu;
-       props->subnet_timeout = dev->subnet_timeout;
-
-       return 0;
-}
-
-static int ipath_modify_device(struct ib_device *device,
-                              int device_modify_mask,
-                              struct ib_device_modify *device_modify)
-{
-       int ret;
-
-       if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
-                                  IB_DEVICE_MODIFY_NODE_DESC)) {
-               ret = -EOPNOTSUPP;
-               goto bail;
-       }
-
-       if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
-               memcpy(device->node_desc, device_modify->node_desc, 64);
-
-       if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
-               to_idev(device)->sys_image_guid =
-                       cpu_to_be64(device_modify->sys_image_guid);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int ipath_modify_port(struct ib_device *ibdev,
-                            u8 port, int port_modify_mask,
-                            struct ib_port_modify *props)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-
-       dev->port_cap_flags |= props->set_port_cap_mask;
-       dev->port_cap_flags &= ~props->clr_port_cap_mask;
-       if (port_modify_mask & IB_PORT_SHUTDOWN)
-               ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
-       if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-               dev->qkey_violations = 0;
-       return 0;
-}
-
-static int ipath_query_gid(struct ib_device *ibdev, u8 port,
-                          int index, union ib_gid *gid)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       if (index >= 1) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       gid->global.subnet_prefix = dev->gid_prefix;
-       gid->global.interface_id = dev->dd->ipath_guid;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
-                                   struct ib_ucontext *context,
-                                   struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_pd *pd;
-       struct ib_pd *ret;
-
-       /*
-        * This is actually totally arbitrary.  Some correctness tests
-        * assume there's a maximum number of PDs that can be allocated.
-        * We don't actually have this limit, but we fail the test if
-        * we allow allocations of more than we report for this value.
-        */
-
-       pd = kmalloc(sizeof *pd, GFP_KERNEL);
-       if (!pd) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       spin_lock(&dev->n_pds_lock);
-       if (dev->n_pds_allocated == ib_ipath_max_pds) {
-               spin_unlock(&dev->n_pds_lock);
-               kfree(pd);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       dev->n_pds_allocated++;
-       spin_unlock(&dev->n_pds_lock);
-
-       /* ib_alloc_pd() will initialize pd->ibpd. */
-       pd->user = udata != NULL;
-
-       ret = &pd->ibpd;
-
-bail:
-       return ret;
-}
-
-static int ipath_dealloc_pd(struct ib_pd *ibpd)
-{
-       struct ipath_pd *pd = to_ipd(ibpd);
-       struct ipath_ibdev *dev = to_idev(ibpd->device);
-
-       spin_lock(&dev->n_pds_lock);
-       dev->n_pds_allocated--;
-       spin_unlock(&dev->n_pds_lock);
-
-       kfree(pd);
-
-       return 0;
-}
-
-/**
- * ipath_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
-                                    struct ib_ah_attr *ah_attr)
-{
-       struct ipath_ah *ah;
-       struct ib_ah *ret;
-       struct ipath_ibdev *dev = to_idev(pd->device);
-       unsigned long flags;
-
-       /* A multicast address requires a GRH (see ch. 8.4.1). */
-       if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-           ah_attr->dlid != IPATH_PERMISSIVE_LID &&
-           !(ah_attr->ah_flags & IB_AH_GRH)) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (ah_attr->dlid == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (ah_attr->port_num < 1 ||
-           ah_attr->port_num > pd->device->phys_port_cnt) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       ah = kmalloc(sizeof *ah, GFP_ATOMIC);
-       if (!ah) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dev->n_ahs_lock, flags);
-       if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-               spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-               kfree(ah);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       dev->n_ahs_allocated++;
-       spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-       /* ib_create_ah() will initialize ah->ibah. */
-       ah->attr = *ah_attr;
-       ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
-
-       ret = &ah->ibah;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int ipath_destroy_ah(struct ib_ah *ibah)
-{
-       struct ipath_ibdev *dev = to_idev(ibah->device);
-       struct ipath_ah *ah = to_iah(ibah);
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->n_ahs_lock, flags);
-       dev->n_ahs_allocated--;
-       spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-       kfree(ah);
-
-       return 0;
-}
-
-static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-       struct ipath_ah *ah = to_iah(ibah);
-
-       *ah_attr = ah->attr;
-       ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
-
-       return 0;
-}
-
-/**
- * ipath_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_get_npkeys(struct ipath_devdata *dd)
-{
-       return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-/**
- * ipath_get_pkey - return the indexed PKEY from the port PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-       unsigned ret;
-
-       /* always a kernel port, no locking needed */
-       if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-               ret = 0;
-       else
-               ret = dd->ipath_pd[0]->port_pkeys[index];
-
-       return ret;
-}
-
-static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-                           u16 *pkey)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       if (index >= ipath_get_npkeys(dev->dd)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       *pkey = ipath_get_pkey(dev->dd, index);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_alloc_ucontext - allocate a ucontext
- * @ibdev: the infiniband device
- * @udata: not used by the InfiniPath driver
- */
-
-static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
-                                               struct ib_udata *udata)
-{
-       struct ipath_ucontext *context;
-       struct ib_ucontext *ret;
-
-       context = kmalloc(sizeof *context, GFP_KERNEL);
-       if (!context) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       ret = &context->ibucontext;
-
-bail:
-       return ret;
-}
-
-static int ipath_dealloc_ucontext(struct ib_ucontext *context)
-{
-       kfree(to_iucontext(context));
-       return 0;
-}
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev);
-
-static void __verbs_timer(unsigned long arg)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-       /* Handle verbs layer timeouts. */
-       ipath_ib_timer(dd->verbs_dev);
-
-       mod_timer(&dd->verbs_timer, jiffies + 1);
-}
-
-static int enable_timer(struct ipath_devdata *dd)
-{
-       /*
-        * Early chips had a design flaw where the chip's and the kernel's
-        * idea of the tail register don't always agree, and therefore we
-        * won't get an interrupt on the next packet received.
-        * If the board supports per packet receive interrupts, use it.
-        * Otherwise, the timer function periodically checks for packets
-        * to cover this case.
-        * Either way, the timer is needed for verbs layer related
-        * processing.
-        */
-       if (dd->ipath_flags & IPATH_GPIO_INTR) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-                                0x2074076542310ULL);
-               /* Enable GPIO bit 2 interrupt */
-               dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-       }
-
-       setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);
-
-       dd->verbs_timer.expires = jiffies + 1;
-       add_timer(&dd->verbs_timer);
-
-       return 0;
-}
-
-static int disable_timer(struct ipath_devdata *dd)
-{
-       if (dd->ipath_flags & IPATH_GPIO_INTR) {
-               /* Disable GPIO bit 2 interrupt */
-               dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-               /*
-                * We might want to undo changes to debugportselect,
-                * but how?
-                */
-       }
-
-       del_timer_sync(&dd->verbs_timer);
-
-       return 0;
-}
-
-static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
-                               struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = ipath_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-       return 0;
-}
-
-/**
- * ipath_register_ib_device - register our device with the infiniband core
- * @dd: the device data structure
- *
- * Return 0 on success or a negative errno on failure; the allocated
- * ipath_ibdev is stored in dd->verbs_dev.
- */
-int ipath_register_ib_device(struct ipath_devdata *dd)
-{
-       struct ipath_verbs_counters cntrs;
-       struct ipath_ibdev *idev;
-       struct ib_device *dev;
-       struct ipath_verbs_txreq *tx;
-       unsigned i;
-       int ret;
-
-       idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-       if (idev == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       dev = &idev->ibdev;
-
-       if (dd->ipath_sdma_descq_cnt) {
-               tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
-                                  GFP_KERNEL);
-               if (tx == NULL) {
-                       ret = -ENOMEM;
-                       goto err_tx;
-               }
-       } else
-               tx = NULL;
-       idev->txreq_bufs = tx;
-
-       /* Only need to initialize non-zero fields. */
-       spin_lock_init(&idev->n_pds_lock);
-       spin_lock_init(&idev->n_ahs_lock);
-       spin_lock_init(&idev->n_cqs_lock);
-       spin_lock_init(&idev->n_qps_lock);
-       spin_lock_init(&idev->n_srqs_lock);
-       spin_lock_init(&idev->n_mcast_grps_lock);
-
-       spin_lock_init(&idev->qp_table.lock);
-       spin_lock_init(&idev->lk_table.lock);
-       idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
-       /* Set the prefix to the default value (see ch. 4.1.1) */
-       idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);
-
-       ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
-       if (ret)
-               goto err_qp;
-
-       /*
-        * The top ib_ipath_lkey_table_size bits are used to index the
-        * table.  The lower 8 bits can be owned by the user (copied from
-        * the LKEY).  The remaining bits act as a generation number or tag.
-        */
-       idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
-       idev->lk_table.table = kcalloc(idev->lk_table.max,
-                                      sizeof(*idev->lk_table.table),
-                                      GFP_KERNEL);
-       if (idev->lk_table.table == NULL) {
-               ret = -ENOMEM;
-               goto err_lk;
-       }
-       INIT_LIST_HEAD(&idev->pending_mmaps);
-       spin_lock_init(&idev->pending_lock);
-       idev->mmap_offset = PAGE_SIZE;
-       spin_lock_init(&idev->mmap_offset_lock);
-       INIT_LIST_HEAD(&idev->pending[0]);
-       INIT_LIST_HEAD(&idev->pending[1]);
-       INIT_LIST_HEAD(&idev->pending[2]);
-       INIT_LIST_HEAD(&idev->piowait);
-       INIT_LIST_HEAD(&idev->rnrwait);
-       INIT_LIST_HEAD(&idev->txreq_free);
-       idev->pending_index = 0;
-       idev->port_cap_flags =
-               IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
-       if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
-               idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
-       idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-       idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-       idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-       idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-       idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-
-       /* Snapshot current HW counters to "clear" them. */
-       ipath_get_counters(dd, &cntrs);
-       idev->z_symbol_error_counter = cntrs.symbol_error_counter;
-       idev->z_link_error_recovery_counter =
-               cntrs.link_error_recovery_counter;
-       idev->z_link_downed_counter = cntrs.link_downed_counter;
-       idev->z_port_rcv_errors = cntrs.port_rcv_errors;
-       idev->z_port_rcv_remphys_errors =
-               cntrs.port_rcv_remphys_errors;
-       idev->z_port_xmit_discards = cntrs.port_xmit_discards;
-       idev->z_port_xmit_data = cntrs.port_xmit_data;
-       idev->z_port_rcv_data = cntrs.port_rcv_data;
-       idev->z_port_xmit_packets = cntrs.port_xmit_packets;
-       idev->z_port_rcv_packets = cntrs.port_rcv_packets;
-       idev->z_local_link_integrity_errors =
-               cntrs.local_link_integrity_errors;
-       idev->z_excessive_buffer_overrun_errors =
-               cntrs.excessive_buffer_overrun_errors;
-       idev->z_vl15_dropped = cntrs.vl15_dropped;
-
-       for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
-               list_add(&tx->txreq.list, &idev->txreq_free);
-
-       /*
-        * The system image GUID is supposed to be the same for all
-        * IB HCAs in a single system but since there can be other
-        * device types in the system, we can't be sure this is unique.
-        */
-       if (!sys_image_guid)
-               sys_image_guid = dd->ipath_guid;
-       idev->sys_image_guid = sys_image_guid;
-       idev->ib_unit = dd->ipath_unit;
-       idev->dd = dd;
-
-       strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
-       dev->owner = THIS_MODULE;
-       dev->node_guid = dd->ipath_guid;
-       dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
-       dev->uverbs_cmd_mask =
-               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-               (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-               (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
-               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-               (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-               (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-               (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-               (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-               (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-               (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-       dev->node_type = RDMA_NODE_IB_CA;
-       dev->phys_port_cnt = 1;
-       dev->num_comp_vectors = 1;
-       dev->dma_device = &dd->pcidev->dev;
-       dev->query_device = ipath_query_device;
-       dev->modify_device = ipath_modify_device;
-       dev->query_port = ipath_query_port;
-       dev->modify_port = ipath_modify_port;
-       dev->query_pkey = ipath_query_pkey;
-       dev->query_gid = ipath_query_gid;
-       dev->alloc_ucontext = ipath_alloc_ucontext;
-       dev->dealloc_ucontext = ipath_dealloc_ucontext;
-       dev->alloc_pd = ipath_alloc_pd;
-       dev->dealloc_pd = ipath_dealloc_pd;
-       dev->create_ah = ipath_create_ah;
-       dev->destroy_ah = ipath_destroy_ah;
-       dev->query_ah = ipath_query_ah;
-       dev->create_srq = ipath_create_srq;
-       dev->modify_srq = ipath_modify_srq;
-       dev->query_srq = ipath_query_srq;
-       dev->destroy_srq = ipath_destroy_srq;
-       dev->create_qp = ipath_create_qp;
-       dev->modify_qp = ipath_modify_qp;
-       dev->query_qp = ipath_query_qp;
-       dev->destroy_qp = ipath_destroy_qp;
-       dev->post_send = ipath_post_send;
-       dev->post_recv = ipath_post_receive;
-       dev->post_srq_recv = ipath_post_srq_receive;
-       dev->create_cq = ipath_create_cq;
-       dev->destroy_cq = ipath_destroy_cq;
-       dev->resize_cq = ipath_resize_cq;
-       dev->poll_cq = ipath_poll_cq;
-       dev->req_notify_cq = ipath_req_notify_cq;
-       dev->get_dma_mr = ipath_get_dma_mr;
-       dev->reg_phys_mr = ipath_reg_phys_mr;
-       dev->reg_user_mr = ipath_reg_user_mr;
-       dev->dereg_mr = ipath_dereg_mr;
-       dev->alloc_fmr = ipath_alloc_fmr;
-       dev->map_phys_fmr = ipath_map_phys_fmr;
-       dev->unmap_fmr = ipath_unmap_fmr;
-       dev->dealloc_fmr = ipath_dealloc_fmr;
-       dev->attach_mcast = ipath_multicast_attach;
-       dev->detach_mcast = ipath_multicast_detach;
-       dev->process_mad = ipath_process_mad;
-       dev->mmap = ipath_mmap;
-       dev->dma_ops = &ipath_dma_mapping_ops;
-       dev->get_port_immutable = ipath_port_immutable;
-
-       snprintf(dev->node_desc, sizeof(dev->node_desc),
-                IPATH_IDSTR " %s", init_utsname()->nodename);
-
-       ret = ib_register_device(dev, NULL);
-       if (ret)
-               goto err_reg;
-
-       ret = ipath_verbs_register_sysfs(dev);
-       if (ret)
-               goto err_class;
-
-       enable_timer(dd);
-
-       goto bail;
-
-err_class:
-       ib_unregister_device(dev);
-err_reg:
-       kfree(idev->lk_table.table);
-err_lk:
-       kfree(idev->qp_table.table);
-err_qp:
-       kfree(idev->txreq_bufs);
-err_tx:
-       ib_dealloc_device(dev);
-       ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-       idev = NULL;
-
-bail:
-       dd->verbs_dev = idev;
-       return ret;
-}
-
-void ipath_unregister_ib_device(struct ipath_ibdev *dev)
-{
-       struct ib_device *ibdev = &dev->ibdev;
-       u32 qps_inuse;
-
-       ib_unregister_device(ibdev);
-
-       disable_timer(dev->dd);
-
-       if (!list_empty(&dev->pending[0]) ||
-           !list_empty(&dev->pending[1]) ||
-           !list_empty(&dev->pending[2]))
-               ipath_dev_err(dev->dd, "pending list not empty!\n");
-       if (!list_empty(&dev->piowait))
-               ipath_dev_err(dev->dd, "piowait list not empty!\n");
-       if (!list_empty(&dev->rnrwait))
-               ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
-       if (!ipath_mcast_tree_empty())
-               ipath_dev_err(dev->dd, "multicast table memory leak!\n");
-       /*
-        * Note that ipath_unregister_ib_device() can be called before all
-        * the QPs are destroyed!
-        */
-       qps_inuse = ipath_free_all_qps(&dev->qp_table);
-       if (qps_inuse)
-               ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
-                       qps_inuse);
-       kfree(dev->qp_table.table);
-       kfree(dev->lk_table.table);
-       kfree(dev->txreq_bufs);
-       ib_dealloc_device(ibdev);
-}
-
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-
-       return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
-}
-
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-       int ret;
-
-       ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
-       if (ret < 0)
-               goto bail;
-       strcat(buf, "\n");
-       ret = strlen(buf);
-
-bail:
-       return ret;
-}
-
-static ssize_t show_stats(struct device *device, struct device_attribute *attr,
-                         char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-       int i;
-       int len;
-
-       len = sprintf(buf,
-                     "RC resends  %d\n"
-                     "RC no QACK  %d\n"
-                     "RC ACKs     %d\n"
-                     "RC SEQ NAKs %d\n"
-                     "RC RDMA seq %d\n"
-                     "RC RNR NAKs %d\n"
-                     "RC OTH NAKs %d\n"
-                     "RC timeouts %d\n"
-                     "RC RDMA dup %d\n"
-                     "piobuf wait %d\n"
-                     "unaligned   %d\n"
-                     "PKT drops   %d\n"
-                     "WQE errs    %d\n",
-                     dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
-                     dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
-                     dev->n_other_naks, dev->n_timeouts,
-                     dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
-                     dev->n_pkt_drops, dev->n_wqe_errs);
-       for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
-               const struct ipath_opcode_stats *si = &dev->opstats[i];
-
-               if (!si->n_packets && !si->n_bytes)
-                       continue;
-               len += sprintf(buf + len, "%02x %llu/%llu\n", i,
-                              (unsigned long long) si->n_packets,
-                              (unsigned long long) si->n_bytes);
-       }
-       return len;
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
-
-static struct device_attribute *ipath_class_attributes[] = {
-       &dev_attr_hw_rev,
-       &dev_attr_hca_type,
-       &dev_attr_board_id,
-       &dev_attr_stats
-};
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev)
-{
-       int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
-               ret = device_create_file(&dev->dev,
-                                      ipath_class_attributes[i]);
-               if (ret)
-                       goto bail;
-       }
-       return 0;
-bail:
-       for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
-               device_remove_file(&dev->dev, ipath_class_attributes[i]);
-       return ret;
-}
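
The removed ipath_verbs_register_sysfs() above uses an all-or-nothing unwind: on any device_create_file() failure it removes every attribute, including ones that were never created, relying on removal of a missing file being harmless. A minimal user-space sketch of that pattern, with create_file()/remove_file() as hypothetical stand-ins for the driver-core calls:

#include <stdio.h>

#define NATTRS 4

/* Hypothetical stand-in for device_create_file(); fails on the third one. */
static int create_file(int i)
{
	return (i == 2) ? -1 : 0;
}

/* Hypothetical stand-in for device_remove_file(); removing an attribute
 * that was never created is assumed harmless, as with sysfs files. */
static void remove_file(int i)
{
	printf("removed attribute %d\n", i);
}

/* Same shape as ipath_verbs_register_sysfs(): register all attributes,
 * and on any failure unwind by removing every one of them. */
static int register_attrs(void)
{
	int i, ret;

	for (i = 0; i < NATTRS; ++i) {
		ret = create_file(i);
		if (ret)
			goto bail;
	}
	return 0;

bail:
	for (i = 0; i < NATTRS; ++i)
		remove_file(i);
	return ret;
}

int main(void)
{
	return register_attrs() ? 1 : 0;
}
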
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
deleted file mode 100644 (file)
index 0a90a56..0000000
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef IPATH_VERBS_H
-#define IPATH_VERBS_H
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/kref.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_user_verbs.h>
-
-#include "ipath_kernel.h"
-
-#define IPATH_MAX_RDMA_ATOMIC  4
-
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
-
-/*
- * Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-#define IPATH_UVERBS_ABI_VERSION       2
-
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE     (IB_CQ_NEXT_COMP + 1)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK                     0x20
-#define IB_NAK_PSN_ERROR               0x60
-#define IB_NAK_INVALID_REQUEST         0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR     0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST      0x64
-
-/* Flags for checking QP state (see ib_ipath_state_ops[]) */
-#define IPATH_POST_SEND_OK             0x01
-#define IPATH_POST_RECV_OK             0x02
-#define IPATH_PROCESS_RECV_OK          0x04
-#define IPATH_PROCESS_SEND_OK          0x08
-#define IPATH_PROCESS_NEXT_SEND_OK     0x10
-#define IPATH_FLUSH_SEND               0x20
-#define IPATH_FLUSH_RECV               0x40
-#define IPATH_PROCESS_OR_FLUSH_SEND \
-       (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
-
-/* IB Performance Manager status values */
-#define IB_PMA_SAMPLE_STATUS_DONE      0x00
-#define IB_PMA_SAMPLE_STATUS_STARTED   0x01
-#define IB_PMA_SAMPLE_STATUS_RUNNING   0x02
-
-/* Mandatory IB performance counter select values. */
-#define IB_PMA_PORT_XMIT_DATA  cpu_to_be16(0x0001)
-#define IB_PMA_PORT_RCV_DATA   cpu_to_be16(0x0002)
-#define IB_PMA_PORT_XMIT_PKTS  cpu_to_be16(0x0003)
-#define IB_PMA_PORT_RCV_PKTS   cpu_to_be16(0x0004)
-#define IB_PMA_PORT_XMIT_WAIT  cpu_to_be16(0x0005)
-
-struct ib_reth {
-       __be64 vaddr;
-       __be32 rkey;
-       __be32 length;
-} __attribute__ ((packed));
-
-struct ib_atomic_eth {
-       __be32 vaddr[2];        /* unaligned so access as 2 32-bit words */
-       __be32 rkey;
-       __be64 swap_data;
-       __be64 compare_data;
-} __attribute__ ((packed));
-
-struct ipath_other_headers {
-       __be32 bth[3];
-       union {
-               struct {
-                       __be32 deth[2];
-                       __be32 imm_data;
-               } ud;
-               struct {
-                       struct ib_reth reth;
-                       __be32 imm_data;
-               } rc;
-               struct {
-                       __be32 aeth;
-                       __be32 atomic_ack_eth[2];
-               } at;
-               __be32 imm_data;
-               __be32 aeth;
-               struct ib_atomic_eth atomic_eth;
-       } u;
-} __attribute__ ((packed));
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data).  Only the first 56 bytes of the IB header
- * will be in the eager header buffer.  The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct ipath_ib_header {
-       __be16 lrh[4];
-       union {
-               struct {
-                       struct ib_grh grh;
-                       struct ipath_other_headers oth;
-               } l;
-               struct ipath_other_headers oth;
-       } u;
-} __attribute__ ((packed));
-
-struct ipath_pio_header {
-       __le32 pbc[2];
-       struct ipath_ib_header hdr;
-} __attribute__ ((packed));
-
-/*
- * There is one struct ipath_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct ipath_mcast_qp.
- */
-struct ipath_mcast_qp {
-       struct list_head list;
-       struct ipath_qp *qp;
-};
-
-struct ipath_mcast {
-       struct rb_node rb_node;
-       union ib_gid mgid;
-       struct list_head qp_list;
-       wait_queue_head_t wait;
-       atomic_t refcount;
-       int n_attached;
-};
-
-/* Protection domain */
-struct ipath_pd {
-       struct ib_pd ibpd;
-       int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct ipath_ah {
-       struct ib_ah ibah;
-       struct ib_ah_attr attr;
-};
-
-/*
- * This structure is used by ipath_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct ipath_mmap_info {
-       struct list_head pending_mmaps;
-       struct ib_ucontext *context;
-       void *obj;
-       __u64 offset;
-       struct kref ref;
-       unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct ipath_cq_wc {
-       u32 head;               /* index of next entry to fill */
-       u32 tail;               /* index of next ib_poll_cq() entry */
-       union {
-               /* these are actually size ibcq.cqe + 1 */
-               struct ib_uverbs_wc uqueue[0];
-               struct ib_wc kqueue[0];
-       };
-};
-
-/*
- * The completion queue structure.
- */
-struct ipath_cq {
-       struct ib_cq ibcq;
-       struct tasklet_struct comptask;
-       spinlock_t lock;
-       u8 notify;
-       u8 triggered;
-       struct ipath_cq_wc *queue;
-       struct ipath_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-       void *vaddr;
-       size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-       struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-       struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-       u64 user_base;          /* User's address for this region */
-       u64 iova;               /* IB start address of this region */
-       size_t length;
-       u32 lkey;
-       u32 offset;             /* offset (bytes) to start of region */
-       int access_flags;
-       u32 max_segs;           /* number of ipath_segs in all the arrays */
-       u32 mapsz;              /* size of the map array */
-       struct ipath_segarray *map[0];  /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-       struct ipath_mregion *mr;
-       void *vaddr;            /* kernel virtual address of segment */
-       u32 sge_length;         /* length of the SGE */
-       u32 length;             /* remaining length of the segment */
-       u16 m;                  /* current index: mr->map[m] */
-       u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct ipath_mr {
-       struct ib_mr ibmr;
-       struct ib_umem *umem;
-       struct ipath_mregion mr;        /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct ipath_swqe {
-       union {
-               struct ib_send_wr wr;   /* don't use wr.sg_list */
-               struct ib_ud_wr ud_wr;
-               struct ib_rdma_wr rdma_wr;
-               struct ib_atomic_wr atomic_wr;
-       };
-
-       u32 psn;                /* first packet sequence number */
-       u32 lpsn;               /* last packet sequence number */
-       u32 ssn;                /* send sequence number */
-       u32 length;             /* total length of data in sg_list */
-       struct ipath_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct ipath_rwqe {
-       u64 wr_id;
-       u8 num_sge;
-       struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct ipath_rwq {
-       u32 head;               /* new work requests posted to the head */
-       u32 tail;               /* receives pull requests from here. */
-       struct ipath_rwqe wq[0];
-};
-
-struct ipath_rq {
-       struct ipath_rwq *wq;
-       spinlock_t lock;
-       u32 size;               /* size of RWQE array */
-       u8 max_sge;
-};
-
-struct ipath_srq {
-       struct ib_srq ibsrq;
-       struct ipath_rq rq;
-       struct ipath_mmap_info *ip;
-       /* send signal when number of RWQEs < limit */
-       u32 limit;
-};
-
-struct ipath_sge_state {
-       struct ipath_sge *sg_list;      /* next SGE to be used if any */
-       struct ipath_sge sge;   /* progress state for the current SGE */
-       u8 num_sge;
-       u8 static_rate;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct ipath_ack_entry {
-       u8 opcode;
-       u8 sent;
-       u32 psn;
-       union {
-               struct ipath_sge_state rdma_sge;
-               u64 atomic_data;
-       };
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct ipath_qp {
-       struct ib_qp ibqp;
-       struct ipath_qp *next;          /* link list for QPN hash table */
-       struct ipath_qp *timer_next;    /* link list for ipath_ib_timer() */
-       struct ipath_qp *pio_next;      /* link for ipath_ib_piobufavail() */
-       struct list_head piowait;       /* link for wait PIO buf */
-       struct list_head timerwait;     /* link for waiting for timeouts */
-       struct ib_ah_attr remote_ah_attr;
-       struct ipath_ib_header s_hdr;   /* next packet header to send */
-       atomic_t refcount;
-       wait_queue_head_t wait;
-       wait_queue_head_t wait_dma;
-       struct tasklet_struct s_task;
-       struct ipath_mmap_info *ip;
-       struct ipath_sge_state *s_cur_sge;
-       struct ipath_verbs_txreq *s_tx;
-       struct ipath_sge_state s_sge;   /* current send request data */
-       struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
-       struct ipath_sge_state s_ack_rdma_sge;
-       struct ipath_sge_state s_rdma_read_sge;
-       struct ipath_sge_state r_sge;   /* current receive data */
-       spinlock_t s_lock;
-       atomic_t s_dma_busy;
-       u16 s_pkt_delay;
-       u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-       u32 s_cur_size;         /* size of send packet in bytes */
-       u32 s_len;              /* total length of s_sge */
-       u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-       u32 s_next_psn;         /* PSN for next request */
-       u32 s_last_psn;         /* last response PSN processed */
-       u32 s_psn;              /* current packet sequence number */
-       u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-       u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-       u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-       u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-       u64 r_wr_id;            /* ID for current receive WQE */
-       unsigned long r_aflags;
-       u32 r_len;              /* total length of r_sge */
-       u32 r_rcv_len;          /* receive data len processed */
-       u32 r_psn;              /* expected rcv packet sequence number */
-       u32 r_msn;              /* message sequence number */
-       u8 state;               /* QP state */
-       u8 s_state;             /* opcode of last packet sent */
-       u8 s_ack_state;         /* opcode of packet to ACK */
-       u8 s_nak_state;         /* non-zero if NAK is pending */
-       u8 r_state;             /* opcode of last packet received */
-       u8 r_nak_state;         /* non-zero if NAK is pending */
-       u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-       u8 r_flags;
-       u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-       u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-       u8 qp_access_flags;
-       u8 s_max_sge;           /* size of s_wq->sg_list */
-       u8 s_retry_cnt;         /* number of times to retry */
-       u8 s_rnr_retry_cnt;
-       u8 s_retry;             /* requester retry counter */
-       u8 s_rnr_retry;         /* requester RNR retry counter */
-       u8 s_pkey_index;        /* PKEY index to use */
-       u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-       u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-       u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-       u8 s_flags;
-       u8 s_dmult;
-       u8 s_draining;
-       u8 timeout;             /* Timeout for this QP */
-       enum ib_mtu path_mtu;
-       u32 remote_qpn;
-       u32 qkey;               /* QKEY for this QP (for UD or RD) */
-       u32 s_size;             /* send work queue size */
-       u32 s_head;             /* new entries added here */
-       u32 s_tail;             /* next entry to process */
-       u32 s_cur;              /* current work queue entry */
-       u32 s_last;             /* last un-ACK'ed entry */
-       u32 s_ssn;              /* SSN of tail entry */
-       u32 s_lsn;              /* limit sequence number (credit) */
-       struct ipath_swqe *s_wq;        /* send work queue */
-       struct ipath_swqe *s_wqe;
-       struct ipath_sge *r_ud_sg_list;
-       struct ipath_rq r_rq;           /* receive work queue */
-       struct ipath_sge r_sg_list[0];  /* verified SGEs */
-};
-
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define IPATH_R_WRID_VALID     0
-
-/*
- * Bit definitions for r_flags.
- */
-#define IPATH_R_REUSE_SGE      0x01
-#define IPATH_R_RDMAR_SEQ      0x02
-
-/*
- * Bit definitions for s_flags.
- *
- * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
- *                        before processing the next SWQE
- * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
- *                        before processing the next SWQE
- * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
- * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                   next send completion entry not via send DMA.
- */
-#define IPATH_S_SIGNAL_REQ_WR  0x01
-#define IPATH_S_FENCE_PENDING  0x02
-#define IPATH_S_RDMAR_PENDING  0x04
-#define IPATH_S_ACK_PENDING    0x08
-#define IPATH_S_BUSY           0x10
-#define IPATH_S_WAITING                0x20
-#define IPATH_S_WAIT_SSN_CREDIT        0x40
-#define IPATH_S_WAIT_DMA       0x80
-
-#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
-       IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
-
-#define IPATH_PSN_CREDIT       512
-
-/*
- * Since struct ipath_swqe is not a fixed size, we can't simply index into
- * struct ipath_qp.s_wq.  This function does the array index computation.
- */
-static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
-                                             unsigned n)
-{
-       return (struct ipath_swqe *)((char *)qp->s_wq +
-                                    (sizeof(struct ipath_swqe) +
-                                     qp->s_max_sge *
-                                     sizeof(struct ipath_sge)) * n);
-}
-
-/*
- * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rwq.wq.  This function does the array index computation.
- */
-static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
-                                             unsigned n)
-{
-       return (struct ipath_rwqe *)
-               ((char *) rq->wq->wq +
-                (sizeof(struct ipath_rwqe) +
-                 rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-       atomic_t n_free;
-       void *page;
-};
-
-struct ipath_qp_table {
-       spinlock_t lock;
-       u32 last;               /* last QP number allocated */
-       u32 max;                /* size of the hash table */
-       u32 nmaps;              /* size of the map table */
-       struct ipath_qp **table;
-       /* bit map of free numbers */
-       struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct ipath_lkey_table {
-       spinlock_t lock;
-       u32 next;               /* next unused index (speeds search) */
-       u32 gen;                /* generation count */
-       u32 max;                /* size of the table */
-       struct ipath_mregion **table;
-};
-
-struct ipath_opcode_stats {
-       u64 n_packets;          /* number of packets */
-       u64 n_bytes;            /* total number of bytes */
-};
-
-struct ipath_ibdev {
-       struct ib_device ibdev;
-       struct ipath_devdata *dd;
-       struct list_head pending_mmaps;
-       spinlock_t mmap_offset_lock;
-       u32 mmap_offset;
-       int ib_unit;            /* This is the device number */
-       u16 sm_lid;             /* in host order */
-       u8 sm_sl;
-       u8 mkeyprot;
-       /* non-zero when timer is set */
-       unsigned long mkey_lease_timeout;
-
-       /* The following fields are really per port. */
-       struct ipath_qp_table qp_table;
-       struct ipath_lkey_table lk_table;
-       struct list_head pending[3];    /* FIFO of QPs waiting for ACKs */
-       struct list_head piowait;       /* list for wait PIO buf */
-       struct list_head txreq_free;
-       void *txreq_bufs;
-       /* list of QPs waiting for RNR timer */
-       struct list_head rnrwait;
-       spinlock_t pending_lock;
-       __be64 sys_image_guid;  /* in network order */
-       __be64 gid_prefix;      /* in network order */
-       __be64 mkey;
-
-       u32 n_pds_allocated;    /* number of PDs allocated for device */
-       spinlock_t n_pds_lock;
-       u32 n_ahs_allocated;    /* number of AHs allocated for device */
-       spinlock_t n_ahs_lock;
-       u32 n_cqs_allocated;    /* number of CQs allocated for device */
-       spinlock_t n_cqs_lock;
-       u32 n_qps_allocated;    /* number of QPs allocated for device */
-       spinlock_t n_qps_lock;
-       u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-       spinlock_t n_srqs_lock;
-       u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-       spinlock_t n_mcast_grps_lock;
-
-       u64 ipath_sword;        /* total dwords sent (sample result) */
-       u64 ipath_rword;        /* total dwords received (sample result) */
-       u64 ipath_spkts;        /* total packets sent (sample result) */
-       u64 ipath_rpkts;        /* total packets received (sample result) */
-       /* # of ticks no data sent (sample result) */
-       u64 ipath_xmit_wait;
-       u64 rcv_errors;         /* # of packets with SW detected rcv errs */
-       u64 n_unicast_xmit;     /* total unicast packets sent */
-       u64 n_unicast_rcv;      /* total unicast packets received */
-       u64 n_multicast_xmit;   /* total multicast packets sent */
-       u64 n_multicast_rcv;    /* total multicast packets received */
-       u64 z_symbol_error_counter;             /* starting count for PMA */
-       u64 z_link_error_recovery_counter;      /* starting count for PMA */
-       u64 z_link_downed_counter;              /* starting count for PMA */
-       u64 z_port_rcv_errors;                  /* starting count for PMA */
-       u64 z_port_rcv_remphys_errors;          /* starting count for PMA */
-       u64 z_port_xmit_discards;               /* starting count for PMA */
-       u64 z_port_xmit_data;                   /* starting count for PMA */
-       u64 z_port_rcv_data;                    /* starting count for PMA */
-       u64 z_port_xmit_packets;                /* starting count for PMA */
-       u64 z_port_rcv_packets;                 /* starting count for PMA */
-       u32 z_pkey_violations;                  /* starting count for PMA */
-       u32 z_local_link_integrity_errors;      /* starting count for PMA */
-       u32 z_excessive_buffer_overrun_errors;  /* starting count for PMA */
-       u32 z_vl15_dropped;                     /* starting count for PMA */
-       u32 n_rc_resends;
-       u32 n_rc_acks;
-       u32 n_rc_qacks;
-       u32 n_seq_naks;
-       u32 n_rdma_seq;
-       u32 n_rnr_naks;
-       u32 n_other_naks;
-       u32 n_timeouts;
-       u32 n_pkt_drops;
-       u32 n_vl15_dropped;
-       u32 n_wqe_errs;
-       u32 n_rdma_dup_busy;
-       u32 n_piowait;
-       u32 n_unaligned;
-       u32 port_cap_flags;
-       u32 pma_sample_start;
-       u32 pma_sample_interval;
-       __be16 pma_counter_select[5];
-       u16 pma_tag;
-       u16 qkey_violations;
-       u16 mkey_violations;
-       u16 mkey_lease_period;
-       u16 pending_index;      /* which pending queue is active */
-       u8 pma_sample_status;
-       u8 subnet_timeout;
-       u8 vl_high_limit;
-       struct ipath_opcode_stats opstats[128];
-};
-
-struct ipath_verbs_counters {
-       u64 symbol_error_counter;
-       u64 link_error_recovery_counter;
-       u64 link_downed_counter;
-       u64 port_rcv_errors;
-       u64 port_rcv_remphys_errors;
-       u64 port_xmit_discards;
-       u64 port_xmit_data;
-       u64 port_rcv_data;
-       u64 port_xmit_packets;
-       u64 port_rcv_packets;
-       u32 local_link_integrity_errors;
-       u32 excessive_buffer_overrun_errors;
-       u32 vl15_dropped;
-};
-
-struct ipath_verbs_txreq {
-       struct ipath_qp         *qp;
-       struct ipath_swqe       *wqe;
-       u32                      map_len;
-       u32                      len;
-       struct ipath_sge_state  *ss;
-       struct ipath_pio_header  hdr;
-       struct ipath_sdma_txreq  txreq;
-};
-
-static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
-{
-       return container_of(ibmr, struct ipath_mr, ibmr);
-}
-
-static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
-{
-       return container_of(ibpd, struct ipath_pd, ibpd);
-}
-
-static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
-{
-       return container_of(ibah, struct ipath_ah, ibah);
-}
-
-static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
-{
-       return container_of(ibcq, struct ipath_cq, ibcq);
-}
-
-static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
-{
-       return container_of(ibsrq, struct ipath_srq, ibsrq);
-}
-
-static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
-{
-       return container_of(ibqp, struct ipath_qp, ibqp);
-}
-
-static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
-{
-       return container_of(ibdev, struct ipath_ibdev, ibdev);
-}
-
-/*
- * This must be called with s_lock held.
- */
-static inline void ipath_schedule_send(struct ipath_qp *qp)
-{
-       if (qp->s_flags & IPATH_S_ANY_WAIT)
-               qp->s_flags &= ~IPATH_S_ANY_WAIT;
-       if (!(qp->s_flags & IPATH_S_BUSY))
-               tasklet_hi_schedule(&qp->s_task);
-}
-
-int ipath_process_mad(struct ib_device *ibdev,
-                     int mad_flags,
-                     u8 port_num,
-                     const struct ib_wc *in_wc,
-                     const struct ib_grh *in_grh,
-                     const struct ib_mad_hdr *in, size_t in_mad_size,
-                     struct ib_mad_hdr *out, size_t *out_mad_size,
-                     u16 *out_mad_pkey_index);
-
-/*
- * Compare the lower 24 bits of the two values.
- * Returns an integer less than, equal to, or greater than zero.
- */
-static inline int ipath_cmp24(u32 a, u32 b)
-{
-       return (((int) a) - ((int) b)) << 8;
-}
-
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-                           u64 *rwords, u64 *spkts, u64 *rpkts,
-                           u64 *xmit_wait);
-
-int ipath_get_counters(struct ipath_devdata *dd,
-                      struct ipath_verbs_counters *cntrs);
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_mcast_tree_empty(void);
-
-__be32 ipath_compute_aeth(struct ipath_qp *qp);
-
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
-
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-                             struct ib_qp_init_attr *init_attr,
-                             struct ib_udata *udata);
-
-int ipath_destroy_qp(struct ib_qp *ibqp);
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                   int attr_mask, struct ib_udata *udata);
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                  int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
-
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
-
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-                    u32 hdrwords, struct ipath_sge_state *ss, u32 len);
-
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
-
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
-
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
-
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
-                    struct ipath_mregion *mr);
-
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-                 struct ib_sge *sge, int acc);
-
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-                 u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-                          struct ib_recv_wr **bad_wr);
-
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-                               struct ib_srq_init_attr *srq_init_attr,
-                               struct ib_udata *udata);
-
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask,
-                    struct ib_udata *udata);
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int ipath_destroy_srq(struct ib_srq *ibsrq);
-
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-                             const struct ib_cq_init_attr *attr,
-                             struct ib_ucontext *context,
-                             struct ib_udata *udata);
-
-int ipath_destroy_cq(struct ib_cq *ibcq);
-
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
-                               struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, int acc, u64 *iova_start);
-
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                               u64 virt_addr, int mr_access_flags,
-                               struct ib_udata *udata);
-
-int ipath_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-                              struct ib_fmr_attr *fmr_attr);
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-                      int list_len, u64 iova);
-
-int ipath_unmap_fmr(struct list_head *fmr_list);
-
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
-
-void ipath_release_mmap_info(struct kref *ref);
-
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-                                              u32 size,
-                                              struct ib_ucontext *context,
-                                              void *obj);
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-                           struct ipath_mmap_info *ip,
-                           u32 size, void *obj);
-
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-void ipath_insert_rnr_queue(struct ipath_qp *qp);
-
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-                  u32 *lengthp, struct ipath_sge_state *ss);
-
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
-
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-                  struct ib_global_route *grh, u32 hwords, u32 nwords);
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                          struct ipath_other_headers *ohdr,
-                          u32 bth0, u32 bth2);
-
-void ipath_do_send(unsigned long data);
-
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-                        enum ib_wc_status status);
-
-int ipath_make_rc_req(struct ipath_qp *qp);
-
-int ipath_make_uc_req(struct ipath_qp *qp);
-
-int ipath_make_ud_req(struct ipath_qp *qp);
-
-int ipath_register_ib_device(struct ipath_devdata *);
-
-void ipath_unregister_ib_device(struct ipath_ibdev *);
-
-void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
-
-int ipath_ib_piobufavail(struct ipath_ibdev *);
-
-unsigned ipath_get_npkeys(struct ipath_devdata *);
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *);
-
-unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
-
-extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
-
-/*
- * Below converts HCA-specific LinkTrainingState to IB PhysPortState
- * values.
- */
-extern const u8 ipath_cvt_physportstate[];
-#define IB_PHYSPORTSTATE_SLEEP 1
-#define IB_PHYSPORTSTATE_POLL 2
-#define IB_PHYSPORTSTATE_DISABLED 3
-#define IB_PHYSPORTSTATE_CFG_TRAIN 4
-#define IB_PHYSPORTSTATE_LINKUP 5
-#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
-
-extern const int ib_ipath_state_ops[];
-
-extern unsigned int ib_ipath_lkey_table_size;
-
-extern unsigned int ib_ipath_max_cqes;
-
-extern unsigned int ib_ipath_max_cqs;
-
-extern unsigned int ib_ipath_max_qp_wrs;
-
-extern unsigned int ib_ipath_max_qps;
-
-extern unsigned int ib_ipath_max_sges;
-
-extern unsigned int ib_ipath_max_mcast_grps;
-
-extern unsigned int ib_ipath_max_mcast_qp_attached;
-
-extern unsigned int ib_ipath_max_srqs;
-
-extern unsigned int ib_ipath_max_srq_sges;
-
-extern unsigned int ib_ipath_max_srq_wrs;
-
-extern const u32 ib_ipath_rnr_table[];
-
-extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
-
-#endif                         /* IPATH_VERBS_H */
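
The get_swqe_ptr()/get_rwqe_ptr() helpers above index into arrays whose element size is only fixed at QP creation time (a fixed header plus max_sge trailing SGEs), so plain array indexing can't be used. A self-contained sketch of the same stride computation, using simplified stand-in structs rather than the real ipath types:

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for the real ipath_sge/ipath_swqe layouts. */
struct sge {
	void *vaddr;
	unsigned length;
};

struct swqe {
	unsigned psn;
	struct sge sg_list[];	/* flexible array; size fixed per queue */
};

/* Same computation as get_swqe_ptr(): each entry occupies the struct
 * header plus max_sge trailing SGEs, so index by that stride. */
static struct swqe *swqe_ptr(void *wq, unsigned max_sge, unsigned n)
{
	size_t stride = sizeof(struct swqe) + max_sge * sizeof(struct sge);

	return (struct swqe *)((char *)wq + stride * n);
}

int main(void)
{
	unsigned max_sge = 3, entries = 8;
	size_t stride = sizeof(struct swqe) + max_sge * sizeof(struct sge);
	void *wq = calloc(entries, stride);

	if (!wq)
		return 1;
	swqe_ptr(wq, max_sge, 5)->psn = 42;
	printf("entry 5: offset %zu, psn %u\n",
	       5 * stride, swqe_ptr(wq, max_sge, 5)->psn);
	free(wq);
	return 0;
}
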
diff --git a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c b/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
deleted file mode 100644 (file)
index 72d476f..0000000
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-#include <linux/slab.h>
-
-#include "ipath_verbs.h"
-
-/*
- * Global table of GID to attached QPs.
- * The table is global to all ipath devices since a send from one QP/device
- * needs to be locally routed to any locally attached QPs on the same
- * or different device.
- */
-static struct rb_root mcast_tree;
-static DEFINE_SPINLOCK(mcast_lock);
-
-/**
- * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
-{
-       struct ipath_mcast_qp *mqp;
-
-       mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
-       if (!mqp)
-               goto bail;
-
-       mqp->qp = qp;
-       atomic_inc(&qp->refcount);
-
-bail:
-       return mqp;
-}
-
-static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
-{
-       struct ipath_qp *qp = mqp->qp;
-
-       /* Notify ipath_destroy_qp() if it is waiting. */
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-
-       kfree(mqp);
-}
-
-/**
- * ipath_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
-{
-       struct ipath_mcast *mcast;
-
-       mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
-       if (!mcast)
-               goto bail;
-
-       mcast->mgid = *mgid;
-       INIT_LIST_HEAD(&mcast->qp_list);
-       init_waitqueue_head(&mcast->wait);
-       atomic_set(&mcast->refcount, 0);
-       mcast->n_attached = 0;
-
-bail:
-       return mcast;
-}
-
-static void ipath_mcast_free(struct ipath_mcast *mcast)
-{
-       struct ipath_mcast_qp *p, *tmp;
-
-       list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-               ipath_mcast_qp_free(p);
-
-       kfree(mcast);
-}
-
-/**
- * ipath_mcast_find - search the global table for the given multicast GID
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
-{
-       struct rb_node *n;
-       unsigned long flags;
-       struct ipath_mcast *mcast;
-
-       spin_lock_irqsave(&mcast_lock, flags);
-       n = mcast_tree.rb_node;
-       while (n) {
-               int ret;
-
-               mcast = rb_entry(n, struct ipath_mcast, rb_node);
-
-               ret = memcmp(mgid->raw, mcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0)
-                       n = n->rb_left;
-               else if (ret > 0)
-                       n = n->rb_right;
-               else {
-                       atomic_inc(&mcast->refcount);
-                       spin_unlock_irqrestore(&mcast_lock, flags);
-                       goto bail;
-               }
-       }
-       spin_unlock_irqrestore(&mcast_lock, flags);
-
-       mcast = NULL;
-
-bail:
-       return mcast;
-}
-
-/**
- * ipath_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID structure to insert
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int ipath_mcast_add(struct ipath_ibdev *dev,
-                          struct ipath_mcast *mcast,
-                          struct ipath_mcast_qp *mqp)
-{
-       struct rb_node **n = &mcast_tree.rb_node;
-       struct rb_node *pn = NULL;
-       int ret;
-
-       spin_lock_irq(&mcast_lock);
-
-       while (*n) {
-               struct ipath_mcast *tmcast;
-               struct ipath_mcast_qp *p;
-
-               pn = *n;
-               tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
-
-               ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0) {
-                       n = &pn->rb_left;
-                       continue;
-               }
-               if (ret > 0) {
-                       n = &pn->rb_right;
-                       continue;
-               }
-
-               /* Search the QP list to see if this is already there. */
-               list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-                       if (p->qp == mqp->qp) {
-                               ret = ESRCH;
-                               goto bail;
-                       }
-               }
-               if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
-                       ret = ENOMEM;
-                       goto bail;
-               }
-
-               tmcast->n_attached++;
-
-               list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-               ret = EEXIST;
-               goto bail;
-       }
-
-       spin_lock(&dev->n_mcast_grps_lock);
-       if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
-               spin_unlock(&dev->n_mcast_grps_lock);
-               ret = ENOMEM;
-               goto bail;
-       }
-
-       dev->n_mcast_grps_allocated++;
-       spin_unlock(&dev->n_mcast_grps_lock);
-
-       mcast->n_attached++;
-
-       list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-       atomic_inc(&mcast->refcount);
-       rb_link_node(&mcast->rb_node, pn, n);
-       rb_insert_color(&mcast->rb_node, &mcast_tree);
-
-       ret = 0;
-
-bail:
-       spin_unlock_irq(&mcast_lock);
-
-       return ret;
-}
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_mcast *mcast;
-       struct ipath_mcast_qp *mqp;
-       int ret;
-
-       /*
-        * Allocate data structures since it's better to do this outside of
-        * spin locks and it will most likely be needed.
-        */
-       mcast = ipath_mcast_alloc(gid);
-       if (mcast == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       mqp = ipath_mcast_qp_alloc(qp);
-       if (mqp == NULL) {
-               ipath_mcast_free(mcast);
-               ret = -ENOMEM;
-               goto bail;
-       }
-       switch (ipath_mcast_add(dev, mcast, mqp)) {
-       case ESRCH:
-               /* Neither was used: can't attach the same QP twice. */
-               ipath_mcast_qp_free(mqp);
-               ipath_mcast_free(mcast);
-               ret = -EINVAL;
-               goto bail;
-       case EEXIST:            /* The mcast wasn't used */
-               ipath_mcast_free(mcast);
-               break;
-       case ENOMEM:
-               /* Exceeded the maximum number of mcast groups. */
-               ipath_mcast_qp_free(mqp);
-               ipath_mcast_free(mcast);
-               ret = -ENOMEM;
-               goto bail;
-       default:
-               break;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_mcast *mcast = NULL;
-       struct ipath_mcast_qp *p, *tmp;
-       struct rb_node *n;
-       int last = 0;
-       int ret;
-
-       spin_lock_irq(&mcast_lock);
-
-       /* Find the GID in the mcast table. */
-       n = mcast_tree.rb_node;
-       while (1) {
-               if (n == NULL) {
-                       spin_unlock_irq(&mcast_lock);
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               mcast = rb_entry(n, struct ipath_mcast, rb_node);
-               ret = memcmp(gid->raw, mcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0)
-                       n = n->rb_left;
-               else if (ret > 0)
-                       n = n->rb_right;
-               else
-                       break;
-       }
-
-       /* Search the QP list. */
-       list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-               if (p->qp != qp)
-                       continue;
-               /*
-                * We found it, so remove it, but don't poison the forward
-                * link until we are sure there are no list walkers.
-                */
-               list_del_rcu(&p->list);
-               mcast->n_attached--;
-
-               /* If this was the last attached QP, remove the GID too. */
-               if (list_empty(&mcast->qp_list)) {
-                       rb_erase(&mcast->rb_node, &mcast_tree);
-                       last = 1;
-               }
-               break;
-       }
-
-       spin_unlock_irq(&mcast_lock);
-
-       if (p) {
-               /*
-                * Wait for any list walkers to finish before freeing the
-                * list element.
-                */
-               wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-               ipath_mcast_qp_free(p);
-       }
-       if (last) {
-               atomic_dec(&mcast->refcount);
-               wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-               ipath_mcast_free(mcast);
-               spin_lock_irq(&dev->n_mcast_grps_lock);
-               dev->n_mcast_grps_allocated--;
-               spin_unlock_irq(&dev->n_mcast_grps_lock);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_mcast_tree_empty(void)
-{
-       return mcast_tree.rb_node == NULL;
-}
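
Note the error-code convention in the removed file: ipath_mcast_add() returns positive errno-style sentinels (EEXIST, ESRCH, ENOMEM), and ipath_multicast_attach() translates them into the negative errnos the verbs API expects, freeing whichever freshly allocated structure went unused. A toy sketch of that sentinel-translation pattern; the gid_exists/qp_attached/full flags stand in for the real rb-tree and QP-list lookups:

#include <errno.h>
#include <stdio.h>

/* Internal helper returns *positive* sentinels, as ipath_mcast_add() does:
 * 0 = both inserted, EEXIST = GID existed (QP attached anyway),
 * ESRCH = QP already attached, ENOMEM = a limit was hit. Toy logic only. */
static int mcast_add(int gid_exists, int qp_attached, int full)
{
	if (full)
		return ENOMEM;
	if (gid_exists)
		return qp_attached ? ESRCH : EEXIST;
	return 0;
}

/* The caller converts sentinels to the negative errnos verbs callers expect. */
static int multicast_attach(int gid_exists, int qp_attached, int full)
{
	switch (mcast_add(gid_exists, qp_attached, full)) {
	case ESRCH:		/* same QP attached twice */
		return -EINVAL;
	case ENOMEM:		/* group or attach limit exceeded */
		return -ENOMEM;
	case EEXIST:		/* GID reused; the fresh GID struct is freed */
	default:
		return 0;
	}
}

int main(void)
{
	printf("%d %d %d\n",
	       multicast_attach(0, 0, 0),	/* 0 */
	       multicast_attach(1, 1, 0),	/* -EINVAL */
	       multicast_attach(0, 0, 1));	/* -ENOMEM */
	return 0;
}
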
diff --git a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c b/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
deleted file mode 100644 (file)
index 1a7e20a..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on PowerPC only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * Nothing to do on PowerPC, so just return without error.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-       return 0;
-}
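
This stub is only linked on PowerPC; elsewhere, as the file comment says, weak symbol versions of the exported functions take over. A minimal sketch of that weak-symbol fallback mechanism itself (GCC/Clang attribute syntax; enable_wc() is an illustrative name, not the real ipath symbol):

#include <stdio.h>

/* Weak default: used only when no other object file supplies a strong
 * definition of the same symbol. */
int __attribute__((weak)) enable_wc(void)
{
	printf("generic fallback: no write combining\n");
	return 0;
}

/* An arch-specific file overrides the weak default simply by defining
 * the same symbol without the weak attribute:
 *
 *	int enable_wc(void) { ... }
 */

int main(void)
{
	return enable_wc();
}
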
diff --git a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c b/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
deleted file mode 100644 (file)
index 7b6e4c8..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on x86_64 only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include <linux/pci.h>
-#include <asm/processor.h>
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
- * write combining.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-       int ret = 0;
-       u64 pioaddr, piolen;
-       unsigned bits;
-       const unsigned long addr = pci_resource_start(dd->pcidev, 0);
-       const size_t len = pci_resource_len(dd->pcidev, 0);
-
-       /*
-        * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
-        * chip.  Linux (possibly the hardware) requires it to be on a power
-        * of 2 address matching the length (which has to be a power of 2).
-        * For rev1, that means the base address, for rev2, it will be just
-        * the PIO buffers themselves.
-        * For chips with two sets of buffers, the calculations are
-        * somewhat more complicated; we need to sum, and the piobufbase
-        * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
-        * The buffers are still packed, so a single range covers both.
-        */
-       if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
-               unsigned long pio2kbase, pio4kbase;
-               pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
-               pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
-               if (pio2kbase < pio4kbase) { /* all, for now */
-                       pioaddr = addr + pio2kbase;
-                       piolen = pio4kbase - pio2kbase +
-                               dd->ipath_piobcnt4k * dd->ipath_4kalign;
-               } else {
-                       pioaddr = addr + pio4kbase;
-                       piolen = pio2kbase - pio4kbase +
-                               dd->ipath_piobcnt2k * dd->ipath_palign;
-               }
-       } else {  /* single buffer size (2K, currently) */
-               pioaddr = addr + dd->ipath_piobufbase;
-               piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
-                       dd->ipath_piobcnt4k * dd->ipath_4kalign;
-       }
-
-       for (bits = 0; !(piolen & (1ULL << bits)); bits++)
-               /* do nothing */ ;
-
-       if (piolen != (1ULL << bits)) {
-               piolen >>= bits;
-               while (piolen >>= 1)
-                       bits++;
-               piolen = 1ULL << (bits + 1);
-       }
-       if (pioaddr & (piolen - 1)) {
-               u64 atmp;
-               ipath_dbg("pioaddr %llx not on right boundary for size "
-                         "%llx, fixing\n",
-                         (unsigned long long) pioaddr,
-                         (unsigned long long) piolen);
-               atmp = pioaddr & ~(piolen - 1);
-               if (atmp < addr || (atmp + piolen) > (addr + len)) {
-                       ipath_dev_err(dd, "No way to align address/size "
-                                     "(%llx/%llx), no WC mtrr\n",
-                                     (unsigned long long) atmp,
-                                     (unsigned long long) piolen << 1);
-                       ret = -ENODEV;
-               } else {
-                       ipath_dbg("changing WC base from %llx to %llx, "
-                                 "len from %llx to %llx\n",
-                                 (unsigned long long) pioaddr,
-                                 (unsigned long long) atmp,
-                                 (unsigned long long) piolen,
-                                 (unsigned long long) piolen << 1);
-                       pioaddr = atmp;
-                       piolen <<= 1;
-               }
-       }
-
-       if (!ret) {
-               dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
-               if (dd->wc_cookie < 0) {
-                       ipath_dev_err(dd, "Setting mtrr failed on PIO buffers\n");
-                       ret = -ENODEV;
-               } else if (dd->wc_cookie == 0)
-                       ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
-               else
-                       ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
-       }
-
-       return ret;
-}
-
-/**
- * ipath_disable_wc - disable write combining for MMIO writes to the device
- * @dd: infinipath device
- */
-void ipath_disable_wc(struct ipath_devdata *dd)
-{
-       arch_phys_wc_del(dd->wc_cookie);
-}
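
The removed ipath_enable_wc() has to round the PIO region up to a power-of-2 length and align the base to it, because MTRR-based write combining only accepts such ranges. A standalone sketch of the same rounding logic (pow2_roundup() is an illustrative helper; it assumes a nonzero length, as the driver does):

#include <stdint.h>
#include <stdio.h>

/* Round len up to a power of two with the same bit fiddling as above. */
static uint64_t pow2_roundup(uint64_t len)
{
	unsigned bits = 0;

	while (!(len & (1ULL << bits)))
		bits++;				/* find the lowest set bit */
	if (len == (1ULL << bits))
		return len;			/* already a power of two */
	len >>= bits;
	while (len >>= 1)
		bits++;
	return 1ULL << (bits + 1);		/* next power of two up */
}

int main(void)
{
	uint64_t base = 0x12000, len = 0x3000;
	uint64_t wclen = pow2_roundup(len);	/* 0x4000 */

	if (base & (wclen - 1))
		base &= ~(wclen - 1);		/* align base down; the driver
						 * then doubles the length so the
						 * range still covers the buffers */
	printf("WC range: base %#llx len %#llx\n",
	       (unsigned long long)base, (unsigned long long)wclen);
	return 0;
}
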
index efd6f4560d3e55ceeab571fdc1d0ebc242a38df2..7e8037e230b8e024ae2bb9d8797bba1d9eab7141 100644 (file)
@@ -1,7 +1,7 @@
 menu "Speakup console speech"
 
 config SPEAKUP
-       depends on VT
+       depends on VT && !MN10300
        tristate "Speakup core"
        ---help---
                This is the Speakup screen reader.  Think of it as a
index 63c59bc89b040fe68fcf01d364d5b636e8ac6707..30cf973f326dc467fba09f1a7c85a05acf0f9101 100644 (file)
@@ -264,8 +264,9 @@ static struct notifier_block vt_notifier_block = {
        .notifier_call = vt_notifier_call,
 };
 
-static unsigned char get_attributes(u16 *pos)
+static unsigned char get_attributes(struct vc_data *vc, u16 *pos)
 {
+       pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
        return (u_char) (scr_readw(pos) >> 8);
 }
 
@@ -275,7 +276,7 @@ static void speakup_date(struct vc_data *vc)
        spk_y = spk_cy = vc->vc_y;
        spk_pos = spk_cp = vc->vc_pos;
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) spk_pos);
+       spk_attr = get_attributes(vc, (u_short *)spk_pos);
 }
 
 static void bleep(u_short val)
@@ -469,8 +470,12 @@ static u16 get_char(struct vc_data *vc, u16 *pos, u_char *attribs)
        u16 ch = ' ';
 
        if (vc && pos) {
-               u16 w = scr_readw(pos);
-               u16 c = w & 0xff;
+               u16 w;
+               u16 c;
+
+               pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
+               w = scr_readw(pos);
+               c = w & 0xff;
 
                if (w & vc->vc_hi_font_mask)
                        c |= 0x100;
@@ -746,7 +751,7 @@ static int get_line(struct vc_data *vc)
        u_char tmp2;
 
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) spk_pos);
+       spk_attr = get_attributes(vc, (u_short *)spk_pos);
        for (i = 0; i < vc->vc_cols; i++) {
                buf[i] = (u_char) get_char(vc, (u_short *) tmp, &tmp2);
                tmp += 2;
@@ -811,7 +816,7 @@ static int say_from_to(struct vc_data *vc, u_long from, u_long to,
        u_short saved_punc_mask = spk_punc_mask;
 
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) from);
+       spk_attr = get_attributes(vc, (u_short *)from);
        while (from < to) {
                buf[i++] = (char)get_char(vc, (u_short *) from, &tmp);
                from += 2;
@@ -886,7 +891,7 @@ static int get_sentence_buf(struct vc_data *vc, int read_punc)
        sentmarks[bn][0] = &sentbuf[bn][0];
        i = 0;
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) start);
+       spk_attr = get_attributes(vc, (u_short *)start);
 
        while (start < end) {
                sentbuf[bn][i] = (char)get_char(vc, (u_short *) start, &tmp);
@@ -1585,7 +1590,7 @@ static int count_highlight_color(struct vc_data *vc)
                u16 *ptr;
 
                for (ptr = start; ptr < end; ptr++) {
-                       ch = get_attributes(ptr);
+                       ch = get_attributes(vc, ptr);
                        bg = (ch & 0x70) >> 4;
                        speakup_console[vc_num]->ht.bgcount[bg]++;
                }
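
Every hunk in this file funnels console reads through screen_pos() before
touching memory, so the fetch follows whichever buffer is currently live for
the VC (for example during scrollback) instead of dereferencing a raw offset
from vc_origin. A hedged kernel-style sketch of the remap, with read_cell()
as a hypothetical helper name:

#include <linux/selection.h>
#include <linux/types.h>
#include <linux/vt_kern.h>

static u16 read_cell(struct vc_data *vc, u16 *pos)
{
	/* turn the raw pointer back into a cell offset, then let the VT
	 * core pick the buffer that is actually visible right now */
	pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
	return scr_readw(pos);
}
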
index aa5ab6c80ed4ffefc12aaca9c1337e403371d478..41ef099b7aa69f345d33998c46e2bce11ad691d9 100644 (file)
@@ -142,7 +142,9 @@ static void __speakup_paste_selection(struct work_struct *work)
        struct tty_ldisc *ld;
        DECLARE_WAITQUEUE(wait, current);
 
-       ld = tty_ldisc_ref_wait(tty);
+       ld = tty_ldisc_ref(tty);
+       if (!ld)
+               goto tty_unref;
        tty_buffer_lock_exclusive(&vc->port);
 
        add_wait_queue(&vc->paste_wait, &wait);
@@ -162,6 +164,7 @@ static void __speakup_paste_selection(struct work_struct *work)
 
        tty_buffer_unlock_exclusive(&vc->port);
        tty_ldisc_deref(ld);
+tty_unref:
        tty_kref_put(tty);
 }
 
index 3b5835b2812849f0c32055f065ef0be6a0c4d0d0..a5bbb338f275495c18bf6051adf60ee8a3bbcc1d 100644 (file)
@@ -6,6 +6,11 @@
 #include "spk_priv.h"
 #include "serialio.h"
 
+#include <linux/serial_core.h>
+/* WARNING:  Do not change this to <linux/serial.h> without testing that
+ * SERIAL_PORT_DFNS does get defined to the appropriate value. */
+#include <asm/serial.h>
+
 #ifndef SERIAL_PORT_DFNS
 #define SERIAL_PORT_DFNS
 #endif
@@ -23,9 +28,15 @@ const struct old_serial_port *spk_serial_init(int index)
        int baud = 9600, quot = 0;
        unsigned int cval = 0;
        int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8;
-       const struct old_serial_port *ser = rs_table + index;
+       const struct old_serial_port *ser;
        int err;
 
+       if (index >= ARRAY_SIZE(rs_table)) {
+               pr_info("no port info for ttyS%d\n", index);
+               return NULL;
+       }
+       ser = rs_table + index;
+
        /*      Divisor, bytesize and parity */
        quot = ser->baud_base / baud;
        cval = cflag & (CSIZE | CSTOPB);
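
The guard added above is the standard table-bounds idiom: validate the index
against ARRAY_SIZE() before forming the pointer, rather than computing
rs_table + index unconditionally. A self-contained userspace analogue, with
all names here illustrative:

#include <stdio.h>

#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

struct port_info { int baud_base; };

static const struct port_info table[] = {
	{ 115200 }, { 115200 }, { 115200 }, { 115200 },
};

static const struct port_info *lookup_port(unsigned int index)
{
	if (index >= ARRAY_SIZE(table)) {
		fprintf(stderr, "no port info for ttyS%u\n", index);
		return NULL;		/* caller must handle the miss */
	}
	return &table[index];
}

int main(void)
{
	return lookup_port(7) == NULL ? 0 : 1;	/* out of range -> NULL */
}
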
index 3327c49674d37d1a362f706eb30d55d7128d60a5..713c63d9681b3cf4e413cb50f93cd466adb8520c 100644 (file)
@@ -898,7 +898,7 @@ static ssize_t unmap_zeroes_data_store(struct config_item *item,
        da->unmap_zeroes_data = flag;
        pr_debug("dev[%p]: SE Device Thin Provisioning LBPRZ bit: %d\n",
                 da->da_dev, flag);
-       return 0;
+       return count;
 }
 
 /*
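
The one-line change above matters because a configfs ->store() callback
signals progress through its return value: a negative errno reports failure,
and a positive value reports how many bytes were consumed. Returning 0 tells
the caller nothing was consumed, which can leave userspace retrying the write
indefinitely. A hedged sketch of the contract, with illustrative names:

#include <linux/configfs.h>
#include <linux/string.h>

static ssize_t example_flag_store(struct config_item *item,
				  const char *page, size_t count)
{
	bool flag;
	int ret = strtobool(page, &flag);

	if (ret < 0)
		return ret;	/* failure: negative errno */
	/* ... apply the flag to the backing attribute ... */
	return count;		/* success: bytes consumed, never 0 */
}
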
index cacd97a8cbd02d32509b0a590ff2c40b59be36b1..da457e25717a6099842e883f4291515dcacf344a 100644 (file)
@@ -828,6 +828,50 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
        return dev;
 }
 
+/*
+ * Check if the underlying struct block_device request_queue supports
+ * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
+ * in ATA and we need to set TPE=1
+ */
+bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+                                      struct request_queue *q, int block_size)
+{
+       if (!blk_queue_discard(q))
+               return false;
+
+       attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) /
+                                                               block_size;
+       /*
+        * Currently hardcoded to 1 in Linux/SCSI code.
+        */
+       attrib->max_unmap_block_desc_count = 1;
+       attrib->unmap_granularity = q->limits.discard_granularity / block_size;
+       attrib->unmap_granularity_alignment = q->limits.discard_alignment /
+                                                               block_size;
+       attrib->unmap_zeroes_data = q->limits.discard_zeroes_data;
+       return true;
+}
+EXPORT_SYMBOL(target_configure_unmap_from_queue);
+
+/*
+ * Convert from blocksize advertised to the initiator to the 512 byte
+ * units unconditionally used by the Linux block layer.
+ */
+sector_t target_to_linux_sector(struct se_device *dev, sector_t lb)
+{
+       switch (dev->dev_attrib.block_size) {
+       case 4096:
+               return lb << 3;
+       case 2048:
+               return lb << 2;
+       case 1024:
+               return lb << 1;
+       default:
+               return lb;
+       }
+}
+EXPORT_SYMBOL(target_to_linux_sector);
+
 int target_configure_device(struct se_device *dev)
 {
        struct se_hba *hba = dev->se_hba;
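
target_to_linux_sector() above consolidates several open-coded copies of the
same conversion (one of them is removed from iblock_execute_rw() later in this
diff): the block layer always counts 512-byte sectors, so an LBA expressed in
the initiator-visible block size is multiplied by block_size / 512, done as a
shift. A standalone check that the shift table matches plain arithmetic
(illustrative, not part of the patch):

#include <assert.h>
#include <stdint.h>

static uint64_t to_512_units(uint32_t block_size, uint64_t lb)
{
	switch (block_size) {
	case 4096: return lb << 3;	/* 4096 / 512 == 8 */
	case 2048: return lb << 2;	/* 2048 / 512 == 4 */
	case 1024: return lb << 1;	/* 1024 / 512 == 2 */
	default:   return lb;		/* already 512-byte units */
	}
}

int main(void)
{
	assert(to_512_units(4096, 10) == 10 * (4096 / 512));
	assert(to_512_units(1024, 7)  == 7 * (1024 / 512));
	return 0;
}
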
index e3195700211a3ebc38192391bf6488ed6ae86099..75f0f08b2a34f32d0b2bfdd35fa40f027e74ae75 100644 (file)
@@ -160,25 +160,11 @@ static int fd_configure_device(struct se_device *dev)
                        " block_device blocks: %llu logical_block_size: %d\n",
                        dev_size, div_u64(dev_size, fd_dev->fd_block_size),
                        fd_dev->fd_block_size);
-               /*
-                * Check if the underlying struct block_device request_queue supports
-                * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
-                * in ATA and we need to set TPE=1
-                */
-               if (blk_queue_discard(q)) {
-                       dev->dev_attrib.max_unmap_lba_count =
-                               q->limits.max_discard_sectors;
-                       /*
-                        * Currently hardcoded to 1 in Linux/SCSI code..
-                        */
-                       dev->dev_attrib.max_unmap_block_desc_count = 1;
-                       dev->dev_attrib.unmap_granularity =
-                               q->limits.discard_granularity >> 9;
-                       dev->dev_attrib.unmap_granularity_alignment =
-                               q->limits.discard_alignment;
+
+               if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
+                                                     fd_dev->fd_block_size))
                        pr_debug("IFILE: BLOCK Discard support available,"
-                                       " disabled by default\n");
-               }
+                                " disabled by default\n");
                /*
                 * Enable write same emulation for IBLOCK and use 0xFFFF as
                 * the smaller WRITE_SAME(10) only has a two-byte block count.
@@ -490,9 +476,12 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
        if (S_ISBLK(inode->i_mode)) {
                /* The backend is block device, use discard */
                struct block_device *bdev = inode->i_bdev;
+               struct se_device *dev = cmd->se_dev;
 
-               ret = blkdev_issue_discard(bdev, lba,
-                               nolb, GFP_KERNEL, 0);
+               ret = blkdev_issue_discard(bdev,
+                                          target_to_linux_sector(dev, lba),
+                                          target_to_linux_sector(dev, nolb),
+                                          GFP_KERNEL, 0);
                if (ret < 0) {
                        pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n",
                                ret);
index e77d15000caa313a8c3c8bb4a403ba8f1aa9a6c6..abe4eb997a842240f8b76875cc7cfbb7f55dbe0f 100644 (file)
@@ -121,29 +121,11 @@ static int iblock_configure_device(struct se_device *dev)
        dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
        dev->dev_attrib.hw_queue_depth = q->nr_requests;
 
-       /*
-        * Check if the underlying struct block_device request_queue supports
-        * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
-        * in ATA and we need to set TPE=1
-        */
-       if (blk_queue_discard(q)) {
-               dev->dev_attrib.max_unmap_lba_count =
-                               q->limits.max_discard_sectors;
-
-               /*
-                * Currently hardcoded to 1 in Linux/SCSI code..
-                */
-               dev->dev_attrib.max_unmap_block_desc_count = 1;
-               dev->dev_attrib.unmap_granularity =
-                               q->limits.discard_granularity >> 9;
-               dev->dev_attrib.unmap_granularity_alignment =
-                               q->limits.discard_alignment;
-               dev->dev_attrib.unmap_zeroes_data =
-                               q->limits.discard_zeroes_data;
-
+       if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
+                                             dev->dev_attrib.hw_block_size))
                pr_debug("IBLOCK: BLOCK Discard support available,"
-                               " disabled by default\n");
-       }
+                        " disabled by default\n");
+
        /*
         * Enable write same emulation for IBLOCK and use 0xFFFF as
         * the smaller WRITE_SAME(10) only has a two-byte block count.
@@ -415,9 +397,13 @@ static sense_reason_t
 iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
 {
        struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd;
+       struct se_device *dev = cmd->se_dev;
        int ret;
 
-       ret = blkdev_issue_discard(bdev, lba, nolb, GFP_KERNEL, 0);
+       ret = blkdev_issue_discard(bdev,
+                                  target_to_linux_sector(dev, lba),
+                                  target_to_linux_sector(dev, nolb),
+                                  GFP_KERNEL, 0);
        if (ret < 0) {
                pr_err("blkdev_issue_discard() failed: %d\n", ret);
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -433,8 +419,10 @@ iblock_execute_write_same(struct se_cmd *cmd)
        struct scatterlist *sg;
        struct bio *bio;
        struct bio_list list;
-       sector_t block_lba = cmd->t_task_lba;
-       sector_t sectors = sbc_get_write_same_sectors(cmd);
+       struct se_device *dev = cmd->se_dev;
+       sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba);
+       sector_t sectors = target_to_linux_sector(dev,
+                                       sbc_get_write_same_sectors(cmd));
 
        if (cmd->prot_op) {
                pr_err("WRITE_SAME: Protection information with IBLOCK"
@@ -615,9 +603,9 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio)
        }
 
        bip = bio_integrity_alloc(bio, GFP_NOIO, cmd->t_prot_nents);
-       if (!bip) {
+       if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
-               return -ENOMEM;
+               return PTR_ERR(bip);
        }
 
        bip->bip_iter.bi_size = (cmd->data_length / dev->dev_attrib.block_size) *
@@ -648,12 +636,12 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                  enum dma_data_direction data_direction)
 {
        struct se_device *dev = cmd->se_dev;
+       sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba);
        struct iblock_req *ibr;
        struct bio *bio, *bio_start;
        struct bio_list list;
        struct scatterlist *sg;
        u32 sg_num = sgl_nents;
-       sector_t block_lba;
        unsigned bio_cnt;
        int rw = 0;
        int i;
@@ -679,24 +667,6 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                rw = READ;
        }
 
-       /*
-        * Convert the blocksize advertised to the initiator to the 512 byte
-        * units unconditionally used by the Linux block layer.
-        */
-       if (dev->dev_attrib.block_size == 4096)
-               block_lba = (cmd->t_task_lba << 3);
-       else if (dev->dev_attrib.block_size == 2048)
-               block_lba = (cmd->t_task_lba << 2);
-       else if (dev->dev_attrib.block_size == 1024)
-               block_lba = (cmd->t_task_lba << 1);
-       else if (dev->dev_attrib.block_size == 512)
-               block_lba = cmd->t_task_lba;
-       else {
-               pr_err("Unsupported SCSI -> BLOCK LBA conversion:"
-                               " %u\n", dev->dev_attrib.block_size);
-               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-       }
-
        ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL);
        if (!ibr)
                goto fail;
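
The IS_ERR() hunk above tracks an API change: bio_integrity_alloc() now
reports failure with an ERR_PTR()-encoded errno instead of NULL, so a NULL
test would silently miss the error. A hedged kernel-style sketch of the
convention, with demo_alloc_bip() as a hypothetical wrapper:

#include <linux/bio.h>
#include <linux/err.h>

static int demo_alloc_bip(struct bio *bio, unsigned int nr_vecs)
{
	struct bio_integrity_payload *bip;

	bip = bio_integrity_alloc(bio, GFP_NOIO, nr_vecs);
	if (IS_ERR(bip))		/* a NULL check would miss this */
		return PTR_ERR(bip);	/* propagate the encoded errno */
	/* ... fill in and attach bip ... */
	return 0;
}
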
index dae0750c2032bd87430ab99609d475a615e691db..db4412fe6b8a3f9ba73d46fadb11dcf1bf40980b 100644 (file)
@@ -141,7 +141,6 @@ void        transport_dump_vpd_proto_id(struct t10_vpd *, unsigned char *, int);
 int    transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int);
 int    transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int);
 int    transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int);
-bool   target_stop_cmd(struct se_cmd *cmd, unsigned long *flags);
 void   transport_clear_lun_ref(struct se_lun *);
 void   transport_send_task_abort(struct se_cmd *);
 sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size);
index fcdcb117c60da36e546370ecfd7e973f17b5d4ec..82a663ba98009fb5f286a88651c9b3a54cf68bbb 100644 (file)
@@ -68,23 +68,25 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
 
        if (dev) {
                spin_lock_irqsave(&dev->se_tmr_lock, flags);
-               list_del(&tmr->tmr_list);
+               list_del_init(&tmr->tmr_list);
                spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
        }
 
        kfree(tmr);
 }
 
-static void core_tmr_handle_tas_abort(
-       struct se_node_acl *tmr_nacl,
-       struct se_cmd *cmd,
-       int tas)
+static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 {
-       bool remove = true;
+       unsigned long flags;
+       bool remove = true, send_tas;
        /*
         * TASK ABORTED status (TAS) bit support
         */
-       if ((tmr_nacl && (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) {
+       spin_lock_irqsave(&cmd->t_state_lock, flags);
+       send_tas = (cmd->transport_state & CMD_T_TAS);
+       spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+
+       if (send_tas) {
                remove = false;
                transport_send_task_abort(cmd);
        }
@@ -107,6 +109,46 @@ static int target_check_cdb_and_preempt(struct list_head *list,
        return 1;
 }
 
+static bool __target_check_io_state(struct se_cmd *se_cmd,
+                                   struct se_session *tmr_sess, int tas)
+{
+       struct se_session *sess = se_cmd->se_sess;
+
+       assert_spin_locked(&sess->sess_cmd_lock);
+       WARN_ON_ONCE(!irqs_disabled());
+       /*
+        * If command already reached CMD_T_COMPLETE state within
+        * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown,
+        * this se_cmd has been passed to fabric driver and will
+        * not be aborted.
+        *
+        * Otherwise, obtain a local se_cmd->cmd_kref now for TMR
+        * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as
+        * long as se_cmd->cmd_kref is still active unless zero.
+        */
+       spin_lock(&se_cmd->t_state_lock);
+       if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) {
+               pr_debug("Attempted to abort io tag: %llu already complete or"
+                       " fabric stop, skipping\n", se_cmd->tag);
+               spin_unlock(&se_cmd->t_state_lock);
+               return false;
+       }
+       if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
+               pr_debug("Attempted to abort io tag: %llu already shutdown,"
+                       " skipping\n", se_cmd->tag);
+               spin_unlock(&se_cmd->t_state_lock);
+               return false;
+       }
+       se_cmd->transport_state |= CMD_T_ABORTED;
+
+       if ((tmr_sess != se_cmd->se_sess) && tas)
+               se_cmd->transport_state |= CMD_T_TAS;
+
+       spin_unlock(&se_cmd->t_state_lock);
+
+       return kref_get_unless_zero(&se_cmd->cmd_kref);
+}
+
 void core_tmr_abort_task(
        struct se_device *dev,
        struct se_tmr_req *tmr,
@@ -130,34 +172,22 @@ void core_tmr_abort_task(
                if (tmr->ref_task_tag != ref_tag)
                        continue;
 
-               if (!kref_get_unless_zero(&se_cmd->cmd_kref))
-                       continue;
-
                printk("ABORT_TASK: Found referenced %s task_tag: %llu\n",
                        se_cmd->se_tfo->get_fabric_name(), ref_tag);
 
-               spin_lock(&se_cmd->t_state_lock);
-               if (se_cmd->transport_state & CMD_T_COMPLETE) {
-                       printk("ABORT_TASK: ref_tag: %llu already complete,"
-                              " skipping\n", ref_tag);
-                       spin_unlock(&se_cmd->t_state_lock);
+               if (!__target_check_io_state(se_cmd, se_sess, 0)) {
                        spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
-
                        target_put_sess_cmd(se_cmd);
-
                        goto out;
                }
-               se_cmd->transport_state |= CMD_T_ABORTED;
-               spin_unlock(&se_cmd->t_state_lock);
-
                list_del_init(&se_cmd->se_cmd_list);
                spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 
                cancel_work_sync(&se_cmd->work);
                transport_wait_for_tasks(se_cmd);
 
-               target_put_sess_cmd(se_cmd);
                transport_cmd_finish_abort(se_cmd, true);
+               target_put_sess_cmd(se_cmd);
 
                printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
                                " ref_tag: %llu\n", ref_tag);
@@ -178,9 +208,11 @@ static void core_tmr_drain_tmr_list(
        struct list_head *preempt_and_abort_list)
 {
        LIST_HEAD(drain_tmr_list);
+       struct se_session *sess;
        struct se_tmr_req *tmr_p, *tmr_pp;
        struct se_cmd *cmd;
        unsigned long flags;
+       bool rc;
        /*
         * Release all pending and outgoing TMRs aside from the received
         * LUN_RESET tmr..
@@ -206,17 +238,39 @@ static void core_tmr_drain_tmr_list(
                if (target_check_cdb_and_preempt(preempt_and_abort_list, cmd))
                        continue;
 
+               sess = cmd->se_sess;
+               if (WARN_ON_ONCE(!sess))
+                       continue;
+
+               spin_lock(&sess->sess_cmd_lock);
                spin_lock(&cmd->t_state_lock);
-               if (!(cmd->transport_state & CMD_T_ACTIVE)) {
+               if (!(cmd->transport_state & CMD_T_ACTIVE) ||
+                    (cmd->transport_state & CMD_T_FABRIC_STOP)) {
                        spin_unlock(&cmd->t_state_lock);
+                       spin_unlock(&sess->sess_cmd_lock);
                        continue;
                }
                if (cmd->t_state == TRANSPORT_ISTATE_PROCESSING) {
                        spin_unlock(&cmd->t_state_lock);
+                       spin_unlock(&sess->sess_cmd_lock);
                        continue;
                }
+               if (sess->sess_tearing_down || cmd->cmd_wait_set) {
+                       spin_unlock(&cmd->t_state_lock);
+                       spin_unlock(&sess->sess_cmd_lock);
+                       continue;
+               }
+               cmd->transport_state |= CMD_T_ABORTED;
                spin_unlock(&cmd->t_state_lock);
 
+               rc = kref_get_unless_zero(&cmd->cmd_kref);
+               if (!rc) {
+                       printk("LUN_RESET TMR: kref_get_unless_zero failed, skipping\n");
+                       spin_unlock(&sess->sess_cmd_lock);
+                       continue;
+               }
+               spin_unlock(&sess->sess_cmd_lock);
+
                list_move_tail(&tmr_p->tmr_list, &drain_tmr_list);
        }
        spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
@@ -230,20 +284,26 @@ static void core_tmr_drain_tmr_list(
                        (preempt_and_abort_list) ? "Preempt" : "", tmr_p,
                        tmr_p->function, tmr_p->response, cmd->t_state);
 
+               cancel_work_sync(&cmd->work);
+               transport_wait_for_tasks(cmd);
+
                transport_cmd_finish_abort(cmd, 1);
+               target_put_sess_cmd(cmd);
        }
 }
 
 static void core_tmr_drain_state_list(
        struct se_device *dev,
        struct se_cmd *prout_cmd,
-       struct se_node_acl *tmr_nacl,
+       struct se_session *tmr_sess,
        int tas,
        struct list_head *preempt_and_abort_list)
 {
        LIST_HEAD(drain_task_list);
+       struct se_session *sess;
        struct se_cmd *cmd, *next;
        unsigned long flags;
+       int rc;
 
        /*
         * Complete outstanding commands with TASK_ABORTED SAM status.
@@ -282,6 +342,16 @@ static void core_tmr_drain_state_list(
                if (prout_cmd == cmd)
                        continue;
 
+               sess = cmd->se_sess;
+               if (WARN_ON_ONCE(!sess))
+                       continue;
+
+               spin_lock(&sess->sess_cmd_lock);
+               rc = __target_check_io_state(cmd, tmr_sess, tas);
+               spin_unlock(&sess->sess_cmd_lock);
+               if (!rc)
+                       continue;
+
                list_move_tail(&cmd->state_list, &drain_task_list);
                cmd->state_active = false;
        }
@@ -289,7 +359,7 @@ static void core_tmr_drain_state_list(
 
        while (!list_empty(&drain_task_list)) {
                cmd = list_entry(drain_task_list.next, struct se_cmd, state_list);
-               list_del(&cmd->state_list);
+               list_del_init(&cmd->state_list);
 
                pr_debug("LUN_RESET: %s cmd: %p"
                        " ITT/CmdSN: 0x%08llx/0x%08x, i_state: %d, t_state: %d"
@@ -313,16 +383,11 @@ static void core_tmr_drain_state_list(
                 * loop above, but we do it down here given that
                 * cancel_work_sync may block.
                 */
-               if (cmd->t_state == TRANSPORT_COMPLETE)
-                       cancel_work_sync(&cmd->work);
-
-               spin_lock_irqsave(&cmd->t_state_lock, flags);
-               target_stop_cmd(cmd, &flags);
-
-               cmd->transport_state |= CMD_T_ABORTED;
-               spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+               cancel_work_sync(&cmd->work);
+               transport_wait_for_tasks(cmd);
 
-               core_tmr_handle_tas_abort(tmr_nacl, cmd, tas);
+               core_tmr_handle_tas_abort(cmd, tas);
+               target_put_sess_cmd(cmd);
        }
 }
 
@@ -334,6 +399,7 @@ int core_tmr_lun_reset(
 {
        struct se_node_acl *tmr_nacl = NULL;
        struct se_portal_group *tmr_tpg = NULL;
+       struct se_session *tmr_sess = NULL;
        int tas;
         /*
         * TASK_ABORTED status bit, this is configurable via ConfigFS
@@ -352,8 +418,9 @@ int core_tmr_lun_reset(
         * or struct se_device passthrough..
         */
        if (tmr && tmr->task_cmd && tmr->task_cmd->se_sess) {
-               tmr_nacl = tmr->task_cmd->se_sess->se_node_acl;
-               tmr_tpg = tmr->task_cmd->se_sess->se_tpg;
+               tmr_sess = tmr->task_cmd->se_sess;
+               tmr_nacl = tmr_sess->se_node_acl;
+               tmr_tpg = tmr_sess->se_tpg;
                if (tmr_nacl && tmr_tpg) {
                        pr_debug("LUN_RESET: TMR caller fabric: %s"
                                " initiator port %s\n",
@@ -366,7 +433,7 @@ int core_tmr_lun_reset(
                dev->transport->name, tas);
 
        core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list);
-       core_tmr_drain_state_list(dev, prout_cmd, tmr_nacl, tas,
+       core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas,
                                preempt_and_abort_list);
 
        /*
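
The recurring pattern in the hunks above is kref_get_unless_zero(): while
holding the lock that protects the per-session command list, take a reference
only if the command is not already in its final release, and skip it
otherwise. A hedged sketch of the idiom, with hypothetical names:

#include <linux/kref.h>
#include <linux/spinlock.h>

static bool try_get_object(struct kref *ref, spinlock_t *list_lock)
{
	bool got;

	spin_lock(list_lock);
	got = kref_get_unless_zero(ref);  /* false once the count hit zero */
	spin_unlock(list_lock);

	return got;	/* touch the object only when this is true */
}
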
index 9f3608e10f25a0ea2c1cf71b419596d558e7f31b..867bc6d0a68a3ecfbd1aab610f01ff73614b3305 100644 (file)
@@ -534,9 +534,6 @@ void transport_deregister_session(struct se_session *se_sess)
 }
 EXPORT_SYMBOL(transport_deregister_session);
 
-/*
- * Called with cmd->t_state_lock held.
- */
 static void target_remove_from_state_list(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
@@ -561,10 +558,6 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists,
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&cmd->t_state_lock, flags);
-       if (write_pending)
-               cmd->t_state = TRANSPORT_WRITE_PENDING;
-
        if (remove_from_lists) {
                target_remove_from_state_list(cmd);
 
@@ -574,6 +567,10 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists,
                cmd->se_lun = NULL;
        }
 
+       spin_lock_irqsave(&cmd->t_state_lock, flags);
+       if (write_pending)
+               cmd->t_state = TRANSPORT_WRITE_PENDING;
+
        /*
         * Determine if frontend context caller is requesting the stopping of
         * this command for frontend exceptions.
@@ -627,6 +624,8 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
 
 void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 {
+       bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+
        if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
                transport_lun_remove_cmd(cmd);
        /*
@@ -638,7 +637,7 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 
        if (transport_cmd_check_stop_to_fabric(cmd))
                return;
-       if (remove)
+       if (remove && ack_kref)
                transport_put_cmd(cmd);
 }
 
@@ -693,20 +692,11 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
                        success = 1;
        }
 
-       /*
-        * See if we are waiting to complete for an exception condition.
-        */
-       if (cmd->transport_state & CMD_T_REQUEST_STOP) {
-               spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-               complete(&cmd->task_stop_comp);
-               return;
-       }
-
        /*
         * Check for case where an explicit ABORT_TASK has been received
         * and transport_wait_for_tasks() will be waiting for completion..
         */
-       if (cmd->transport_state & CMD_T_ABORTED &&
+       if (cmd->transport_state & CMD_T_ABORTED ||
            cmd->transport_state & CMD_T_STOP) {
                spin_unlock_irqrestore(&cmd->t_state_lock, flags);
                complete_all(&cmd->t_transport_stop_comp);
@@ -721,10 +711,10 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
        cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE);
        spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-       if (cmd->cpuid == -1)
-               queue_work(target_completion_wq, &cmd->work);
-       else
+       if (cmd->se_cmd_flags & SCF_USE_CPUID)
                queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work);
+       else
+               queue_work(target_completion_wq, &cmd->work);
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
@@ -1203,7 +1193,6 @@ void transport_init_se_cmd(
        INIT_LIST_HEAD(&cmd->state_list);
        init_completion(&cmd->t_transport_stop_comp);
        init_completion(&cmd->cmd_wait_comp);
-       init_completion(&cmd->task_stop_comp);
        spin_lock_init(&cmd->t_state_lock);
        kref_init(&cmd->cmd_kref);
        cmd->transport_state = CMD_T_DEV_ACTIVE;
@@ -1437,6 +1426,12 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess
         */
        transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess,
                                data_length, data_dir, task_attr, sense);
+
+       if (flags & TARGET_SCF_USE_CPUID)
+               se_cmd->se_cmd_flags |= SCF_USE_CPUID;
+       else
+               se_cmd->cpuid = WORK_CPU_UNBOUND;
+
        if (flags & TARGET_SCF_UNKNOWN_SIZE)
                se_cmd->unknown_data_length = 1;
        /*
@@ -1634,33 +1629,6 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
 }
 EXPORT_SYMBOL(target_submit_tmr);
 
-/*
- * If the cmd is active, request it to be stopped and sleep until it
- * has completed.
- */
-bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags)
-       __releases(&cmd->t_state_lock)
-       __acquires(&cmd->t_state_lock)
-{
-       bool was_active = false;
-
-       if (cmd->transport_state & CMD_T_BUSY) {
-               cmd->transport_state |= CMD_T_REQUEST_STOP;
-               spin_unlock_irqrestore(&cmd->t_state_lock, *flags);
-
-               pr_debug("cmd %p waiting to complete\n", cmd);
-               wait_for_completion(&cmd->task_stop_comp);
-               pr_debug("cmd %p stopped successfully\n", cmd);
-
-               spin_lock_irqsave(&cmd->t_state_lock, *flags);
-               cmd->transport_state &= ~CMD_T_REQUEST_STOP;
-               cmd->transport_state &= ~CMD_T_BUSY;
-               was_active = true;
-       }
-
-       return was_active;
-}
-
 /*
  * Handle SAM-esque emulation for generic transport request failures.
  */
@@ -1859,19 +1827,21 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
        return true;
 }
 
+static int __transport_check_aborted_status(struct se_cmd *, int);
+
 void target_execute_cmd(struct se_cmd *cmd)
 {
-       /*
-        * If the received CDB has already been aborted, stop processing it here.
-        */
-       if (transport_check_aborted_status(cmd, 1))
-               return;
-
        /*
         * Determine if frontend context caller is requesting the stopping of
         * this command for frontend exceptions.
+        *
+        * If the received CDB has already been aborted, stop processing it here.
         */
        spin_lock_irq(&cmd->t_state_lock);
+       if (__transport_check_aborted_status(cmd, 1)) {
+               spin_unlock_irq(&cmd->t_state_lock);
+               return;
+       }
        if (cmd->transport_state & CMD_T_STOP) {
                pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n",
                        __func__, __LINE__, cmd->tag);
@@ -2222,20 +2192,14 @@ static inline void transport_free_pages(struct se_cmd *cmd)
 }
 
 /**
- * transport_release_cmd - free a command
- * @cmd:       command to free
+ * transport_put_cmd - release a reference to a command
+ * @cmd:       command to release
  *
- * This routine unconditionally frees a command, and reference counting
- * or list removal must be done in the caller.
+ * This routine releases our reference to the command and frees it if possible.
  */
-static int transport_release_cmd(struct se_cmd *cmd)
+static int transport_put_cmd(struct se_cmd *cmd)
 {
        BUG_ON(!cmd->se_tfo);
-
-       if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
-               core_tmr_release_req(cmd->se_tmr_req);
-       if (cmd->t_task_cdb != cmd->__t_task_cdb)
-               kfree(cmd->t_task_cdb);
        /*
         * If this cmd has been setup with target_get_sess_cmd(), drop
         * the kref and call ->release_cmd() in kref callback.
@@ -2243,18 +2207,6 @@ static int transport_release_cmd(struct se_cmd *cmd)
        return target_put_sess_cmd(cmd);
 }
 
-/**
- * transport_put_cmd - release a reference to a command
- * @cmd:       command to release
- *
- * This routine releases our reference to the command and frees it if possible.
- */
-static int transport_put_cmd(struct se_cmd *cmd)
-{
-       transport_free_pages(cmd);
-       return transport_release_cmd(cmd);
-}
-
 void *transport_kmap_data_sg(struct se_cmd *cmd)
 {
        struct scatterlist *sg = cmd->t_data_sg;
@@ -2450,34 +2402,58 @@ static void transport_write_pending_qf(struct se_cmd *cmd)
        }
 }
 
-int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
+static bool
+__transport_wait_for_tasks(struct se_cmd *, bool, bool *, bool *,
+                          unsigned long *flags);
+
+static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas)
 {
        unsigned long flags;
+
+       spin_lock_irqsave(&cmd->t_state_lock, flags);
+       __transport_wait_for_tasks(cmd, true, aborted, tas, &flags);
+       spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+}
+
+int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
+{
        int ret = 0;
+       bool aborted = false, tas = false;
 
        if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) {
                if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
-                        transport_wait_for_tasks(cmd);
+                       target_wait_free_cmd(cmd, &aborted, &tas);
 
-               ret = transport_release_cmd(cmd);
+               if (!aborted || tas)
+                       ret = transport_put_cmd(cmd);
        } else {
                if (wait_for_tasks)
-                       transport_wait_for_tasks(cmd);
+                       target_wait_free_cmd(cmd, &aborted, &tas);
                /*
                 * Handle WRITE failure case where transport_generic_new_cmd()
                 * has already added se_cmd to state_list, but fabric has
                 * failed command before I/O submission.
                 */
-               if (cmd->state_active) {
-                       spin_lock_irqsave(&cmd->t_state_lock, flags);
+               if (cmd->state_active)
                        target_remove_from_state_list(cmd);
-                       spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-               }
 
                if (cmd->se_lun)
                        transport_lun_remove_cmd(cmd);
 
-               ret = transport_put_cmd(cmd);
+               if (!aborted || tas)
+                       ret = transport_put_cmd(cmd);
+       }
+       /*
+        * If the task has been internally aborted due to TMR ABORT_TASK
+        * or LUN_RESET, target_core_tmr.c is responsible for performing
+        * the remaining calls to target_put_sess_cmd(), and not the
+        * callers of this function.
+        */
+       if (aborted) {
+               pr_debug("Detected CMD_T_ABORTED for ITT: %llu\n", cmd->tag);
+               wait_for_completion(&cmd->cmd_wait_comp);
+               cmd->se_tfo->release_cmd(cmd);
+               ret = 1;
        }
        return ret;
 }
@@ -2517,26 +2493,46 @@ out:
 }
 EXPORT_SYMBOL(target_get_sess_cmd);
 
+static void target_free_cmd_mem(struct se_cmd *cmd)
+{
+       transport_free_pages(cmd);
+
+       if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
+               core_tmr_release_req(cmd->se_tmr_req);
+       if (cmd->t_task_cdb != cmd->__t_task_cdb)
+               kfree(cmd->t_task_cdb);
+}
+
 static void target_release_cmd_kref(struct kref *kref)
 {
        struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
        struct se_session *se_sess = se_cmd->se_sess;
        unsigned long flags;
+       bool fabric_stop;
 
        spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
        if (list_empty(&se_cmd->se_cmd_list)) {
                spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+               target_free_cmd_mem(se_cmd);
                se_cmd->se_tfo->release_cmd(se_cmd);
                return;
        }
-       if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) {
+
+       spin_lock(&se_cmd->t_state_lock);
+       fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP);
+       spin_unlock(&se_cmd->t_state_lock);
+
+       if (se_cmd->cmd_wait_set || fabric_stop) {
+               list_del_init(&se_cmd->se_cmd_list);
                spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+               target_free_cmd_mem(se_cmd);
                complete(&se_cmd->cmd_wait_comp);
                return;
        }
-       list_del(&se_cmd->se_cmd_list);
+       list_del_init(&se_cmd->se_cmd_list);
        spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 
+       target_free_cmd_mem(se_cmd);
        se_cmd->se_tfo->release_cmd(se_cmd);
 }
 
@@ -2548,6 +2544,7 @@ int target_put_sess_cmd(struct se_cmd *se_cmd)
        struct se_session *se_sess = se_cmd->se_sess;
 
        if (!se_sess) {
+               target_free_cmd_mem(se_cmd);
                se_cmd->se_tfo->release_cmd(se_cmd);
                return 1;
        }
@@ -2564,6 +2561,7 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess)
 {
        struct se_cmd *se_cmd;
        unsigned long flags;
+       int rc;
 
        spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
        if (se_sess->sess_tearing_down) {
@@ -2573,8 +2571,15 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess)
        se_sess->sess_tearing_down = 1;
        list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list);
 
-       list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list)
-               se_cmd->cmd_wait_set = 1;
+       list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) {
+               rc = kref_get_unless_zero(&se_cmd->cmd_kref);
+               if (rc) {
+                       se_cmd->cmd_wait_set = 1;
+                       spin_lock(&se_cmd->t_state_lock);
+                       se_cmd->transport_state |= CMD_T_FABRIC_STOP;
+                       spin_unlock(&se_cmd->t_state_lock);
+               }
+       }
 
        spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 }
@@ -2587,15 +2592,25 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
 {
        struct se_cmd *se_cmd, *tmp_cmd;
        unsigned long flags;
+       bool tas;
 
        list_for_each_entry_safe(se_cmd, tmp_cmd,
                                &se_sess->sess_wait_list, se_cmd_list) {
-               list_del(&se_cmd->se_cmd_list);
+               list_del_init(&se_cmd->se_cmd_list);
 
                pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:"
                        " %d\n", se_cmd, se_cmd->t_state,
                        se_cmd->se_tfo->get_cmd_state(se_cmd));
 
+               spin_lock_irqsave(&se_cmd->t_state_lock, flags);
+               tas = (se_cmd->transport_state & CMD_T_TAS);
+               spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
+
+               if (!target_put_sess_cmd(se_cmd)) {
+                       if (tas)
+                               target_put_sess_cmd(se_cmd);
+               }
+
                wait_for_completion(&se_cmd->cmd_wait_comp);
                pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d"
                        " fabric state: %d\n", se_cmd, se_cmd->t_state,
@@ -2617,53 +2632,75 @@ void transport_clear_lun_ref(struct se_lun *lun)
        wait_for_completion(&lun->lun_ref_comp);
 }
 
-/**
- * transport_wait_for_tasks - wait for completion to occur
- * @cmd:       command to wait
- *
- * Called from frontend fabric context to wait for storage engine
- * to pause and/or release frontend generated struct se_cmd.
- */
-bool transport_wait_for_tasks(struct se_cmd *cmd)
+static bool
+__transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop,
+                          bool *aborted, bool *tas, unsigned long *flags)
+       __releases(&cmd->t_state_lock)
+       __acquires(&cmd->t_state_lock)
 {
-       unsigned long flags;
 
-       spin_lock_irqsave(&cmd->t_state_lock, flags);
+       assert_spin_locked(&cmd->t_state_lock);
+       WARN_ON_ONCE(!irqs_disabled());
+
+       if (fabric_stop)
+               cmd->transport_state |= CMD_T_FABRIC_STOP;
+
+       if (cmd->transport_state & CMD_T_ABORTED)
+               *aborted = true;
+
+       if (cmd->transport_state & CMD_T_TAS)
+               *tas = true;
+
        if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) &&
-           !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
-               spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+           !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
                return false;
-       }
 
        if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) &&
-           !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
-               spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+           !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
                return false;
-       }
 
-       if (!(cmd->transport_state & CMD_T_ACTIVE)) {
-               spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+       if (!(cmd->transport_state & CMD_T_ACTIVE))
+               return false;
+
+       if (fabric_stop && *aborted)
                return false;
-       }
 
        cmd->transport_state |= CMD_T_STOP;
 
-       pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d, t_state: %d, CMD_T_STOP\n",
-               cmd, cmd->tag, cmd->se_tfo->get_cmd_state(cmd), cmd->t_state);
+       pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d,"
+                " t_state: %d, CMD_T_STOP\n", cmd, cmd->tag,
+                cmd->se_tfo->get_cmd_state(cmd), cmd->t_state);
 
-       spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+       spin_unlock_irqrestore(&cmd->t_state_lock, *flags);
 
        wait_for_completion(&cmd->t_transport_stop_comp);
 
-       spin_lock_irqsave(&cmd->t_state_lock, flags);
+       spin_lock_irqsave(&cmd->t_state_lock, *flags);
        cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP);
 
-       pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->t_transport_stop_comp) for ITT: 0x%08llx\n",
-               cmd->tag);
+       pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->"
+                "t_transport_stop_comp) for ITT: 0x%08llx\n", cmd->tag);
 
+       return true;
+}
+
+/**
+ * transport_wait_for_tasks - wait for completion to occur
+ * @cmd:       command to wait
+ *
+ * Called from frontend fabric context to wait for storage engine
+ * to pause and/or release frontend generated struct se_cmd.
+ */
+bool transport_wait_for_tasks(struct se_cmd *cmd)
+{
+       unsigned long flags;
+       bool ret, aborted = false, tas = false;
+
+       spin_lock_irqsave(&cmd->t_state_lock, flags);
+       ret = __transport_wait_for_tasks(cmd, false, &aborted, &tas, &flags);
        spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-       return true;
+       return ret;
 }
 EXPORT_SYMBOL(transport_wait_for_tasks);
 
@@ -2845,28 +2882,49 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd,
 }
 EXPORT_SYMBOL(transport_send_check_condition_and_sense);
 
-int transport_check_aborted_status(struct se_cmd *cmd, int send_status)
+static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status)
+       __releases(&cmd->t_state_lock)
+       __acquires(&cmd->t_state_lock)
 {
+       assert_spin_locked(&cmd->t_state_lock);
+       WARN_ON_ONCE(!irqs_disabled());
+
        if (!(cmd->transport_state & CMD_T_ABORTED))
                return 0;
-
        /*
         * If cmd has been aborted but either no status is to be sent or it has
         * already been sent, just return
         */
-       if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS))
+       if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) {
+               if (send_status)
+                       cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
                return 1;
+       }
 
-       pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08llx\n",
-                cmd->t_task_cdb[0], cmd->tag);
+       pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB:"
+               " 0x%02x ITT: 0x%08llx\n", cmd->t_task_cdb[0], cmd->tag);
 
        cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS;
        cmd->scsi_status = SAM_STAT_TASK_ABORTED;
        trace_target_cmd_complete(cmd);
+
+       spin_unlock_irq(&cmd->t_state_lock);
        cmd->se_tfo->queue_status(cmd);
+       spin_lock_irq(&cmd->t_state_lock);
 
        return 1;
 }
+
+int transport_check_aborted_status(struct se_cmd *cmd, int send_status)
+{
+       int ret;
+
+       spin_lock_irq(&cmd->t_state_lock);
+       ret = __transport_check_aborted_status(cmd, send_status);
+       spin_unlock_irq(&cmd->t_state_lock);
+
+       return ret;
+}
 EXPORT_SYMBOL(transport_check_aborted_status);
 
 void transport_send_task_abort(struct se_cmd *cmd)
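
__transport_check_aborted_status() above now runs under t_state_lock, so the
->queue_status() fabric callback, which may sleep, is invoked with the lock
dropped and the lock is retaken afterwards. A hedged sketch of that
drop/reacquire shape, with hypothetical names:

#include <linux/spinlock.h>

static void notify_unlocked(spinlock_t *lock, void (*cb)(void *), void *arg)
{
	spin_unlock_irq(lock);	/* cb may sleep; cannot hold a spinlock */
	cb(arg);
	spin_lock_irq(lock);	/* caller still expects the lock held */
}
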
@@ -2888,11 +2946,17 @@ void transport_send_task_abort(struct se_cmd *cmd)
         */
        if (cmd->data_direction == DMA_TO_DEVICE) {
                if (cmd->se_tfo->write_pending_status(cmd) != 0) {
-                       cmd->transport_state |= CMD_T_ABORTED;
+                       spin_lock_irqsave(&cmd->t_state_lock, flags);
+                       if (cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS) {
+                               spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+                               goto send_abort;
+                       }
                        cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
+                       spin_unlock_irqrestore(&cmd->t_state_lock, flags);
                        return;
                }
        }
+send_abort:
        cmd->scsi_status = SAM_STAT_TASK_ABORTED;
 
        transport_lun_remove_cmd(cmd);
@@ -2909,8 +2973,17 @@ static void target_tmr_work(struct work_struct *work)
        struct se_cmd *cmd = container_of(work, struct se_cmd, work);
        struct se_device *dev = cmd->se_dev;
        struct se_tmr_req *tmr = cmd->se_tmr_req;
+       unsigned long flags;
        int ret;
 
+       spin_lock_irqsave(&cmd->t_state_lock, flags);
+       if (cmd->transport_state & CMD_T_ABORTED) {
+               tmr->response = TMR_FUNCTION_REJECTED;
+               spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+               goto check_stop;
+       }
+       spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+
        switch (tmr->function) {
        case TMR_ABORT_TASK:
                core_tmr_abort_task(dev, tmr, cmd->se_sess);
@@ -2943,9 +3016,17 @@ static void target_tmr_work(struct work_struct *work)
                break;
        }
 
+       spin_lock_irqsave(&cmd->t_state_lock, flags);
+       if (cmd->transport_state & CMD_T_ABORTED) {
+               spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+               goto check_stop;
+       }
        cmd->t_state = TRANSPORT_ISTATE_PROCESSING;
+       spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+
        cmd->se_tfo->queue_tm_rsp(cmd);
 
+check_stop:
        transport_cmd_check_stop_to_fabric(cmd);
 }
 
index dd600e5ead719ba95d41ae0b3b4eeed8a9161eab..94f5154ac788594e4a223147f02c20642ec842df 100644 (file)
@@ -903,7 +903,7 @@ static int tcmu_configure_device(struct se_device *dev)
        info->version = __stringify(TCMU_MAILBOX_VERSION);
 
        info->mem[0].name = "tcm-user command & data buffer";
-       info->mem[0].addr = (phys_addr_t) udev->mb_addr;
+       info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr;
        info->mem[0].size = TCMU_RING_SIZE;
        info->mem[0].memtype = UIO_MEM_VIRTUAL;
 
index 8cc4ac64a91c36347b9307addb88ae99d545d2b7..7c92c09be21386c3d60ff41b76a6694234c93432 100644 (file)
@@ -195,7 +195,7 @@ config IMX_THERMAL
          passive trip is crossed.
 
 config SPEAR_THERMAL
-       bool "SPEAr thermal sensor driver"
+       tristate "SPEAr thermal sensor driver"
        depends on PLAT_SPEAR || COMPILE_TEST
        depends on OF
        help
@@ -237,8 +237,8 @@ config DOVE_THERMAL
          framework.
 
 config DB8500_THERMAL
-       bool "DB8500 thermal management"
-       depends on ARCH_U8500
+       tristate "DB8500 thermal management"
+       depends on MFD_DB8500_PRCMU
        default y
        help
          Adds DB8500 thermal management implementation according to the thermal
index e3fbc5a5d88f166930e8633e671348614d00dd4d..6ceac4f2d4b227d52045632992568d08ea158c5b 100644 (file)
@@ -377,26 +377,28 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
  * get_load() - get load for a cpu since last updated
  * @cpufreq_device:    &struct cpufreq_cooling_device for this cpu
  * @cpu:       cpu number
+ * @cpu_idx:   index of the cpu in cpufreq_device->allowed_cpus
  *
  * Return: The average load of cpu @cpu in percentage since this
  * function was last called.
  */
-static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
+                   int cpu_idx)
 {
        u32 load;
        u64 now, now_idle, delta_time, delta_idle;
 
        now_idle = get_cpu_idle_time(cpu, &now, 0);
-       delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
-       delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];
+       delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx];
+       delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx];
 
        if (delta_time <= delta_idle)
                load = 0;
        else
                load = div64_u64(100 * (delta_time - delta_idle), delta_time);
 
-       cpufreq_device->time_in_idle[cpu] = now_idle;
-       cpufreq_device->time_in_idle_timestamp[cpu] = now;
+       cpufreq_device->time_in_idle[cpu_idx] = now_idle;
+       cpufreq_device->time_in_idle_timestamp[cpu_idx] = now;
 
        return load;
 }
@@ -598,7 +600,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
                u32 load;
 
                if (cpu_online(cpu))
-                       load = get_load(cpufreq_device, cpu);
+                       load = get_load(cpufreq_device, cpu, i);
                else
                        load = 0;
 
index ccc0ad02d06698108ec486d11f7b0dc69c7c734a..36fa724a36c851d463afa53a67d1a8526efb75dc 100644 (file)
 /* Braswell thermal reporting device */
 #define PCI_DEVICE_ID_PROC_BSW_THERMAL 0x22DC
 
+/* Broxton thermal reporting device */
+#define PCI_DEVICE_ID_PROC_BXT0_THERMAL  0x0A8C
+#define PCI_DEVICE_ID_PROC_BXT1_THERMAL  0x1A8C
+#define PCI_DEVICE_ID_PROC_BXTX_THERMAL  0x4A8C
+#define PCI_DEVICE_ID_PROC_BXTP_THERMAL  0x5A8C
+
 struct power_config {
        u32     index;
        u32     min_uw;
@@ -404,6 +410,10 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_HSB_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_SKL_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BSW_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT0_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT1_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTX_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTP_THERMAL)},
        { 0, },
 };
 
index 50c7da79be830a43b342999ca80ef264431e0078..00d81af648b8ed93a9d7c2e09aef8979f7d0606d 100644 (file)
@@ -136,7 +136,7 @@ struct pch_dev_ops {
 
 
 /* dev ops for Wildcat Point */
-static struct pch_dev_ops pch_dev_ops_wpt = {
+static const struct pch_dev_ops pch_dev_ops_wpt = {
        .hw_init = pch_wpt_init,
        .get_temp = pch_wpt_get_temp,
 };
index be4eedcb839ac22158fe180c2abc37df712d9511..9043f8f918529bd600eda1bbffe3bdf91f036ef4 100644 (file)
@@ -475,14 +475,10 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data,
 
        sensor_np = of_node_get(dev->of_node);
 
-       for_each_child_of_node(np, child) {
+       for_each_available_child_of_node(np, child) {
                struct of_phandle_args sensor_specs;
                int ret, id;
 
-               /* Check whether child is enabled or not */
-               if (!of_device_is_available(child))
-                       continue;
-
                /* For now, thermal framework supports only 1 sensor per zone */
                ret = of_parse_phandle_with_args(child, "thermal-sensors",
                                                 "#thermal-sensor-cells",
@@ -881,16 +877,12 @@ int __init of_parse_thermal_zones(void)
                return 0; /* Run successfully on systems without thermal DT */
        }
 
-       for_each_child_of_node(np, child) {
+       for_each_available_child_of_node(np, child) {
                struct thermal_zone_device *zone;
                struct thermal_zone_params *tzp;
                int i, mask = 0;
                u32 prop;
 
-               /* Check whether child is enabled or not */
-               if (!of_device_is_available(child))
-                       continue;
-
                tz = thermal_of_build_thermal_zone(child);
                if (IS_ERR(tz)) {
                        pr_err("failed to build thermal zone %s: %ld\n",
@@ -968,13 +960,9 @@ void of_thermal_destroy_zones(void)
                return;
        }
 
-       for_each_child_of_node(np, child) {
+       for_each_available_child_of_node(np, child) {
                struct thermal_zone_device *zone;
 
-               /* Check whether child is enabled or not */
-               if (!of_device_is_available(child))
-                       continue;
-
                zone = thermal_zone_get_zone_by_name(child->name);
                if (IS_ERR(zone))
                        continue;
index 13d01edc7a043812611719bf7a342260f09ababa..0e735acea33afc7b8e747857b57046647d50e7d9 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reboot.h>
@@ -75,11 +76,13 @@ struct rcar_thermal_priv {
 #define rcar_has_irq_support(priv)     ((priv)->common->base)
 #define rcar_id_to_shift(priv)         ((priv)->id * 8)
 
-#ifdef DEBUG
-# define rcar_force_update_temp(priv)  1
-#else
-# define rcar_force_update_temp(priv)  0
-#endif
+#define USE_OF_THERMAL 1
+static const struct of_device_id rcar_thermal_dt_ids[] = {
+       { .compatible = "renesas,rcar-thermal", },
+       { .compatible = "renesas,rcar-gen2-thermal", .data = (void *)USE_OF_THERMAL },
+       {},
+};
+MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
 
 /*
  *             basic functions
@@ -200,20 +203,46 @@ err_out_unlock:
        return ret;
 }
 
-static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
+static int rcar_thermal_get_current_temp(struct rcar_thermal_priv *priv,
+                                        int *temp)
 {
-       struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+       int tmp;
+       int ret;
 
-       if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv))
-               rcar_thermal_update_temp(priv);
+       ret = rcar_thermal_update_temp(priv);
+       if (ret < 0)
+               return ret;
 
        mutex_lock(&priv->lock);
-       *temp =  MCELSIUS((priv->ctemp * 5) - 65);
+       tmp = MCELSIUS((priv->ctemp * 5) - 65);
        mutex_unlock(&priv->lock);
 
+       if ((tmp < MCELSIUS(-45)) || (tmp > MCELSIUS(125))) {
+               struct device *dev = rcar_priv_to_dev(priv);
+
+               dev_err(dev, "failed to measure temperature correctly\n");
+               return -EIO;
+       }
+
+       *temp = tmp;
+
        return 0;
 }
 
+static int rcar_thermal_of_get_temp(void *data, int *temp)
+{
+       struct rcar_thermal_priv *priv = data;
+
+       return rcar_thermal_get_current_temp(priv, temp);
+}
+
+static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
+{
+       struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+
+       return rcar_thermal_get_current_temp(priv, temp);
+}
+
 static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone,
                                      int trip, enum thermal_trip_type *type)
 {
@@ -270,6 +299,10 @@ static int rcar_thermal_notify(struct thermal_zone_device *zone,
        return 0;
 }
 
+static const struct thermal_zone_of_device_ops rcar_thermal_zone_of_ops = {
+       .get_temp       = rcar_thermal_of_get_temp,
+};
+
 static struct thermal_zone_device_ops rcar_thermal_zone_ops = {
        .get_temp       = rcar_thermal_get_temp,
        .get_trip_type  = rcar_thermal_get_trip_type,
@@ -288,6 +321,9 @@ static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable)
        unsigned long flags;
        u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */
 
+       if (!rcar_has_irq_support(priv))
+               return;
+
        spin_lock_irqsave(&common->lock, flags);
 
        rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask);
@@ -299,14 +335,24 @@ static void rcar_thermal_work(struct work_struct *work)
 {
        struct rcar_thermal_priv *priv;
        int cctemp, nctemp;
+       int ret;
 
        priv = container_of(work, struct rcar_thermal_priv, work.work);
 
-       rcar_thermal_get_temp(priv->zone, &cctemp);
-       rcar_thermal_update_temp(priv);
+       ret = rcar_thermal_get_current_temp(priv, &cctemp);
+       if (ret < 0)
+               return;
+
+       ret = rcar_thermal_update_temp(priv);
+       if (ret < 0)
+               return;
+
        rcar_thermal_irq_enable(priv);
 
-       rcar_thermal_get_temp(priv->zone, &nctemp);
+       ret = rcar_thermal_get_current_temp(priv, &nctemp);
+       if (ret < 0)
+               return;
+
        if (nctemp != cctemp)
                thermal_zone_device_update(priv->zone);
 }
@@ -368,8 +414,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
        struct rcar_thermal_priv *priv;
 
        rcar_thermal_for_each_priv(priv, common) {
-               if (rcar_has_irq_support(priv))
-                       rcar_thermal_irq_disable(priv);
+               rcar_thermal_irq_disable(priv);
                thermal_zone_device_unregister(priv->zone);
        }
 
@@ -385,6 +430,8 @@ static int rcar_thermal_probe(struct platform_device *pdev)
        struct rcar_thermal_priv *priv;
        struct device *dev = &pdev->dev;
        struct resource *res, *irq;
+       const struct of_device_id *of_id = of_match_device(rcar_thermal_dt_ids, dev);
+       unsigned long of_data = (unsigned long)of_id->data;
        int mres = 0;
        int i;
        int ret = -ENODEV;
@@ -441,9 +488,17 @@ static int rcar_thermal_probe(struct platform_device *pdev)
                mutex_init(&priv->lock);
                INIT_LIST_HEAD(&priv->list);
                INIT_DELAYED_WORK(&priv->work, rcar_thermal_work);
-               rcar_thermal_update_temp(priv);
+               ret = rcar_thermal_update_temp(priv);
+               if (ret < 0)
+                       goto error_unregister;
 
-               priv->zone = thermal_zone_device_register("rcar_thermal",
+               if (of_data == USE_OF_THERMAL)
+                       priv->zone = thermal_zone_of_sensor_register(
+                                               dev, i, priv,
+                                               &rcar_thermal_zone_of_ops);
+               else
+                       priv->zone = thermal_zone_device_register(
+                                               "rcar_thermal",
                                                1, 0, priv,
                                                &rcar_thermal_zone_ops, NULL, 0,
                                                idle);
@@ -453,8 +508,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
                        goto error_unregister;
                }
 
-               if (rcar_has_irq_support(priv))
-                       rcar_thermal_irq_enable(priv);
+               rcar_thermal_irq_enable(priv);
 
                list_move_tail(&priv->list, &common->head);
 
@@ -484,12 +538,6 @@ error_unregister:
        return ret;
 }
 
-static const struct of_device_id rcar_thermal_dt_ids[] = {
-       { .compatible = "renesas,rcar-thermal", },
-       {},
-};
-MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
-
 static struct platform_driver rcar_thermal_driver = {
        .driver = {
                .name   = "rcar_thermal",
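
The probe path above selects its registration call from the .data field of the matched of_device_id. A condensed kernel-style sketch of that pattern (illustrative only; names prefixed example_ are hypothetical):

    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    #define USE_OF_THERMAL 1

    static const struct of_device_id example_dt_ids[] = {
            { .compatible = "renesas,rcar-thermal" },
            { .compatible = "renesas,rcar-gen2-thermal",
              .data = (void *)USE_OF_THERMAL },
            { /* sentinel */ },
    };

    static int example_probe(struct platform_device *pdev)
    {
            const struct of_device_id *of_id =
                    of_match_device(example_dt_ids, &pdev->dev);
            /* of_id is non-NULL here: probe only runs after a match. */
            unsigned long of_data = (unsigned long)of_id->data;

            if (of_data == USE_OF_THERMAL)
                    return 0;       /* thermal_zone_of_sensor_register() path */
            return 0;               /* thermal_zone_device_register() path */
    }
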
index e845841ab036cc82033d0a6829058a0f15f2543e..b58e3fb9b31186921e51edd0fa70bfe0e8c88059 100644 (file)
@@ -38,7 +38,7 @@ enum tshut_mode {
 };
 
 /**
- * the system Temperature Sensors tshut(tshut) polarity
+ * The system Temperature Sensors tshut(tshut) polarity
  * the bit 8 is tshut polarity.
  * 0: low active, 1: high active
  */
@@ -57,10 +57,10 @@ enum sensor_id {
 };
 
 /**
-* The conversion table has the adc value and temperature.
-* ADC_DECREMENT is the adc value decremnet.(e.g. v2_code_table)
-* ADC_INCREMNET is the adc value incremnet.(e.g. v3_code_table)
-*/
+ * The conversion table has the adc value and temperature.
+ * ADC_DECREMENT: the adc value decreases with rising temperature. (e.g. v2_code_table)
+ * ADC_INCREMENT: the adc value increases with rising temperature. (e.g. v3_code_table)
+ */
 enum adc_sort_mode {
        ADC_DECREMENT = 0,
        ADC_INCREMENT,
@@ -72,16 +72,17 @@ enum adc_sort_mode {
  */
 #define SOC_MAX_SENSORS        2
 
+/**
+ * struct chip_tsadc_table: hold information about chip-specific differences
+ * @id: conversion table
+ * @length: size of conversion table
+ * @data_mask: mask to apply on data inputs
+ * @mode: sort mode of this adc variant (incrementing or decrementing)
+ */
 struct chip_tsadc_table {
        const struct tsadc_table *id;
-
-       /* the array table size*/
        unsigned int length;
-
-       /* that analogic mask data */
        u32 data_mask;
-
-       /* the sort mode is adc value that increment or decrement in table */
        enum adc_sort_mode mode;
 };
 
@@ -153,6 +154,7 @@ struct rockchip_thermal_data {
 #define TSADCV2_SHUT_2GPIO_SRC_EN(chn)         BIT(4 + (chn))
 #define TSADCV2_SHUT_2CRU_SRC_EN(chn)          BIT(8 + (chn))
 
+#define TSADCV1_INT_PD_CLEAR_MASK              ~BIT(16)
 #define TSADCV2_INT_PD_CLEAR_MASK              ~BIT(8)
 
 #define TSADCV2_DATA_MASK                      0xfff
@@ -168,6 +170,51 @@ struct tsadc_table {
        int temp;
 };
 
+/**
+ * Note:
+ * The code-to-temperature mapping of the temperature sensor is a piecewise
+ * linear curve. Any code falling between two given entries can be linearly
+ * interpolated.
+ * The code-to-temperature mapping should be updated based on silicon results.
+ */
+static const struct tsadc_table v1_code_table[] = {
+       {TSADCV3_DATA_MASK, -40000},
+       {436, -40000},
+       {431, -35000},
+       {426, -30000},
+       {421, -25000},
+       {416, -20000},
+       {411, -15000},
+       {406, -10000},
+       {401, -5000},
+       {395, 0},
+       {390, 5000},
+       {385, 10000},
+       {380, 15000},
+       {375, 20000},
+       {370, 25000},
+       {364, 30000},
+       {359, 35000},
+       {354, 40000},
+       {349, 45000},
+       {343, 50000},
+       {338, 55000},
+       {333, 60000},
+       {328, 65000},
+       {322, 70000},
+       {317, 75000},
+       {312, 80000},
+       {307, 85000},
+       {301, 90000},
+       {296, 95000},
+       {291, 100000},
+       {286, 105000},
+       {280, 110000},
+       {275, 115000},
+       {270, 120000},
+       {264, 125000},
+};
+
 static const struct tsadc_table v2_code_table[] = {
        {TSADCV2_DATA_MASK, -40000},
        {3800, -40000},
@@ -245,6 +292,44 @@ static const struct tsadc_table v3_code_table[] = {
        {TSADCV3_DATA_MASK, 125000},
 };
 
+static const struct tsadc_table v4_code_table[] = {
+       {TSADCV3_DATA_MASK, -40000},
+       {431, -40000},
+       {426, -35000},
+       {421, -30000},
+       {415, -25000},
+       {410, -20000},
+       {405, -15000},
+       {399, -10000},
+       {394, -5000},
+       {389, 0},
+       {383, 5000},
+       {378, 10000},
+       {373, 15000},
+       {367, 20000},
+       {362, 25000},
+       {357, 30000},
+       {351, 35000},
+       {346, 40000},
+       {340, 45000},
+       {335, 50000},
+       {330, 55000},
+       {324, 60000},
+       {319, 65000},
+       {313, 70000},
+       {308, 75000},
+       {302, 80000},
+       {297, 85000},
+       {291, 90000},
+       {286, 95000},
+       {281, 100000},
+       {275, 105000},
+       {270, 110000},
+       {264, 115000},
+       {259, 120000},
+       {253, 125000},
+};
+
 static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
                                   int temp)
 {
@@ -368,6 +453,14 @@ static void rk_tsadcv2_initialize(void __iomem *regs,
                       regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
 }
 
+static void rk_tsadcv1_irq_ack(void __iomem *regs)
+{
+       u32 val;
+
+       val = readl_relaxed(regs + TSADCV2_INT_PD);
+       writel_relaxed(val & TSADCV1_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+}
+
 static void rk_tsadcv2_irq_ack(void __iomem *regs)
 {
        u32 val;
@@ -429,6 +522,29 @@ static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs,
        writel_relaxed(val, regs + TSADCV2_INT_EN);
 }
 
+static const struct rockchip_tsadc_chip rk3228_tsadc_data = {
+       .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+       .chn_num = 1, /* one channel for tsadc */
+
+       .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO to PMIC */
+       .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+       .tshut_temp = 95000,
+
+       .initialize = rk_tsadcv2_initialize,
+       .irq_ack = rk_tsadcv1_irq_ack,
+       .control = rk_tsadcv2_control,
+       .get_temp = rk_tsadcv2_get_temp,
+       .set_tshut_temp = rk_tsadcv2_tshut_temp,
+       .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+       .table = {
+               .id = v1_code_table,
+               .length = ARRAY_SIZE(v1_code_table),
+               .data_mask = TSADCV3_DATA_MASK,
+               .mode = ADC_DECREMENT,
+       },
+};
+
 static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
        .chn_id[SENSOR_CPU] = 1, /* cpu sensor is channel 1 */
        .chn_id[SENSOR_GPU] = 2, /* gpu sensor is channel 2 */
@@ -477,7 +593,35 @@ static const struct rockchip_tsadc_chip rk3368_tsadc_data = {
        },
 };
 
+static const struct rockchip_tsadc_chip rk3399_tsadc_data = {
+       .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+       .chn_id[SENSOR_GPU] = 1, /* gpu sensor is channel 1 */
+       .chn_num = 2, /* two channels for tsadc */
+
+       .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO to PMIC */
+       .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+       .tshut_temp = 95000,
+
+       .initialize = rk_tsadcv2_initialize,
+       .irq_ack = rk_tsadcv1_irq_ack,
+       .control = rk_tsadcv2_control,
+       .get_temp = rk_tsadcv2_get_temp,
+       .set_tshut_temp = rk_tsadcv2_tshut_temp,
+       .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+       .table = {
+               .id = v4_code_table,
+               .length = ARRAY_SIZE(v4_code_table),
+               .data_mask = TSADCV3_DATA_MASK,
+               .mode = ADC_DECREMENT,
+       },
+};
+
 static const struct of_device_id of_rockchip_thermal_match[] = {
+       {
+               .compatible = "rockchip,rk3228-tsadc",
+               .data = (void *)&rk3228_tsadc_data,
+       },
        {
                .compatible = "rockchip,rk3288-tsadc",
                .data = (void *)&rk3288_tsadc_data,
@@ -486,6 +630,10 @@ static const struct of_device_id of_rockchip_thermal_match[] = {
                .compatible = "rockchip,rk3368-tsadc",
                .data = (void *)&rk3368_tsadc_data,
        },
+       {
+               .compatible = "rockchip,rk3399-tsadc",
+               .data = (void *)&rk3399_tsadc_data,
+       },
        { /* end */ },
 };
 MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
@@ -617,7 +765,7 @@ rockchip_thermal_register_sensor(struct platform_device *pdev,
        return 0;
 }
 
-/*
+/**
  * Reset TSADC Controller, reset all tsadc registers.
  */
 static void rockchip_thermal_reset_controller(struct reset_control *reset)
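
The note above the v1_code_table describes a piecewise-linear code-to-temperature mapping. A small self-contained sketch of that interpolation, using the first three entries of v1_code_table (codes decrement as temperature rises, as in ADC_DECREMENT; out-of-range codes are not handled):

    #include <stdio.h>

    struct tsadc_entry { unsigned int code; int temp; /* millicelsius */ };

    static const struct tsadc_entry table[] = {
            {436, -40000}, {431, -35000}, {426, -30000},
    };
    #define TABLE_LEN (sizeof(table) / sizeof(table[0]))

    /* Linearly interpolate between the two entries bracketing @code.
     * Sketch assumes table[TABLE_LEN - 1].code <= code <= table[0].code. */
    static int code_to_temp(unsigned int code)
    {
            unsigned int i;

            for (i = 1; i < TABLE_LEN; i++) {
                    if (code >= table[i].code) {
                            int t0 = table[i - 1].temp;
                            int t1 = table[i].temp;
                            unsigned int hi = table[i - 1].code;
                            unsigned int lo = table[i].code;

                            return t0 + (t1 - t0) * (int)(hi - code) /
                                        (int)(hi - lo);
                    }
            }
            return table[TABLE_LEN - 1].temp;
    }

    int main(void)
    {
            /* 434 lies between 436 (-40000) and 431 (-35000): -38000. */
            printf("%d\n", code_to_temp(434));
            return 0;
    }
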
index 534dd913666283fa13eecfaeb8823ea3d55ac0c8..81b35aace9de0439c4dd76c4c4c0d91597d3b3f3 100644 (file)
@@ -54,8 +54,7 @@ static struct thermal_zone_device_ops ops = {
        .get_temp = thermal_get_temp,
 };
 
-#ifdef CONFIG_PM
-static int spear_thermal_suspend(struct device *dev)
+static int __maybe_unused spear_thermal_suspend(struct device *dev)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct thermal_zone_device *spear_thermal = platform_get_drvdata(pdev);
@@ -72,7 +71,7 @@ static int spear_thermal_suspend(struct device *dev)
        return 0;
 }
 
-static int spear_thermal_resume(struct device *dev)
+static int __maybe_unused spear_thermal_resume(struct device *dev)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct thermal_zone_device *spear_thermal = platform_get_drvdata(pdev);
@@ -94,7 +93,6 @@ static int spear_thermal_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(spear_thermal_pm_ops, spear_thermal_suspend,
                spear_thermal_resume);
index 2f9f7086ac3dd0d7a3a71015593fffacf8ee586e..ea9366ad3e6bb285e52e368691a0d495cbb3429f 100644 (file)
@@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance,
        next_target = instance->target;
        dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
 
+       if (!instance->initialized) {
+               if (throttle) {
+                       next_target = (cur_state + 1) >= instance->upper ?
+                                       instance->upper :
+                                       ((cur_state + 1) < instance->lower ?
+                                       instance->lower : (cur_state + 1));
+               } else {
+                       next_target = THERMAL_NO_TARGET;
+               }
+
+               return next_target;
+       }
+
        switch (trend) {
        case THERMAL_TREND_RAISING:
                if (throttle) {
@@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
                                        old_target, (int)instance->target);
 
-               if (old_target == instance->target)
+               if (instance->initialized && old_target == instance->target)
                        continue;
 
                /* Activate a passive thermal instance */
@@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                        instance->target == THERMAL_NO_TARGET)
                        update_passive_instance(tz, trip_type, -1);
 
-
+               instance->initialized = true;
                instance->cdev->updated = false; /* cdev needs update */
        }
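
The !instance->initialized branch above gives a freshly registered instance a sane first target: one state above the current one, clamped into [lower, upper], or no target when not throttling. The same selection logic, reduced to a runnable sketch:

    #include <stdio.h>

    #define THERMAL_NO_TARGET (-1L)

    /* First-pass target: step one state up, clamped to [lower, upper]. */
    static long first_target(long cur_state, long lower, long upper,
                             int throttle)
    {
            long next = cur_state + 1;

            if (!throttle)
                    return THERMAL_NO_TARGET;
            if (next >= upper)
                    return upper;
            if (next < lower)
                    return lower;
            return next;
    }

    int main(void)
    {
            printf("%ld\n", first_target(0, 1, 3, 1)); /* 1: clamped up    */
            printf("%ld\n", first_target(3, 1, 3, 1)); /* 3: clamped down  */
            printf("%ld\n", first_target(1, 1, 3, 0)); /* -1: no target    */
            return 0;
    }
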
 
index d9e525cc9c1ce24bcd9d55dd59730f375b15d552..a0a8fd1235e2a21cbe5553c440b26eafecf8042e 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/of.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
+#include <linux/suspend.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/thermal.h>
@@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list);
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
 
+static atomic_t in_suspend;
+
 static struct thermal_governor *def_governor;
 
 static struct thermal_governor *__find_governor(const char *name)
@@ -532,14 +535,31 @@ static void update_temperature(struct thermal_zone_device *tz)
        mutex_unlock(&tz->lock);
 
        trace_thermal_temperature(tz);
-       dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
-                               tz->last_temperature, tz->temperature);
+       if (tz->last_temperature == THERMAL_TEMP_INVALID)
+               dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
+                       tz->temperature);
+       else
+               dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+                       tz->last_temperature, tz->temperature);
+}
+
+static void thermal_zone_device_reset(struct thermal_zone_device *tz)
+{
+       struct thermal_instance *pos;
+
+       tz->temperature = THERMAL_TEMP_INVALID;
+       tz->passive = 0;
+       list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+               pos->initialized = false;
 }
 
 void thermal_zone_device_update(struct thermal_zone_device *tz)
 {
        int count;
 
+       if (atomic_read(&in_suspend))
+               return;
+
        if (!tz->ops->get_temp)
                return;
 
@@ -676,8 +696,12 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
                return -EINVAL;
 
        ret = tz->ops->set_trip_temp(tz, trip, temperature);
+       if (ret)
+               return ret;
 
-       return ret ? ret : count;
+       thermal_zone_device_update(tz);
+
+       return count;
 }
 
 static ssize_t
@@ -1321,6 +1345,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
        if (!result) {
                list_add_tail(&dev->tz_node, &tz->thermal_instances);
                list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
+               atomic_set(&tz->need_update, 1);
        }
        mutex_unlock(&cdev->lock);
        mutex_unlock(&tz->lock);
@@ -1430,6 +1455,7 @@ __thermal_cooling_device_register(struct device_node *np,
                                  const struct thermal_cooling_device_ops *ops)
 {
        struct thermal_cooling_device *cdev;
+       struct thermal_zone_device *pos = NULL;
        int result;
 
        if (type && strlen(type) >= THERMAL_NAME_LENGTH)
@@ -1474,6 +1500,12 @@ __thermal_cooling_device_register(struct device_node *np,
        /* Update binding information for 'this' new cdev */
        bind_cdev(cdev);
 
+       mutex_lock(&thermal_list_lock);
+       list_for_each_entry(pos, &thermal_tz_list, node)
+               if (atomic_cmpxchg(&pos->need_update, 1, 0))
+                       thermal_zone_device_update(pos);
+       mutex_unlock(&thermal_list_lock);
+
        return cdev;
 }
 
@@ -1806,6 +1838,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
        tz->trips = trips;
        tz->passive_delay = passive_delay;
        tz->polling_delay = polling_delay;
+       /* A new thermal zone needs to be updated anyway. */
+       atomic_set(&tz->need_update, 1);
 
        dev_set_name(&tz->device, "thermal_zone%d", tz->id);
        result = device_register(&tz->device);
@@ -1900,7 +1934,10 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 
        INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
 
-       thermal_zone_device_update(tz);
+       thermal_zone_device_reset(tz);
+       /* Update the new thermal zone and mark it as already updated. */
+       if (atomic_cmpxchg(&tz->need_update, 1, 0))
+               thermal_zone_device_update(tz);
 
        return tz;
 
@@ -2140,6 +2177,36 @@ static void thermal_unregister_governors(void)
        thermal_gov_power_allocator_unregister();
 }
 
+static int thermal_pm_notify(struct notifier_block *nb,
+                               unsigned long mode, void *_unused)
+{
+       struct thermal_zone_device *tz;
+
+       switch (mode) {
+       case PM_HIBERNATION_PREPARE:
+       case PM_RESTORE_PREPARE:
+       case PM_SUSPEND_PREPARE:
+               atomic_set(&in_suspend, 1);
+               break;
+       case PM_POST_HIBERNATION:
+       case PM_POST_RESTORE:
+       case PM_POST_SUSPEND:
+               atomic_set(&in_suspend, 0);
+               list_for_each_entry(tz, &thermal_tz_list, node) {
+                       thermal_zone_device_reset(tz);
+                       thermal_zone_device_update(tz);
+               }
+               break;
+       default:
+               break;
+       }
+       return 0;
+}
+
+static struct notifier_block thermal_pm_nb = {
+       .notifier_call = thermal_pm_notify,
+};
+
 static int __init thermal_init(void)
 {
        int result;
@@ -2160,6 +2227,11 @@ static int __init thermal_init(void)
        if (result)
                goto exit_netlink;
 
+       result = register_pm_notifier(&thermal_pm_nb);
+       if (result)
+               pr_warn("Thermal: Cannot register suspend notifier, error %d\n",
+                       result);
+
        return 0;
 
 exit_netlink:
@@ -2179,6 +2251,7 @@ error:
 
 static void __exit thermal_exit(void)
 {
+       unregister_pm_notifier(&thermal_pm_nb);
        of_thermal_destroy_zones();
        genetlink_exit();
        class_unregister(&thermal_class);
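
The suspend handling above hinges on a PM notifier that flips an atomic flag around suspend/resume. A minimal kernel-style sketch of that pattern (illustrative only; names prefixed example_ are hypothetical):

    #include <linux/atomic.h>
    #include <linux/notifier.h>
    #include <linux/suspend.h>

    static atomic_t example_in_suspend;

    static int example_pm_notify(struct notifier_block *nb,
                                 unsigned long mode, void *unused)
    {
            switch (mode) {
            case PM_SUSPEND_PREPARE:
                    atomic_set(&example_in_suspend, 1); /* gate updates */
                    break;
            case PM_POST_SUSPEND:
                    atomic_set(&example_in_suspend, 0);
                    /* re-validate any state cached across suspend here */
                    break;
            }
            return 0;
    }

    static struct notifier_block example_pm_nb = {
            .notifier_call = example_pm_notify,
    };

    /* Call register_pm_notifier(&example_pm_nb) from an init path and
     * unregister_pm_notifier(&example_pm_nb) on exit. */
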
index d7ac1fccd6598a80453dc51da655e37b30b85ac7..749d41abfbabecfc8fc33f54f4bc2e5d680450f8 100644 (file)
@@ -41,6 +41,7 @@ struct thermal_instance {
        struct thermal_zone_device *tz;
        struct thermal_cooling_device *cdev;
        int trip;
+       bool initialized;
        unsigned long upper;    /* Highest cooling state for this trip point */
        unsigned long lower;    /* Lowest cooling state for this trip point */
        unsigned long target;   /* expected cooling state */
index d9a5fc28fef4bbfe56850e258b726ca3464fd82b..b280abaad91b717639011f1407157eab5f3aaad1 100644 (file)
@@ -269,16 +269,13 @@ static void n_tty_check_throttle(struct tty_struct *tty)
 
 static void n_tty_check_unthrottle(struct tty_struct *tty)
 {
-       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-           tty->link->ldisc->ops->write_wakeup == n_tty_write_wakeup) {
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY) {
                if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE)
                        return;
                if (!tty->count)
                        return;
                n_tty_kick_worker(tty);
-               n_tty_write_wakeup(tty->link);
-               if (waitqueue_active(&tty->link->write_wait))
-                       wake_up_interruptible_poll(&tty->link->write_wait, POLLOUT);
+               tty_wakeup(tty->link);
                return;
        }
 
index b3110040164ae64fa29e66fae2d7f5bd4d7d139f..2348fa6137070e19c19d97a5dc436b1a0b762441 100644 (file)
@@ -681,7 +681,14 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 /* this is called once with whichever end is closed last */
 static void pty_unix98_shutdown(struct tty_struct *tty)
 {
-       devpts_kill_index(tty->driver_data, tty->index);
+       struct inode *ptmx_inode;
+
+       if (tty->driver->subtype == PTY_TYPE_MASTER)
+               ptmx_inode = tty->driver_data;
+       else
+               ptmx_inode = tty->link->driver_data;
+       devpts_kill_index(ptmx_inode, tty->index);
+       devpts_del_ref(ptmx_inode);
 }
 
 static const struct tty_operations ptm_unix98_ops = {
@@ -773,6 +780,18 @@ static int ptmx_open(struct inode *inode, struct file *filp)
        set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
        tty->driver_data = inode;
 
+       /*
+        * In the case where all references to ptmx inode are dropped and we
+        * still have /dev/tty opened pointing to the master/slave pair (ptmx
+        * is closed/released before /dev/tty), we must make sure that the inode
+        * is still valid when we call the final pty_unix98_shutdown, thus we
+        * hold an additional reference to the ptmx inode. For the same /dev/tty
+        * last close case, we also need to make sure the super_block isn't
+        * destroyed (devpts instance unmounted), before /dev/tty is closed and
+        * on its release devpts_kill_index is called.
+        */
+       devpts_add_ref(inode);
+
        tty_add_file(tty, filp);
 
        slave_inode = devpts_pty_new(inode,
index 4097f3f65b3bb1bfc3e3d4d6a40f05fb46c70618..7cd6f9a9054212d905e808bcfba2d76d676aaae2 100644 (file)
@@ -1379,6 +1379,9 @@ ce4100_serial_setup(struct serial_private *priv,
 #define PCI_DEVICE_ID_INTEL_BSW_UART1  0x228a
 #define PCI_DEVICE_ID_INTEL_BSW_UART2  0x228c
 
+#define PCI_DEVICE_ID_INTEL_BDW_UART1  0x9ce3
+#define PCI_DEVICE_ID_INTEL_BDW_UART2  0x9ce4
+
 #define BYT_PRV_CLK                    0x800
 #define BYT_PRV_CLK_EN                 (1 << 0)
 #define BYT_PRV_CLK_M_VAL_SHIFT                1
@@ -1461,11 +1464,13 @@ byt_serial_setup(struct serial_private *priv,
        switch (pdev->device) {
        case PCI_DEVICE_ID_INTEL_BYT_UART1:
        case PCI_DEVICE_ID_INTEL_BSW_UART1:
+       case PCI_DEVICE_ID_INTEL_BDW_UART1:
                rx_param->src_id = 3;
                tx_param->dst_id = 2;
                break;
        case PCI_DEVICE_ID_INTEL_BYT_UART2:
        case PCI_DEVICE_ID_INTEL_BSW_UART2:
+       case PCI_DEVICE_ID_INTEL_BDW_UART2:
                rx_param->src_id = 5;
                tx_param->dst_id = 4;
                break;
@@ -1936,6 +1941,7 @@ pci_wch_ch38x_setup(struct serial_private *priv,
 #define PCIE_VENDOR_ID_WCH             0x1c00
 #define PCIE_DEVICE_ID_WCH_CH382_2S1P  0x3250
 #define PCIE_DEVICE_ID_WCH_CH384_4S    0x3470
+#define PCIE_DEVICE_ID_WCH_CH382_2S    0x3253
 
 #define PCI_VENDOR_ID_PERICOM                  0x12D8
 #define PCI_DEVICE_ID_PERICOM_PI7C9X7951       0x7951
@@ -2062,6 +2068,20 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
                .subdevice      = PCI_ANY_ID,
                .setup          = byt_serial_setup,
        },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+               .device         = PCI_DEVICE_ID_INTEL_BDW_UART1,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = byt_serial_setup,
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+               .device         = PCI_DEVICE_ID_INTEL_BDW_UART2,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = byt_serial_setup,
+       },
        /*
         * ITE
         */
@@ -2618,6 +2638,14 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
                .subdevice      = PCI_ANY_ID,
                .setup          = pci_wch_ch353_setup,
        },
+       /* WCH CH382 2S card (16850 clone) */
+       {
+               .vendor         = PCIE_VENDOR_ID_WCH,
+               .device         = PCIE_DEVICE_ID_WCH_CH382_2S,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = pci_wch_ch38x_setup,
+       },
        /* WCH CH382 2S1P card (16850 clone) */
        {
                .vendor         = PCIE_VENDOR_ID_WCH,
@@ -2936,6 +2964,7 @@ enum pci_board_num_t {
        pbn_fintek_4,
        pbn_fintek_8,
        pbn_fintek_12,
+       pbn_wch382_2,
        pbn_wch384_4,
        pbn_pericom_PI7C9X7951,
        pbn_pericom_PI7C9X7952,
@@ -3756,6 +3785,13 @@ static struct pciserial_board pci_boards[] = {
                .base_baud      = 115200,
                .first_offset   = 0x40,
        },
+       [pbn_wch382_2] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 2,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+               .first_offset   = 0xC0,
+       },
        [pbn_wch384_4] = {
                .flags          = FL_BASE0,
                .num_ports      = 4,
@@ -5506,6 +5542,16 @@ static struct pci_device_id serial_pci_tbl[] = {
                PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
                pbn_byt },
 
+       /* Intel Broadwell */
+       {       PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
+               PCI_ANY_ID,  PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+               pbn_byt },
+       {       PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
+               PCI_ANY_ID,  PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+               pbn_byt },
+
        /*
         * Intel Quark x1000
         */
@@ -5545,6 +5591,10 @@ static struct pci_device_id serial_pci_tbl[] = {
                PCI_ANY_ID, PCI_ANY_ID,
                0, 0, pbn_b0_bt_2_115200 },
 
+       {       PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH382_2S,
+               PCI_ANY_ID, PCI_ANY_ID,
+               0, 0, pbn_wch382_2 },
+
        {       PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH384_4S,
                PCI_ANY_ID, PCI_ANY_ID,
                0, 0, pbn_wch384_4 },
index b645f9228ed77b90ac6d4791e1b0ab27056a066b..fa49eb1e2fa2429f1c162d1464d06fb727a91d1c 100644 (file)
@@ -1165,7 +1165,7 @@ serial_omap_type(struct uart_port *port)
 
 #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
 
-static void wait_for_xmitr(struct uart_omap_port *up)
+static void __maybe_unused wait_for_xmitr(struct uart_omap_port *up)
 {
        unsigned int status, tmout = 10000;
 
@@ -1343,7 +1343,7 @@ static inline void serial_omap_add_console_port(struct uart_omap_port *up)
 
 /* Enable or disable the rs485 support */
 static int
-serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf)
+serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485)
 {
        struct uart_omap_port *up = to_uart_omap_port(port);
        unsigned int mode;
@@ -1356,8 +1356,12 @@ serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf)
        up->ier = 0;
        serial_out(up, UART_IER, 0);
 
+       /* Clamp the delays to [0, 100ms] */
+       rs485->delay_rts_before_send = min(rs485->delay_rts_before_send, 100U);
+       rs485->delay_rts_after_send  = min(rs485->delay_rts_after_send, 100U);
+
        /* store new config */
-       port->rs485 = *rs485conf;
+       port->rs485 = *rs485;
 
        /*
         * Just as a precaution, only allow rs485
index 892c923547450518cb5d7a4714f570a6803253bd..a7eacef1bd2216ef697c12d046cb8975f9fefb20 100644 (file)
@@ -1463,13 +1463,13 @@ static int tty_reopen(struct tty_struct *tty)
 {
        struct tty_driver *driver = tty->driver;
 
-       if (!tty->count)
-               return -EIO;
-
        if (driver->type == TTY_DRIVER_TYPE_PTY &&
            driver->subtype == PTY_TYPE_MASTER)
                return -EIO;
 
+       if (!tty->count)
+               return -EAGAIN;
+
        if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN))
                return -EBUSY;
 
@@ -2065,9 +2065,13 @@ retry_open:
 
                if (tty) {
                        mutex_unlock(&tty_mutex);
-                       tty_lock(tty);
-                       /* safe to drop the kref from tty_driver_lookup_tty() */
-                       tty_kref_put(tty);
+                       retval = tty_lock_interruptible(tty);
+                       tty_kref_put(tty);  /* drop kref from tty_driver_lookup_tty() */
+                       if (retval) {
+                               if (retval == -EINTR)
+                                       retval = -ERESTARTSYS;
+                               goto err_unref;
+                       }
                        retval = tty_reopen(tty);
                        if (retval < 0) {
                                tty_unlock(tty);
@@ -2083,7 +2087,11 @@ retry_open:
 
        if (IS_ERR(tty)) {
                retval = PTR_ERR(tty);
-               goto err_file;
+               if (retval != -EAGAIN || signal_pending(current))
+                       goto err_file;
+               tty_free_file(filp);
+               schedule();
+               goto retry_open;
        }
 
        tty_add_file(tty, filp);
@@ -2152,6 +2160,7 @@ retry_open:
        return 0;
 err_unlock:
        mutex_unlock(&tty_mutex);
+err_unref:
        /* after locks to avoid deadlock */
        if (!IS_ERR_OR_NULL(driver))
                tty_driver_kref_put(driver);
@@ -2648,6 +2657,28 @@ static int tiocsetd(struct tty_struct *tty, int __user *p)
        return ret;
 }
 
+/**
+ *     tiocgetd        -       get line discipline
+ *     @tty: tty device
+ *     @p: pointer to user data
+ *
+ *     Retrieves the line discipline id directly from the ldisc.
+ *
+ *     Locking: waits for ldisc reference (in case the line discipline
+ *             is changing or the tty is being hungup)
+ */
+
+static int tiocgetd(struct tty_struct *tty, int __user *p)
+{
+       struct tty_ldisc *ld;
+       int ret;
+
+       ld = tty_ldisc_ref_wait(tty);
+       ret = put_user(ld->ops->num, p);
+       tty_ldisc_deref(ld);
+       return ret;
+}
+
 /**
  *     send_break      -       performed time break
  *     @tty: device to break on
@@ -2874,7 +2905,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case TIOCGSID:
                return tiocgsid(tty, real_tty, p);
        case TIOCGETD:
-               return put_user(tty->ldisc->ops->num, (int __user *)p);
+               return tiocgetd(tty, p);
        case TIOCSETD:
                return tiocsetd(tty, p);
        case TIOCVHANGUP:
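
tiocgetd() above replaces a bare tty->ldisc dereference with a waited reference so the read cannot race a discipline change or hangup. The pairing, as a kernel-style sketch:

    #include <linux/tty.h>

    /* Read a field from the line discipline without racing a hangup. */
    static int read_ldisc_num(struct tty_struct *tty)
    {
            struct tty_ldisc *ld;
            int num;

            ld = tty_ldisc_ref_wait(tty);   /* blocks while ldisc changes */
            num = ld->ops->num;
            tty_ldisc_deref(ld);            /* drop the reference */

            return num;
    }
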
index 77703a3912075a7b740eb4626a5159d55ea6f57a..dfa9ec03fa8e06dfe03a6e0ae04c62a2ac774103 100644 (file)
@@ -19,6 +19,19 @@ void __lockfunc tty_lock(struct tty_struct *tty)
 }
 EXPORT_SYMBOL(tty_lock);
 
+int tty_lock_interruptible(struct tty_struct *tty)
+{
+       int ret;
+
+       if (WARN(tty->magic != TTY_MAGIC, "L Bad %p\n", tty))
+               return -EIO;
+       tty_kref_get(tty);
+       ret = mutex_lock_interruptible(&tty->legacy_mutex);
+       if (ret)
+               tty_kref_put(tty);
+       return ret;
+}
+
 void __lockfunc tty_unlock(struct tty_struct *tty)
 {
        if (WARN(tty->magic != TTY_MAGIC, "U Bad %p\n", tty))
index e7cbc44eef57529a23c479413538f1c2f924b55c..bd51bdd0a7bf2536617b1a27e3c846105c31a821 100644 (file)
@@ -4250,6 +4250,7 @@ unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
 {
        return screenpos(vc, 2 * w_offset, viewed);
 }
+EXPORT_SYMBOL_GPL(screen_pos);
 
 void getconsxy(struct vc_data *vc, unsigned char *p)
 {
index b59195edf63634a3be94bcd3dc9ede6d87699185..b635ab67490de3a93a9c0df0f17848376ae1d947 100644 (file)
@@ -85,8 +85,8 @@ static int ci_hdrc_pci_probe(struct pci_dev *pdev,
 
        /* register a nop PHY */
        ci->phy = usb_phy_generic_register();
-       if (!ci->phy)
-               return -ENOMEM;
+       if (IS_ERR(ci->phy))
+               return PTR_ERR(ci->phy);
 
        memset(res, 0, sizeof(res));
        res[0].start    = pci_resource_start(pdev, 0);
index a4f7db2e18ddc295fc4ff1a0cad99bc1ff8b04bd..df47110bad2d6f277b3893bf48657ecfde165fcf 100644 (file)
@@ -100,6 +100,9 @@ static ssize_t ci_port_test_write(struct file *file, const char __user *ubuf,
        if (sscanf(buf, "%u", &mode) != 1)
                return -EINVAL;
 
+       if (mode > 255)
+               return -EBADRQC;
+
        pm_runtime_get_sync(ci->dev);
        spin_lock_irqsave(&ci->lock, flags);
        ret = hw_port_test_set(ci, mode);
index 26ca4f910cb020aae539f29b260a98fe1eee92b0..fa4e23930614a47ac14ece43e863286fadb91402 100644 (file)
@@ -428,7 +428,8 @@ static void acm_read_bulk_callback(struct urb *urb)
                set_bit(rb->index, &acm->read_urbs_free);
                dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n",
                                                        __func__, status);
-               return;
+               if ((status != -ENOENT) || (urb->actual_length == 0))
+                       return;
        }
 
        usb_mark_last_busy(acm->dev);
@@ -1404,6 +1405,8 @@ made_compressed_probe:
                                usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
                                NULL, acm->writesize, acm_write_bulk, snd);
                snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+               if (quirks & SEND_ZERO_PACKET)
+                       snd->urb->transfer_flags |= URB_ZERO_PACKET;
                snd->instance = acm;
        }
 
@@ -1838,6 +1841,11 @@ static const struct usb_device_id acm_ids[] = {
        },
 #endif
 
+       /* Samsung phone in firmware update mode */
+       { USB_DEVICE(0x04e8, 0x685d),
+       .driver_info = IGNORE_DEVICE,
+       },
+
        /* Exclude Infineon Flash Loader utility */
        { USB_DEVICE(0x058b, 0x0041),
        .driver_info = IGNORE_DEVICE,
@@ -1861,6 +1869,10 @@ static const struct usb_device_id acm_ids[] = {
        { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
                USB_CDC_ACM_PROTO_AT_CDMA) },
 
+       { USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */
+       .driver_info = SEND_ZERO_PACKET,
+       },
+
        { }
 };
 
index dd9af38e7cda612106e09c89a298c4397ccf7494..ccfaba9ab4e49cd4a009132397d9467bc949bfd0 100644 (file)
@@ -134,3 +134,4 @@ struct acm {
 #define IGNORE_DEVICE                  BIT(5)
 #define QUIRK_CONTROL_LINE_STATE       BIT(6)
 #define CLEAR_HALT_CONDITIONS          BIT(7)
+#define SEND_ZERO_PACKET               BIT(8)
index fd95ba6ec317fdacb13478d84c4157656893740c..f0decc0d69b5197d183c29ec16882a8fcc18208e 100644 (file)
@@ -1,5 +1,6 @@
 config USB_DWC2
        tristate "DesignWare USB2 DRD Core Support"
+       depends on HAS_DMA
        depends on USB || USB_GADGET
        help
          Say Y here if your system has a Dual Role Hi-Speed USB
index 39a0fa8a4c0aea17b9ecf330c127b110b4b54ba4..46c4ba75dc2afdb744bd32fbf047949ff50ec1fd 100644 (file)
@@ -572,12 +572,6 @@ static bool dwc2_force_mode(struct dwc2_hsotg *hsotg, bool host)
        set = host ? GUSBCFG_FORCEHOSTMODE : GUSBCFG_FORCEDEVMODE;
        clear = host ? GUSBCFG_FORCEDEVMODE : GUSBCFG_FORCEHOSTMODE;
 
-       /*
-        * If the force mode bit is already set, don't set it.
-        */
-       if ((gusbcfg & set) && !(gusbcfg & clear))
-               return false;
-
        gusbcfg &= ~clear;
        gusbcfg |= set;
        dwc2_writel(gusbcfg, hsotg->regs + GUSBCFG);
@@ -625,6 +619,12 @@ void dwc2_force_dr_mode(struct dwc2_hsotg *hsotg)
                         __func__, hsotg->dr_mode);
                break;
        }
+
+       /*
+        * NOTE: This is required for some rockchip soc based
+        * platforms.
+        */
+       msleep(50);
 }
 
 /*
@@ -3278,9 +3278,6 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
 /**
  * During device initialization, read various hardware configuration
  * registers and interpret the contents.
- *
- * This should be called during driver probe. It will perform a core
- * soft reset in order to get the reset values of the parameters.
  */
 int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 {
@@ -3288,7 +3285,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
        unsigned width;
        u32 hwcfg1, hwcfg2, hwcfg3, hwcfg4;
        u32 grxfsiz;
-       int retval;
 
        /*
         * Attempt to ensure this device is really a DWC_otg Controller.
@@ -3308,10 +3304,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
                hw->snpsid >> 12 & 0xf, hw->snpsid >> 8 & 0xf,
                hw->snpsid >> 4 & 0xf, hw->snpsid & 0xf, hw->snpsid);
 
-       retval = dwc2_core_reset(hsotg);
-       if (retval)
-               return retval;
-
        hwcfg1 = dwc2_readl(hsotg->regs + GHWCFG1);
        hwcfg2 = dwc2_readl(hsotg->regs + GHWCFG2);
        hwcfg3 = dwc2_readl(hsotg->regs + GHWCFG3);
index 36606fc33c0d31f0ed52d85bc626f3fe29e36099..a41274aa52adc5f8e1119544a90e8f79c421c2b5 100644 (file)
@@ -1174,14 +1174,11 @@ static int dwc2_process_non_isoc_desc(struct dwc2_hsotg *hsotg,
        failed = dwc2_update_non_isoc_urb_state_ddma(hsotg, chan, qtd, dma_desc,
                                                     halt_status, n_bytes,
                                                     xfer_done);
-       if (*xfer_done && urb->status != -EINPROGRESS)
-               failed = 1;
-
-       if (failed) {
+       if (failed || (*xfer_done && urb->status != -EINPROGRESS)) {
                dwc2_host_complete(hsotg, qtd, urb->status);
                dwc2_hcd_qtd_unlink_and_free(hsotg, qtd, qh);
-               dev_vdbg(hsotg->dev, "failed=%1x xfer_done=%1x status=%08x\n",
-                        failed, *xfer_done, urb->status);
+               dev_vdbg(hsotg->dev, "failed=%1x xfer_done=%1x\n",
+                        failed, *xfer_done);
                return failed;
        }
 
@@ -1236,21 +1233,23 @@ static void dwc2_complete_non_isoc_xfer_ddma(struct dwc2_hsotg *hsotg,
 
        list_for_each_safe(qtd_item, qtd_tmp, &qh->qtd_list) {
                int i;
+               int qtd_desc_count;
 
                qtd = list_entry(qtd_item, struct dwc2_qtd, qtd_list_entry);
                xfer_done = 0;
+               qtd_desc_count = qtd->n_desc;
 
-               for (i = 0; i < qtd->n_desc; i++) {
+               for (i = 0; i < qtd_desc_count; i++) {
                        if (dwc2_process_non_isoc_desc(hsotg, chan, chnum, qtd,
                                                       desc_num, halt_status,
-                                                      &xfer_done)) {
-                               qtd = NULL;
-                               break;
-                       }
+                                                      &xfer_done))
+                               goto stop_scan;
+
                        desc_num++;
                }
        }
 
+stop_scan:
        if (qh->ep_type != USB_ENDPOINT_XFER_CONTROL) {
                /*
                 * Resetting the data toggle for bulk and interrupt endpoints
@@ -1258,7 +1257,7 @@ static void dwc2_complete_non_isoc_xfer_ddma(struct dwc2_hsotg *hsotg,
                 */
                if (halt_status == DWC2_HC_XFER_STALL)
                        qh->data_toggle = DWC2_HC_PID_DATA0;
-               else if (qtd)
+               else
                        dwc2_hcd_save_data_toggle(hsotg, chan, chnum, qtd);
        }
 
index f8253803a0507543f617af6e2aacc266b43762de..cadba8b13c4837d3e7a3432a3909d69239a70c5d 100644 (file)
@@ -525,11 +525,19 @@ void dwc2_hcd_save_data_toggle(struct dwc2_hsotg *hsotg,
        u32 pid = (hctsiz & TSIZ_SC_MC_PID_MASK) >> TSIZ_SC_MC_PID_SHIFT;
 
        if (chan->ep_type != USB_ENDPOINT_XFER_CONTROL) {
+               if (WARN(!chan || !chan->qh,
+                        "chan->qh must be specified for non-control eps\n"))
+                       return;
+
                if (pid == TSIZ_SC_MC_PID_DATA0)
                        chan->qh->data_toggle = DWC2_HC_PID_DATA0;
                else
                        chan->qh->data_toggle = DWC2_HC_PID_DATA1;
        } else {
+               if (WARN(!qtd,
+                        "qtd must be specified for control eps\n"))
+                       return;
+
                if (pid == TSIZ_SC_MC_PID_DATA0)
                        qtd->data_toggle = DWC2_HC_PID_DATA0;
                else
index 510f787434b3d574e40cfe4a5a41511c92f1ad50..690b9fd98b55165a8b1d6a7b8bce30f730ac6805 100644 (file)
@@ -530,7 +530,13 @@ static int dwc2_driver_probe(struct platform_device *dev)
        if (retval)
                return retval;
 
-       /* Reset the controller and detect hardware config values */
+       /*
+        * Reset before dwc2_get_hwparams() so that it reads the power-on
+        * reset values from the registers.
+        */
+       dwc2_core_reset_and_force_dr_mode(hsotg);
+
+       /* Detect config values from hardware */
        retval = dwc2_get_hwparams(hsotg);
        if (retval)
                goto error;
index 29130682e547e568fb92e32734dbf0141a5e1bef..e4f8b90d9627f9baed5fe4f7fd6dcac7963f49f5 100644 (file)
@@ -856,7 +856,6 @@ struct dwc3 {
        unsigned                pullups_connected:1;
        unsigned                resize_fifos:1;
        unsigned                setup_packet_pending:1;
-       unsigned                start_config_issued:1;
        unsigned                three_stage_setup:1;
        unsigned                usb3_lpm_capable:1;
 
index 3a9354abcb68bfc6e9d27cc64c7155d54dec04e0..8d6b75c2f53b2e0d0991ca3dddc784f6dccecadb 100644 (file)
@@ -555,7 +555,6 @@ static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
        int ret;
        u32 reg;
 
-       dwc->start_config_issued = false;
        cfg = le16_to_cpu(ctrl->wValue);
 
        switch (state) {
@@ -737,10 +736,6 @@ static int dwc3_ep0_std_request(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
                dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_ISOCH_DELAY");
                ret = dwc3_ep0_set_isoch_delay(dwc, ctrl);
                break;
-       case USB_REQ_SET_INTERFACE:
-               dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_INTERFACE");
-               dwc->start_config_issued = false;
-               /* Fall through */
        default:
                dwc3_trace(trace_dwc3_ep0, "Forwarding to gadget driver");
                ret = dwc3_ep0_delegate_req(dwc, ctrl);
index af023a81a0b02160f85e4800594b49aa318190c2..2363bad45af865dd2e7ee0c28e009fff1ae7a7d5 100644 (file)
@@ -385,24 +385,66 @@ static void dwc3_free_trb_pool(struct dwc3_ep *dep)
        dep->trb_pool_dma = 0;
 }
 
+static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep);
+
+/**
+ * dwc3_gadget_start_config - Configure EP resources
+ * @dwc: pointer to our controller context structure
+ * @dep: endpoint that is being enabled
+ *
+ * The assignment of transfer resources cannot perfectly follow the
+ * data book due to the fact that the controller driver does not have
+ * all knowledge of the configuration in advance. It is given this
+ * information piecemeal by the composite gadget framework after every
+ * SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook
+ * programming model in this scenario can cause errors, for two
+ * reasons:
+ *
+ * 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION
+ * and SET_INTERFACE (8.1.5). This is incorrect in the scenario of
+ * multiple interfaces.
+ *
+ * 2) The databook does not mention doing more DEPXFERCFG for new
+ * endpoint on alt setting (8.1.6).
+ *
+ * The following simplified method is used instead:
+ *
+ * All hardware endpoints can be assigned a transfer resource and this
+ * setting will stay persistent until either a core reset or
+ * hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and
+ * do DEPXFERCFG for every hardware endpoint as well. We are
+ * guaranteed that there are as many transfer resources as endpoints.
+ *
+ * This function is called for each endpoint when it is being enabled
+ * but is triggered only when called for EP0-out, which always happens
+ * first, and which should only happen in one of the above conditions.
+ */
 static int dwc3_gadget_start_config(struct dwc3 *dwc, struct dwc3_ep *dep)
 {
        struct dwc3_gadget_ep_cmd_params params;
        u32                     cmd;
+       int                     i;
+       int                     ret;
+
+       if (dep->number)
+               return 0;
 
        memset(&params, 0x00, sizeof(params));
+       cmd = DWC3_DEPCMD_DEPSTARTCFG;
 
-       if (dep->number != 1) {
-               cmd = DWC3_DEPCMD_DEPSTARTCFG;
-               /* XferRscIdx == 0 for ep0 and 2 for the remaining */
-               if (dep->number > 1) {
-                       if (dwc->start_config_issued)
-                               return 0;
-                       dwc->start_config_issued = true;
-                       cmd |= DWC3_DEPCMD_PARAM(2);
-               }
+       ret = dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params);
+       if (ret)
+               return ret;
 
-               return dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params);
+       for (i = 0; i < DWC3_ENDPOINTS_NUM; i++) {
+               struct dwc3_ep *dep = dwc->eps[i];
+
+               if (!dep)
+                       continue;
+
+               ret = dwc3_gadget_set_xfer_resource(dwc, dep);
+               if (ret)
+                       return ret;
        }
 
        return 0;
@@ -516,10 +558,6 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
                struct dwc3_trb *trb_st_hw;
                struct dwc3_trb *trb_link;
 
-               ret = dwc3_gadget_set_xfer_resource(dwc, dep);
-               if (ret)
-                       return ret;
-
                dep->endpoint.desc = desc;
                dep->comp_desc = comp_desc;
                dep->type = usb_endpoint_type(desc);
@@ -1636,8 +1674,6 @@ static int dwc3_gadget_start(struct usb_gadget *g,
        }
        dwc3_writel(dwc->regs, DWC3_DCFG, reg);
 
-       dwc->start_config_issued = false;
-
        /* Start with SuperSpeed Default */
        dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512);
 
@@ -2237,7 +2273,6 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
        dwc3_writel(dwc->regs, DWC3_DCTL, reg);
 
        dwc3_disconnect_gadget(dwc);
-       dwc->start_config_issued = false;
 
        dwc->gadget.speed = USB_SPEED_UNKNOWN;
        dwc->setup_packet_pending = false;
@@ -2288,7 +2323,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
 
        dwc3_stop_active_transfers(dwc);
        dwc3_clear_stall_all_ep(dwc);
-       dwc->start_config_issued = false;
 
        /* Reset device address to zero */
        reg = dwc3_readl(dwc->regs, DWC3_DCFG);
@@ -2789,6 +2823,7 @@ int dwc3_gadget_init(struct dwc3 *dwc)
        dwc->gadget.speed               = USB_SPEED_UNKNOWN;
        dwc->gadget.sg_supported        = true;
        dwc->gadget.name                = "dwc3-gadget";
+       dwc->gadget.is_otg              = dwc->dr_mode == USB_DR_MODE_OTG;
 
        /*
         * FIXME We might be setting max_speed to <SUPER, however versions
index 0fbfb2b2aa08b3ade8e4b487d012188687b29e05..26ccad5d8680a3a1b5c75b4bf89018340eeb1846 100644 (file)
@@ -673,7 +673,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
        unsigned long           flags;
        int                     tx_list_empty;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        spin_lock_irqsave(&dev->lock, flags);
        tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
        spin_unlock_irqrestore(&dev->lock, flags);
@@ -683,7 +683,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
                wait_event_interruptible(dev->tx_flush_wait,
                                (likely(list_empty(&dev->tx_reqs_active))));
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 365afd7e14f8cade738683a06616b4116fa84fc9..87fb0fd6aaabea260758ba338769d6be70472740 100644 (file)
@@ -130,7 +130,8 @@ struct dev_data {
                                        setup_can_stall : 1,
                                        setup_out_ready : 1,
                                        setup_out_error : 1,
-                                       setup_abort : 1;
+                                       setup_abort : 1,
+                                       gadget_registered : 1;
        unsigned                        setup_wLength;
 
        /* the rest is basically write-once */
@@ -1179,7 +1180,8 @@ dev_release (struct inode *inode, struct file *fd)
 
        /* closing ep0 === shutdown all */
 
-       usb_gadget_unregister_driver (&gadgetfs_driver);
+       if (dev->gadget_registered)
+               usb_gadget_unregister_driver (&gadgetfs_driver);
 
        /* at this point "good" hardware has disconnected the
         * device from USB; the host won't see it any more.
@@ -1521,10 +1523,10 @@ static void destroy_ep_files (struct dev_data *dev)
                spin_unlock_irq (&dev->lock);
 
                /* break link to dcache */
-               mutex_lock (&parent->i_mutex);
+               inode_lock(parent);
                d_delete (dentry);
                dput (dentry);
-               mutex_unlock (&parent->i_mutex);
+               inode_unlock(parent);
 
                spin_lock_irq (&dev->lock);
        }
@@ -1847,6 +1849,7 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
                 * kick in after the ep0 descriptor is closed.
                 */
                value = len;
+               dev->gadget_registered = true;
        }
        return value;
 
index f92f5aff0dd5e8ece600cc44b5ba6168e3f1bb81..8755b2c2aadaa60700471f5260408bff6e28b69c 100644 (file)
@@ -91,7 +91,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
        if (!access_ok(VERIFY_WRITE, buf, nbytes))
                return -EFAULT;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        list_for_each_entry_safe(req, tmp_req, queue, queue) {
                len = snprintf(tmpbuf, sizeof(tmpbuf),
                                "%8p %08x %c%c%c %5d %c%c%c\n",
@@ -118,7 +118,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
                nbytes -= len;
                buf += len;
        }
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
 
        return actual;
 }
@@ -143,7 +143,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
        u32 *data;
        int ret = -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        udc = inode->i_private;
        data = kmalloc(inode->i_size, GFP_KERNEL);
        if (!data)
@@ -158,7 +158,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
        ret = 0;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
@@ -169,11 +169,11 @@ static ssize_t regs_dbg_read(struct file *file, char __user *buf,
        struct inode *inode = file_inode(file);
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = simple_read_from_buffer(buf, nbytes, ppos,
                        file->private_data,
                        file_inode(file)->i_size);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index 53c0692f1b096eef837624bda521a11ad3181fc4..93d28cb00b76f15de7f281f465a1c791aa7edbbe 100644 (file)
@@ -2340,7 +2340,7 @@ static struct qe_udc *qe_udc_config(struct platform_device *ofdev)
 {
        struct qe_udc *udc;
        struct device_node *np = ofdev->dev.of_node;
-       unsigned int tmp_addr = 0;
+       unsigned long tmp_addr = 0;
        struct usb_device_para __iomem *usbpram;
        unsigned int i;
        u64 size;
index 4dff60d34f732097752e59360e0edcf7e5eb90b4..0d32052bf16f90d5e1ace964cedd484c14a6f65d 100644 (file)
@@ -369,9 +369,20 @@ static inline void set_max_speed(struct net2280_ep *ep, u32 max)
        static const u32 ep_enhanced[9] = { 0x10, 0x60, 0x30, 0x80,
                                          0x50, 0x20, 0x70, 0x40, 0x90 };
 
-       if (ep->dev->enhanced_mode)
+       if (ep->dev->enhanced_mode) {
                reg = ep_enhanced[ep->num];
-       else{
+               switch (ep->dev->gadget.speed) {
+               case USB_SPEED_SUPER:
+                       reg += 2;
+                       break;
+               case USB_SPEED_FULL:
+                       reg += 1;
+                       break;
+               case USB_SPEED_HIGH:
+               default:
+                       break;
+               }
+       } else {
                reg = (ep->num + 1) * 0x10;
                if (ep->dev->gadget.speed != USB_SPEED_HIGH)
                        reg += 1;
index fd73a3ea07c20e4cac1100be1883dce5ea81c033..b86a6f03592ec9e1d42f5bdc813b83c1297b6e52 100644 (file)
@@ -413,9 +413,10 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
                if (!driver->udc_name || strcmp(driver->udc_name,
                                                dev_name(&udc->dev)) == 0) {
                        ret = udc_bind_to_driver(udc, driver);
+                       if (ret != -EPROBE_DEFER)
+                               list_del(&driver->pending);
                        if (ret)
                                goto err4;
-                       list_del(&driver->pending);
                        break;
                }
        }
index daa563ff1fa05c9c28e2c8a03876c2310aaf0438..1f117c360ebbba3394acd1b934e43632ac2b3531 100644 (file)
@@ -229,6 +229,8 @@ config USB_EHCI_TEGRA
        depends on ARCH_TEGRA
        select USB_EHCI_ROOT_HUB_TT
        select USB_PHY
+       select USB_ULPI
+       select USB_ULPI_VIEWPORT
        help
          This driver enables support for the internal USB Host Controllers
          found in NVIDIA Tegra SoCs. The controllers are EHCI compliant.
index 04ce6b156b350e5dd0f9f9a321c79eb4d173b656..e0244fb3903dc85287f509bbac97600734c9b5af 100644 (file)
@@ -112,12 +112,16 @@ static inline int xhci_find_next_ext_cap(void __iomem *base, u32 start, int id)
        offset = start;
        if (!start || start == XHCI_HCC_PARAMS_OFFSET) {
                val = readl(base + XHCI_HCC_PARAMS_OFFSET);
+               if (val == ~0)
+                       return 0;
                offset = XHCI_HCC_EXT_CAPS(val) << 2;
                if (!offset)
                        return 0;
        };
        do {
                val = readl(base + offset);
+               if (val == ~0)
+                       return 0;
                if (XHCI_EXT_CAPS_ID(val) == id && offset != start)
                        return offset;
 
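The two new `val == ~0` checks guard against a dead or surprise-removed PCI device: MMIO reads from such a device complete with all bits set, so the extended-capability walk would otherwise decode garbage IDs and offsets. A minimal sketch of the defensive-read pattern, with a hypothetical helper name:

    /* Read a capability header, detecting a vanished device. */
    static int xhci_read_cap_hdr(void __iomem *base, u32 offset, u32 *out)
    {
            u32 val = readl(base + offset);

            if (val == ~(u32)0)     /* all ones: device likely gone */
                    return -ENODEV;
            *out = val;
            return 0;
    }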
index c30de7c39f44088e302b257a480a6c7b159ff167..73f763c4f5f591a2c19053083dedf7b6aa252f31 100644 (file)
@@ -275,8 +275,9 @@ static bool need_bw_sch(struct usb_host_endpoint *ep,
                return false;
 
        /*
-        * for LS & FS periodic endpoints which its device don't attach
-        * to TT are also ignored, root-hub will schedule them directly
+        * LS & FS periodic endpoints whose device is not behind a TT are
+        * also ignored; the root hub will schedule them directly, but the
+        * @bpkts field of the endpoint context must be set to 1.
         */
        if (is_fs_or_ls(speed) && !has_tt)
                return false;
@@ -339,8 +340,17 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
                GET_MAX_PACKET(usb_endpoint_maxp(&ep->desc)),
                usb_endpoint_dir_in(&ep->desc), ep);
 
-       if (!need_bw_sch(ep, udev->speed, slot_ctx->tt_info & TT_SLOT))
+       if (!need_bw_sch(ep, udev->speed, slot_ctx->tt_info & TT_SLOT)) {
+               /*
+                * set @bpkts to 1 if this is a LS or FS periodic endpoint whose
+                * device is not connected through an external HS hub
+                */
+               if (usb_endpoint_xfer_int(&ep->desc)
+                       || usb_endpoint_xfer_isoc(&ep->desc))
+                       ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(1));
+
                return 0;
+       }
 
        bw_index = get_bw_index(xhci, udev, ep);
        sch_bw = &sch_array[bw_index];
index c9ab6a44c34aef05968c0b7e053e63cafd392d8f..9532f5aef71bfe310b499db66a266e9a2c7b32f4 100644 (file)
@@ -696,9 +696,24 @@ static int xhci_mtk_remove(struct platform_device *dev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+/*
+ * If IP sleep fails while all clocks are disabled, any register access will
+ * hang the AHB bus, so stop polling the roothubs to avoid register access
+ * during bus suspend. There is no need to check whether IP sleep failed:
+ * if it did, SPM wakes the system up immediately after suspend completes,
+ * which is exactly what we want.
+ */
 static int xhci_mtk_suspend(struct device *dev)
 {
        struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
+       struct usb_hcd *hcd = mtk->hcd;
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+       xhci_dbg(xhci, "%s: stop port polling\n", __func__);
+       clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+       del_timer_sync(&hcd->rh_timer);
+       clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+       del_timer_sync(&xhci->shared_hcd->rh_timer);
 
        xhci_mtk_host_disable(mtk);
        xhci_mtk_phy_power_off(mtk);
@@ -710,11 +725,19 @@ static int xhci_mtk_suspend(struct device *dev)
 static int xhci_mtk_resume(struct device *dev)
 {
        struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
+       struct usb_hcd *hcd = mtk->hcd;
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 
        usb_wakeup_disable(mtk);
        xhci_mtk_clks_enable(mtk);
        xhci_mtk_phy_power_on(mtk);
        xhci_mtk_host_enable(mtk);
+
+       xhci_dbg(xhci, "%s: restart port polling\n", __func__);
+       set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+       usb_hcd_poll_rh_status(hcd);
+       set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+       usb_hcd_poll_rh_status(xhci->shared_hcd);
        return 0;
 }
 
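Suspend and resume now bracket the clock/PHY transitions with a symmetric stop/restart of roothub polling, applied to both the primary and the shared hcd. Factored out, the pair would look roughly like this (helper names are hypothetical; the driver open-codes them):

    static void xhci_mtk_stop_polling(struct usb_hcd *hcd)
    {
            clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
            del_timer_sync(&hcd->rh_timer); /* wait out an in-flight poll */
    }

    static void xhci_mtk_restart_polling(struct usb_hcd *hcd)
    {
            set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
            usb_hcd_poll_rh_status(hcd);    /* pick up anything missed */
    }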
index 58c43ed7ff3b6c5291bab17bb646c2bbefcf67b6..f0640b7a1c42e2b82c4b8c6071fa0c43561ab7e7 100644 (file)
@@ -28,7 +28,9 @@
 #include "xhci.h"
 #include "xhci-trace.h"
 
-#define PORT2_SSIC_CONFIG_REG2 0x883c
+#define SSIC_PORT_NUM          2
+#define SSIC_PORT_CFG2         0x880c
+#define SSIC_PORT_CFG2_OFFSET  0x30
 #define PROG_DONE              (1 << 30)
 #define SSIC_PORT_UNUSED       (1 << 31)
 
@@ -45,6 +47,7 @@
 #define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI            0x22b5
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI                0xa12f
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI       0x9d2f
+#define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI             0x0aa8
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -151,9 +154,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
                (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI ||
                 pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI ||
-                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) {
+                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
+                pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) {
                xhci->quirks |= XHCI_PME_STUCK_QUIRK;
        }
+       if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
+               xhci->quirks |= XHCI_SSIC_PORT_UNUSED;
+       }
        if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
                        pdev->device == PCI_DEVICE_ID_EJ168) {
                xhci->quirks |= XHCI_RESET_ON_RESUME;
@@ -312,22 +320,20 @@ static void xhci_pci_remove(struct pci_dev *dev)
  * SSIC PORT need to be marked as "unused" before putting xHCI
  * into D3. After D3 exit, the SSIC port need to be marked as "used".
  * Without this change, xHCI might not enter D3 state.
- * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
- * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
  */
-static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend)
+static void xhci_ssic_port_unused_quirk(struct usb_hcd *hcd, bool suspend)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
-       struct pci_dev          *pdev = to_pci_dev(hcd->self.controller);
        u32 val;
        void __iomem *reg;
+       int i;
 
-       if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
-                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
-
-               reg = (void __iomem *) xhci->cap_regs + PORT2_SSIC_CONFIG_REG2;
+       for (i = 0; i < SSIC_PORT_NUM; i++) {
+               reg = (void __iomem *) xhci->cap_regs +
+                               SSIC_PORT_CFG2 +
+                               i * SSIC_PORT_CFG2_OFFSET;
 
-               /* Notify SSIC that SSIC profile programming is not done */
+               /* Notify SSIC that SSIC profile programming is not done. */
                val = readl(reg) & ~PROG_DONE;
                writel(val, reg);
 
@@ -344,6 +350,17 @@ static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend)
                writel(val, reg);
                readl(reg);
        }
+}
+
+/*
+ * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
+ * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
+ */
+static void xhci_pme_quirk(struct usb_hcd *hcd)
+{
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+       void __iomem *reg;
+       u32 val;
 
        reg = (void __iomem *) xhci->cap_regs + 0x80a4;
        val = readl(reg);
@@ -355,6 +372,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
        struct pci_dev          *pdev = to_pci_dev(hcd->self.controller);
+       int                     ret;
 
        /*
         * Systems with the TI redriver that loses port status change events
@@ -364,9 +382,16 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
                pdev->no_d3cold = true;
 
        if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
-               xhci_pme_quirk(hcd, true);
+               xhci_pme_quirk(hcd);
+
+       if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
+               xhci_ssic_port_unused_quirk(hcd, true);
 
-       return xhci_suspend(xhci, do_wakeup);
+       ret = xhci_suspend(xhci, do_wakeup);
+       if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED))
+               xhci_ssic_port_unused_quirk(hcd, false);
+
+       return ret;
 }
 
 static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
@@ -396,8 +421,11 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
        if (pdev->vendor == PCI_VENDOR_ID_INTEL)
                usb_enable_intel_xhci_ports(pdev);
 
+       if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
+               xhci_ssic_port_unused_quirk(hcd, false);
+
        if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
-               xhci_pme_quirk(hcd, false);
+               xhci_pme_quirk(hcd);
 
        retval = xhci_resume(xhci, hibernated);
        return retval;
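
A quick sanity check on the new constants: SSIC_PORT_CFG2 (0x880c) plus one SSIC_PORT_CFG2_OFFSET stride (0x30) is 0x883c, exactly the register the removed PORT2_SSIC_CONFIG_REG2 macro named, so the loop over SSIC_PORT_NUM ports strictly generalizes the old single-port write. As standalone arithmetic:

    #include <stdio.h>

    int main(void)
    {
            const unsigned int cfg2 = 0x880c, stride = 0x30;
            int i;

            for (i = 0; i < 2; i++)         /* SSIC_PORT_NUM == 2 */
                    printf("SSIC port %d CFG2 at 0x%04x\n",
                           i, cfg2 + i * stride);
            return 0;       /* prints 0x880c, then 0x883c */
    }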
index 770b6b08879790ec2754e26fbb424fcd03c1205c..d39d6bf1d090dad06687892ed723ea3a5e993083 100644 (file)
@@ -184,7 +184,8 @@ static int xhci_plat_probe(struct platform_device *pdev)
                struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd);
 
                /* Just copy data for now */
-               *priv = *priv_match;
+               if (priv_match)
+                       *priv = *priv_match;
        }
 
        if (xhci_plat_type_is(hcd, XHCI_PLAT_TYPE_MARVELL_ARMADA)) {
index f1c21c40b4a674e6a8fad98ac34c0b9385df76be..3915657e6078b66211fd699eab9bf227752f1ffc 100644 (file)
@@ -2193,10 +2193,6 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                }
        /* Fast path - was this the last TRB in the TD for this URB? */
        } else if (event_trb == td->last_trb) {
-               if (td->urb_length_set && trb_comp_code == COMP_SHORT_TX)
-                       return finish_td(xhci, td, event_trb, event, ep,
-                                        status, false);
-
                if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) {
                        td->urb->actual_length =
                                td->urb->transfer_buffer_length -
@@ -2248,12 +2244,6 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                        td->urb->actual_length +=
                                TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) -
                                EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
-
-               if (trb_comp_code == COMP_SHORT_TX) {
-                       xhci_dbg(xhci, "mid bulk/intr SP, wait for last TRB event\n");
-                       td->urb_length_set = true;
-                       return 0;
-               }
        }
 
        return finish_td(xhci, td, event_trb, event, ep, status, false);
index 26a44c0e969e621be5d2a6e2d947cf9292505716..0c8087d3c3138f72e6eeaedc928c09453ab32285 100644 (file)
@@ -1554,7 +1554,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
                                "HW died, freeing TD.");
                urb_priv = urb->hcpriv;
-               for (i = urb_priv->td_cnt; i < urb_priv->length; i++) {
+               for (i = urb_priv->td_cnt;
+                    i < urb_priv->length && xhci->devs[urb->dev->slot_id];
+                    i++) {
                        td = urb_priv->td[i];
                        if (!list_empty(&td->td_list))
                                list_del_init(&td->td_list);
index 9be7348872baab804691c7fd10f31115a0c7e3fe..cc651383ce5a85d713c66b032a0aa30bd4331dc8 100644 (file)
@@ -1631,6 +1631,7 @@ struct xhci_hcd {
 #define XHCI_BROKEN_STREAMS    (1 << 19)
 #define XHCI_PME_STUCK_QUIRK   (1 << 20)
 #define XHCI_MTK_HOST          (1 << 21)
+#define XHCI_SSIC_PORT_UNUSED  (1 << 22)
        unsigned int            num_active_eps;
        unsigned int            limit_active_eps;
        /* There are two roothubs to keep track of bus suspend info for */
index 795a45b1b25bacb016552b4d0635166965ebfce6..58487a4735218518ce50326750ca8718aaa2a516 100644 (file)
@@ -662,7 +662,7 @@ static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
                csr &= ~(MUSB_TXCSR_AUTOSET | MUSB_TXCSR_DMAMODE);
                csr |= MUSB_TXCSR_DMAENAB; /* against programmer's guide */
        }
-       channel->desired_mode = mode;
+       channel->desired_mode = *mode;
        musb_writew(epio, MUSB_TXCSR, csr);
 
        return 0;
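The desired_mode fix is a pointer-versus-value bug: in this helper `mode` arrives as a pointer, and the old assignment stored the (implicitly truncated) pointer itself rather than the mode it points at. In isolation, assuming a signature along these lines:

    static int set_dma_mode(struct dma_channel *channel, const u8 *mode)
    {
            channel->desired_mode = *mode;  /* the value, not the address */
            return 0;
    }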
@@ -2003,10 +2003,8 @@ void musb_host_rx(struct musb *musb, u8 epnum)
                                qh->offset,
                                urb->transfer_buffer_length);
 
-                       done = musb_rx_dma_in_inventra_cppi41(c, hw_ep, qh,
-                                                             urb, xfer_len,
-                                                             iso_err);
-                       if (done)
+                       if (musb_rx_dma_in_inventra_cppi41(c, hw_ep, qh, urb,
+                                                          xfer_len, iso_err))
                                goto finish;
                        else
                                dev_err(musb->controller, "error: rx_dma failed\n");
index b2685e75a6835fe2b4615b3f86f7aef378fedcae..3eaa4ba6867d408a516abec7bb4180d5c9f05d40 100644 (file)
@@ -348,7 +348,9 @@ static int ux500_suspend(struct device *dev)
        struct ux500_glue       *glue = dev_get_drvdata(dev);
        struct musb             *musb = glue_to_musb(glue);
 
-       usb_phy_set_suspend(musb->xceiv, 1);
+       if (musb)
+               usb_phy_set_suspend(musb->xceiv, 1);
+
        clk_disable_unprepare(glue->clk);
 
        return 0;
@@ -366,7 +368,8 @@ static int ux500_resume(struct device *dev)
                return ret;
        }
 
-       usb_phy_set_suspend(musb->xceiv, 0);
+       if (musb)
+               usb_phy_set_suspend(musb->xceiv, 0);
 
        return 0;
 }
index 0d19a6d61a71f7ccb7a55e1b90a7d5d5f27db535..72b387d592c278eafb5a73e519482b405d921e39 100644 (file)
@@ -757,14 +757,8 @@ static int msm_otg_set_host(struct usb_otg *otg, struct usb_bus *host)
        otg->host = host;
        dev_dbg(otg->usb_phy->dev, "host driver registered w/ tranceiver\n");
 
-       /*
-        * Kick the state machine work, if peripheral is not supported
-        * or peripheral is already registered with us.
-        */
-       if (motg->pdata->mode == USB_DR_MODE_HOST || otg->gadget) {
-               pm_runtime_get_sync(otg->usb_phy->dev);
-               schedule_work(&motg->sm_work);
-       }
+       pm_runtime_get_sync(otg->usb_phy->dev);
+       schedule_work(&motg->sm_work);
 
        return 0;
 }
@@ -827,14 +821,8 @@ static int msm_otg_set_peripheral(struct usb_otg *otg,
        dev_dbg(otg->usb_phy->dev,
                "peripheral driver registered w/ tranceiver\n");
 
-       /*
-        * Kick the state machine work, if host is not supported
-        * or host is already registered with us.
-        */
-       if (motg->pdata->mode == USB_DR_MODE_PERIPHERAL || otg->host) {
-               pm_runtime_get_sync(otg->usb_phy->dev);
-               schedule_work(&motg->sm_work);
-       }
+       pm_runtime_get_sync(otg->usb_phy->dev);
+       schedule_work(&motg->sm_work);
 
        return 0;
 }
@@ -1599,6 +1587,8 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
                                                &motg->id.nb);
                if (ret < 0) {
                        dev_err(&pdev->dev, "register ID notifier failed\n");
+                       extcon_unregister_notifier(motg->vbus.extcon,
+                                                  EXTCON_USB, &motg->vbus.nb);
                        return ret;
                }
 
@@ -1660,15 +1650,6 @@ static int msm_otg_probe(struct platform_device *pdev)
        if (!motg)
                return -ENOMEM;
 
-       pdata = dev_get_platdata(&pdev->dev);
-       if (!pdata) {
-               if (!np)
-                       return -ENXIO;
-               ret = msm_otg_read_dt(pdev, motg);
-               if (ret)
-                       return ret;
-       }
-
        motg->phy.otg = devm_kzalloc(&pdev->dev, sizeof(struct usb_otg),
                                     GFP_KERNEL);
        if (!motg->phy.otg)
@@ -1710,6 +1691,15 @@ static int msm_otg_probe(struct platform_device *pdev)
        if (!motg->regs)
                return -ENOMEM;
 
+       pdata = dev_get_platdata(&pdev->dev);
+       if (!pdata) {
+               if (!np)
+                       return -ENXIO;
+               ret = msm_otg_read_dt(pdev, motg);
+               if (ret)
+                       return ret;
+       }
+
        /*
         * NOTE: The PHYs can be multiplexed between the chipidea controller
         * and the dwc3 controller, using a single bit. It is important that
@@ -1717,8 +1707,10 @@ static int msm_otg_probe(struct platform_device *pdev)
         */
        if (motg->phy_number) {
                phy_select = devm_ioremap_nocache(&pdev->dev, USB2_PHY_SEL, 4);
-               if (!phy_select)
-                       return -ENOMEM;
+               if (!phy_select) {
+                       ret = -ENOMEM;
+                       goto unregister_extcon;
+               }
                /* Enable second PHY with the OTG port */
                writel(0x1, phy_select);
        }
@@ -1728,7 +1720,8 @@ static int msm_otg_probe(struct platform_device *pdev)
        motg->irq = platform_get_irq(pdev, 0);
        if (motg->irq < 0) {
                dev_err(&pdev->dev, "platform_get_irq failed\n");
-               return motg->irq;
+               ret = motg->irq;
+               goto unregister_extcon;
        }
 
        regs[0].supply = "vddcx";
@@ -1737,7 +1730,7 @@ static int msm_otg_probe(struct platform_device *pdev)
 
        ret = devm_regulator_bulk_get(motg->phy.dev, ARRAY_SIZE(regs), regs);
        if (ret)
-               return ret;
+               goto unregister_extcon;
 
        motg->vddcx = regs[0].consumer;
        motg->v3p3  = regs[1].consumer;
@@ -1834,6 +1827,12 @@ disable_clks:
        clk_disable_unprepare(motg->clk);
        if (!IS_ERR(motg->core_clk))
                clk_disable_unprepare(motg->core_clk);
+unregister_extcon:
+       extcon_unregister_notifier(motg->id.extcon,
+                                  EXTCON_USB_HOST, &motg->id.nb);
+       extcon_unregister_notifier(motg->vbus.extcon,
+                                  EXTCON_USB, &motg->vbus.nb);
+
        return ret;
 }
 
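The probe rework follows the usual kernel goto-unwind idiom: once the extcon notifiers are registered, every later failure has to jump to a label that unregisters them, so resources are torn down in reverse order of acquisition. Reduced to a sketch with hypothetical helpers:

    static int example_probe(struct platform_device *pdev)
    {
            int ret;

            ret = register_my_notifiers(pdev);      /* acquired first */
            if (ret)
                    return ret;

            ret = grab_clocks_and_regs(pdev);       /* acquired second */
            if (ret)
                    goto err_unregister;            /* undo step one */

            return 0;

    err_unregister:
            unregister_my_notifiers(pdev);
            return ret;
    }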
index c2936dc48ca7b45b0e5c186859d2879569719323..00bfea01be6501d9b53ccc7975b069c24bc57d7b 100644 (file)
@@ -220,7 +220,7 @@ static int mxs_phy_hw_init(struct mxs_phy *mxs_phy)
 /* Return true if the vbus is there */
 static bool mxs_phy_get_vbus_status(struct mxs_phy *mxs_phy)
 {
-       unsigned int vbus_value;
+       unsigned int vbus_value = 0;
 
        if (!mxs_phy->regmap_anatop)
                return false;
index 9b90ad747d8766797bd1737ea8f0be9347a5560d..7c319e7edda22b22e2566a2699dc7dbc93292f56 100644 (file)
@@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */
        { USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */
        { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
+       { USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. RCB-CV-USB USB to RS485 Adaptor */
        { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
        { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
        { USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */
@@ -162,6 +163,8 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
        { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
        { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+       { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
+       { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
        { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
        { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */
        { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */
index a5a0376bbd48c2596b908aa0409befb1136bf128..8c660ae401d8267f951516e33fe8188f0f23e060 100644 (file)
@@ -824,6 +824,7 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID),
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) },
+       { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) },
        { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) },
 
        /* Papouch devices based on FTDI chip */
index 67c6d446973092ddf98dd703152b7cd0d8dec24c..a84df2513994a57377cc41105f3078e36d2351a2 100644 (file)
  */
 #define RATOC_VENDOR_ID                0x0584
 #define RATOC_PRODUCT_ID_USB60F        0xb020
+#define RATOC_PRODUCT_ID_SCU18 0xb03a
 
 /*
  * Infineon Technologies
index e3c3f57c2d828d42ecb35e12d8a1a048974d9ee6..619607323bfdfb0a678e470ed31757ba6028676d 100644 (file)
@@ -368,6 +368,16 @@ static int mxu1_port_probe(struct usb_serial_port *port)
        return 0;
 }
 
+static int mxu1_port_remove(struct usb_serial_port *port)
+{
+       struct mxu1_port *mxport;
+
+       mxport = usb_get_serial_port_data(port);
+       kfree(mxport);
+
+       return 0;
+}
+
 static int mxu1_startup(struct usb_serial *serial)
 {
        struct mxu1_device *mxdev;
@@ -427,6 +437,14 @@ err_free_mxdev:
        return err;
 }
 
+static void mxu1_release(struct usb_serial *serial)
+{
+       struct mxu1_device *mxdev;
+
+       mxdev = usb_get_serial_data(serial);
+       kfree(mxdev);
+}
+
 static int mxu1_write_byte(struct usb_serial_port *port, u32 addr,
                           u8 mask, u8 byte)
 {
@@ -957,7 +975,9 @@ static struct usb_serial_driver mxu11x0_device = {
        .id_table               = mxu1_idtable,
        .num_ports              = 1,
        .port_probe             = mxu1_port_probe,
+       .port_remove            = mxu1_port_remove,
        .attach                 = mxu1_startup,
+       .release                = mxu1_release,
        .open                   = mxu1_open,
        .close                  = mxu1_close,
        .ioctl                  = mxu1_ioctl,
index f2280606b73c0c897d83d7738747b3d6f97a2c71..8849439a8f18f6db8a33bba05c45154085aaef9d 100644 (file)
@@ -268,6 +268,8 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_CC864_SINGLE             0x1006
 #define TELIT_PRODUCT_DE910_DUAL               0x1010
 #define TELIT_PRODUCT_UE910_V2                 0x1012
+#define TELIT_PRODUCT_LE922_USBCFG0            0x1042
+#define TELIT_PRODUCT_LE922_USBCFG3            0x1043
 #define TELIT_PRODUCT_LE920                    0x1200
 #define TELIT_PRODUCT_LE910                    0x1201
 
@@ -313,6 +315,7 @@ static void option_instat_callback(struct urb *urb);
 #define TOSHIBA_PRODUCT_G450                   0x0d45
 
 #define ALINK_VENDOR_ID                                0x1e0e
+#define SIMCOM_PRODUCT_SIM7100E                        0x9001 /* Yes, ALINK_VENDOR_ID */
 #define ALINK_PRODUCT_PH300                    0x9100
 #define ALINK_PRODUCT_3GU                      0x9200
 
@@ -605,6 +608,10 @@ static const struct option_blacklist_info zte_1255_blacklist = {
        .reserved = BIT(3) | BIT(4),
 };
 
+static const struct option_blacklist_info simcom_sim7100e_blacklist = {
+       .reserved = BIT(5) | BIT(6),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
        .sendsetup = BIT(0),
        .reserved = BIT(1) | BIT(2),
@@ -615,6 +622,16 @@ static const struct option_blacklist_info telit_le920_blacklist = {
        .reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
+       .sendsetup = BIT(2),
+       .reserved = BIT(0) | BIT(1) | BIT(3),
+};
+
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(1) | BIT(2) | BIT(3),
+};
+
 static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1110,6 +1127,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) },
        { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) },
        { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */
+       { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, 0x6001, 0xff, 0xff, 0xff), /* 4G LTE usb-modem U901 */
+         .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
        { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
        { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
        { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
@@ -1160,6 +1179,10 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
                .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
@@ -1629,6 +1652,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(ALINK_VENDOR_ID, 0x9000) },
        { USB_DEVICE(ALINK_VENDOR_ID, ALINK_PRODUCT_PH300) },
        { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) },
+       { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E),
+         .driver_info = (kernel_ulong_t)&simcom_sim7100e_blacklist },
        { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
          .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist
        },
@@ -1679,7 +1704,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
-       { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX) },
+       { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, 
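
For these entries, .sendsetup and .reserved are per-interface bitmasks, bit n standing for USB interface number n of the modem. My understanding of how .reserved is consumed (the real check lives in option.c's probe path, so treat this as an assumed shape) is that reserved interfaces are simply refused, leaving them to the net/QMI drivers:

    /* sketch; blacklist_for() is a hypothetical lookup helper */
    const struct option_blacklist_info *info = blacklist_for(id);
    u8 ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber;

    if (info && (info->reserved & BIT(ifnum)))
            return -ENODEV; /* e.g. SIM7100E interfaces 5 and 6 */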
index 60afb39eb73c0b95d261ec367890a306621fd2de..337a0be89fcf0767f67731a9b0a50700e1df9318 100644 (file)
@@ -544,6 +544,11 @@ static int treo_attach(struct usb_serial *serial)
                (serial->num_interrupt_in == 0))
                return 0;
 
+       if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) {
+               dev_err(&serial->interface->dev, "missing endpoints\n");
+               return -ENODEV;
+       }
+
        /*
        * It appears that Treos and Kyoceras want to use the
        * 1st bulk in endpoint to communicate with the 2nd bulk out endpoint,
@@ -597,8 +602,10 @@ static int clie_5_attach(struct usb_serial *serial)
         */
 
        /* some sanity check */
-       if (serial->num_ports < 2)
-               return -1;
+       if (serial->num_bulk_out < 2) {
+               dev_err(&serial->interface->dev, "missing bulk out endpoints\n");
+               return -ENODEV;
+       }
 
        /* port 0 now uses the modified endpoint Address */
        port = serial->port[0];
index 82f25cc1c460dee73c37b2b19895fb7d89bfb0d3..ecca316386f57fa64d04746d4b8326fa4c241ebd 100644 (file)
@@ -123,8 +123,8 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
        /*
         * With noiommu enabled, an IOMMU group will be created for a device
         * that doesn't already have one and doesn't have an iommu_ops on their
-        * bus.  We use iommu_present() again in the main code to detect these
-        * fake groups.
+        * bus.  We set iommudata simply to be able to identify these groups
+        * as special use and for reclamation later.
         */
        if (group || !noiommu || iommu_present(dev->bus))
                return group;
@@ -134,6 +134,7 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
                return NULL;
 
        iommu_group_set_name(group, "vfio-noiommu");
+       iommu_group_set_iommudata(group, &noiommu, NULL);
        ret = iommu_group_add_device(group, dev);
        iommu_group_put(group);
        if (ret)
@@ -158,7 +159,7 @@ EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
 {
 #ifdef CONFIG_VFIO_NOIOMMU
-       if (!iommu_present(dev->bus))
+       if (iommu_group_get_iommudata(group) == &noiommu)
                iommu_group_remove_device(dev);
 #endif
 
@@ -190,16 +191,10 @@ static long vfio_noiommu_ioctl(void *iommu_data,
        return -ENOTTY;
 }
 
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
-       return iommu_present(dev->bus) ? 1 : 0;
-}
-
 static int vfio_noiommu_attach_group(void *iommu_data,
                                     struct iommu_group *iommu_group)
 {
-       return iommu_group_for_each_dev(iommu_group, NULL,
-                                       vfio_iommu_present) ? -EINVAL : 0;
+       return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
 }
 
 static void vfio_noiommu_detach_group(void *iommu_data,
@@ -323,8 +318,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
 /**
  * Group objects - create, release, get, put, search
  */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-                                           bool iommu_present)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 {
        struct vfio_group *group, *tmp;
        struct device *dev;
@@ -342,7 +336,9 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
        atomic_set(&group->container_users, 0);
        atomic_set(&group->opened, 0);
        group->iommu_group = iommu_group;
-       group->noiommu = !iommu_present;
+#ifdef CONFIG_VFIO_NOIOMMU
+       group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
+#endif
 
        group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -767,7 +763,7 @@ int vfio_add_group_dev(struct device *dev,
 
        group = vfio_group_get_from_iommu(iommu_group);
        if (!group) {
-               group = vfio_create_group(iommu_group, iommu_present(dev->bus));
+               group = vfio_create_group(iommu_group);
                if (IS_ERR(group)) {
                        iommu_group_put(iommu_group);
                        return PTR_ERR(group);
index 3fc63c208d08ba9fa801570d830220106aa24f21..57721c73177fa1184d253155a6bcce39bdb2e927 100644 (file)
@@ -78,13 +78,13 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy
        if (!info->fbdefio)
                return 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Kill off the delayed work */
        cancel_delayed_work_sync(&info->deferred_work);
 
        /* Run it immediately */
        schedule_delayed_work(&info->deferred_work, 0);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 0081725c6b5be62f6c7939d5bc2b4022881f363e..6b2a06d09f2b612ae6fac5252977df0853fc8cf1 100644 (file)
@@ -152,7 +152,7 @@ static void lcdc_write(unsigned int val, unsigned int addr)
 
 struct da8xx_fb_par {
        struct device           *dev;
-       resource_size_t p_palette_base;
+       dma_addr_t              p_palette_base;
        unsigned char *v_palette_base;
        dma_addr_t              vram_phys;
        unsigned long           vram_size;
@@ -1428,7 +1428,7 @@ static int fb_probe(struct platform_device *device)
 
        par->vram_virt = dma_alloc_coherent(NULL,
                                            par->vram_size,
-                                           (resource_size_t *) &par->vram_phys,
+                                           &par->vram_phys,
                                            GFP_KERNEL | GFP_DMA);
        if (!par->vram_virt) {
                dev_err(&device->dev,
@@ -1448,7 +1448,7 @@ static int fb_probe(struct platform_device *device)
 
        /* allocate palette buffer */
        par->v_palette_base = dma_zalloc_coherent(NULL, PALETTE_SIZE,
-                                                 (resource_size_t *)&par->p_palette_base,
+                                                 &par->p_palette_base,
                                                  GFP_KERNEL | GFP_DMA);
        if (!par->v_palette_base) {
                dev_err(&device->dev,
index 95873f26e39cf6d58fe78b0dce1fdbabcf9c1390..de2f3e793786c77c73923616fb30b0170d1100b1 100644 (file)
@@ -829,8 +829,7 @@ static int s6e8ax0_probe(struct mipi_dsim_lcd_device *dsim_dev)
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int s6e8ax0_suspend(struct mipi_dsim_lcd_device *dsim_dev)
+static int __maybe_unused s6e8ax0_suspend(struct mipi_dsim_lcd_device *dsim_dev)
 {
        struct s6e8ax0 *lcd = dev_get_drvdata(&dsim_dev->dev);
 
@@ -843,7 +842,7 @@ static int s6e8ax0_suspend(struct mipi_dsim_lcd_device *dsim_dev)
        return 0;
 }
 
-static int s6e8ax0_resume(struct mipi_dsim_lcd_device *dsim_dev)
+static int __maybe_unused s6e8ax0_resume(struct mipi_dsim_lcd_device *dsim_dev)
 {
        struct s6e8ax0 *lcd = dev_get_drvdata(&dsim_dev->dev);
 
@@ -855,10 +854,6 @@ static int s6e8ax0_resume(struct mipi_dsim_lcd_device *dsim_dev)
 
        return 0;
 }
-#else
-#define s6e8ax0_suspend                NULL
-#define s6e8ax0_resume         NULL
-#endif
 
 static struct mipi_dsim_lcd_driver s6e8ax0_dsim_ddi_driver = {
        .name = "s6e8ax0",
@@ -867,8 +862,8 @@ static struct mipi_dsim_lcd_driver s6e8ax0_dsim_ddi_driver = {
        .power_on = s6e8ax0_power_on,
        .set_sequence = s6e8ax0_set_sequence,
        .probe = s6e8ax0_probe,
-       .suspend = s6e8ax0_suspend,
-       .resume = s6e8ax0_resume,
+       .suspend = IS_ENABLED(CONFIG_PM) ? s6e8ax0_suspend : NULL,
+       .resume = IS_ENABLED(CONFIG_PM) ? s6e8ax0_resume : NULL,
 };
 
 static int s6e8ax0_init(void)
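
Replacing the #ifdef CONFIG_PM block with __maybe_unused plus an IS_ENABLED() ternary keeps the suspend/resume code visible to the compiler in every configuration (so it cannot silently bit-rot) while still letting the optimizer discard it when CONFIG_PM=n. The same pattern on a generic driver, names hypothetical:

    static int __maybe_unused mydrv_suspend(struct mipi_dsim_lcd_device *d)
    {
            /* ... power the panel down ... */
            return 0;
    }

    static struct mipi_dsim_lcd_driver mydrv = {
            /* folds to NULL, and the body is dropped, when CONFIG_PM=n */
            .suspend = IS_ENABLED(CONFIG_PM) ? mydrv_suspend : NULL,
    };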
index cee88603efc9e40195430fe1a745eebefb97ef0b..bb2f1e866020199d31b8ba0b1940cb9e6b87923f 100644 (file)
@@ -902,6 +902,21 @@ static int imxfb_probe(struct platform_device *pdev)
                goto failed_getclock;
        }
 
+       /*
+        * The LCDC controller does not have an enable bit. The
+        * controller starts directly when the clocks are enabled.
+        * If the clocks are enabled when the controller is not yet
+        * programmed with proper register values (enabled by the
+        * bootloader, for example), then it goes into an undefined
+        * state.
+        * To avoid this issue, let's enable and disable LCDC IPG clock
+        * so that we force some kind of 'reset' to the LCDC block.
+        */
+       ret = clk_prepare_enable(fbi->clk_ipg);
+       if (ret)
+               goto failed_getclock;
+       clk_disable_unprepare(fbi->clk_ipg);
+
        fbi->clk_ahb = devm_clk_get(&pdev->dev, "ahb");
        if (IS_ERR(fbi->clk_ahb)) {
                ret = PTR_ERR(fbi->clk_ahb);
index de54a474806504ba9844839267fcc7604422ce3f..b6f83d5df9fdeab329478d5ccfeb78610da9e532 100644 (file)
@@ -503,8 +503,7 @@ static int mmphw_probe(struct platform_device *pdev)
        ctrl->reg_base = devm_ioremap_nocache(ctrl->dev,
                        res->start, resource_size(res));
        if (ctrl->reg_base == NULL) {
-               dev_err(ctrl->dev, "%s: res %x - %x map failed\n", __func__,
-                       res->start, res->end);
+               dev_err(ctrl->dev, "%s: res %pR map failed\n", __func__, res);
                ret = -ENOMEM;
                goto failed;
        }
index c9293aea8ec3502e27ddf6125f07201c16996a7b..a970edc2a6f8c781696ac7b0b6664fd7c4513ffa 100644 (file)
@@ -123,11 +123,11 @@ static int ocfb_setupfb(struct ocfb_dev *fbdev)
 
        /* Horizontal timings */
        ocfb_writereg(fbdev, OCFB_HTIM, (var->hsync_len - 1) << 24 |
-                     (var->right_margin - 1) << 16 | (var->xres - 1));
+                     (var->left_margin - 1) << 16 | (var->xres - 1));
 
        /* Vertical timings */
        ocfb_writereg(fbdev, OCFB_VTIM, (var->vsync_len - 1) << 24 |
-                     (var->lower_margin - 1) << 16 | (var->yres - 1));
+                     (var->upper_margin - 1) << 16 | (var->yres - 1));
 
        /* Total length of frame */
        hlen = var->left_margin + var->right_margin + var->hsync_len +
index 36205c27c4d0f19ab61a4aa4175de3dd8785de66..f6bed86c17f96f16cfa75f35a15b790287e7ab1f 100644 (file)
@@ -545,6 +545,7 @@ err_enable_device:
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
        struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+       struct device *dev = get_device(&vp_dev->vdev.dev);
 
        unregister_virtio_device(&vp_dev->vdev);
 
@@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
                virtio_pci_modern_remove(vp_dev);
 
        pci_disable_device(pci_dev);
+       put_device(dev);
 }
 
 static struct pci_driver virtio_pci_driver = {
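
The get_device()/put_device() pair pins the embedded struct device across unregister_virtio_device(): unregistering can drop what was the last reference, freeing the containing vp_dev while the PCI teardown below still touches it. The lifetime, annotated:

    struct device *dev = get_device(&vp_dev->vdev.dev); /* hold an extra ref */

    unregister_virtio_device(&vp_dev->vdev);  /* may drop the last user ref */
    /* ... vp_dev remains valid through the remaining cleanup ... */
    put_device(dev);    /* release our ref; vp_dev may be freed now */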
index 30d38ae685f64019403c045ff08969cef2ccd567..80825a7e8e48e1ebd06af14a1bcf208acb733daf 100644 (file)
@@ -145,7 +145,8 @@ config MENF21BMC_WATCHDOG
 config TANGOX_WATCHDOG
        tristate "Sigma Designs SMP86xx/SMP87xx watchdog"
        select WATCHDOG_CORE
-       depends on ARCH_TANGOX || COMPILE_TEST
+       depends on ARCH_TANGO || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Support for the watchdog in Sigma Designs SMP86xx (tango3)
          and SMP87xx (tango4) family chips.
@@ -618,6 +619,7 @@ config DIGICOLOR_WATCHDOG
 config LPC18XX_WATCHDOG
        tristate "LPC18xx/43xx Watchdog"
        depends on ARCH_LPC18XX || COMPILE_TEST
+       depends on HAS_IOMEM
        select WATCHDOG_CORE
        help
          Say Y here if to include support for the watchdog timer
@@ -1374,6 +1376,7 @@ config BCM_KONA_WDT_DEBUG
 config BCM7038_WDT
        tristate "BCM7038 Watchdog"
        select WATCHDOG_CORE
+       depends on HAS_IOMEM
        help
         Watchdog driver for the built-in hardware in Broadcom 7038 SoCs.
 
@@ -1383,6 +1386,7 @@ config IMGPDC_WDT
        tristate "Imagination Technologies PDC Watchdog Timer"
        depends on HAS_IOMEM
        depends on METAG || MIPS || COMPILE_TEST
+       select WATCHDOG_CORE
        help
          Driver for Imagination Technologies PowerDown Controller
          Watchdog Timer.
index f36ca4be07207a9fd200d92577e9242a10e774df..ac5840d9689aed0f79fafa82e6a56e4515bb1b27 100644 (file)
@@ -292,4 +292,4 @@ MODULE_PARM_DESC(nodelay,
                 "Force selection of a timeout setting without initial delay "
                 "(max6373/74 only, default=0)");
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index 1a11aedc4fe850738bfe17d469232da64959b84d..68952d9ccf8394412654d76fc2c14edef3f74124 100644 (file)
@@ -608,7 +608,7 @@ static int usb_pcwd_probe(struct usb_interface *interface,
        struct usb_host_interface *iface_desc;
        struct usb_endpoint_descriptor *endpoint;
        struct usb_pcwd_private *usb_pcwd = NULL;
-       int pipe, maxp;
+       int pipe;
        int retval = -ENOMEM;
        int got_fw_rev;
        unsigned char fw_rev_major, fw_rev_minor;
@@ -641,7 +641,6 @@ static int usb_pcwd_probe(struct usb_interface *interface,
 
        /* get a handle to the interrupt data pipe */
        pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress);
-       maxp = usb_maxpacket(udev, pipe, usb_pipeout(pipe));
 
        /* allocate memory for our device and initialize it */
        usb_pcwd = kzalloc(sizeof(struct usb_pcwd_private), GFP_KERNEL);
index 01d816251302c2491c24a70e8f7c542b61c2f15a..e7a715e820217eb82a90cc8e5eed54d6d826e22c 100644 (file)
@@ -139,12 +139,11 @@ static int wdt_config(struct watchdog_device *wdd, bool ping)
 
        writel_relaxed(UNLOCK, wdt->base + WDTLOCK);
        writel_relaxed(wdt->load_val, wdt->base + WDTLOAD);
+       writel_relaxed(INT_MASK, wdt->base + WDTINTCLR);
 
-       if (!ping) {
-               writel_relaxed(INT_MASK, wdt->base + WDTINTCLR);
+       if (!ping)
                writel_relaxed(INT_ENABLE | RESET_ENABLE, wdt->base +
                                WDTCONTROL);
-       }
 
        writel_relaxed(LOCK, wdt->base + WDTLOCK);
 
index 945fc43272017cfe05bab171eb8b97e62057244c..4ac2ca8a76561952798f7c49bdd292719d0439a8 100644 (file)
@@ -242,7 +242,7 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
        return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
-static struct cleancache_ops tmem_cleancache_ops = {
+static const struct cleancache_ops tmem_cleancache_ops = {
        .put_page = tmem_cleancache_put_page,
        .get_page = tmem_cleancache_get_page,
        .invalidate_page = tmem_cleancache_flush_page,
index 73dafdc494aa8322e037f1d4a9ad4ec5530c5743..fb0221434f814aa49eef41f6aea7a60c86a4c817 100644 (file)
@@ -227,8 +227,9 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
        /*
         * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
         * to access the BARs where the MSI-X entries reside.
+        * VF devices are special, however: it is the PF's command
+        * register that must be checked.
         */
-       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+       pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd);
        if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
                return -ENXIO;
 
@@ -332,6 +333,9 @@ void xen_pcibk_do_op(struct work_struct *data)
        struct xen_pcibk_dev_data *dev_data = NULL;
        struct xen_pci_op *op = &pdev->op;
        int test_intx = 0;
+#ifdef CONFIG_PCI_MSI
+       unsigned int nr = 0;
+#endif
 
        *op = pdev->sh_info->op;
        barrier();
@@ -360,6 +364,7 @@ void xen_pcibk_do_op(struct work_struct *data)
                        op->err = xen_pcibk_disable_msi(pdev, dev, op);
                        break;
                case XEN_PCI_OP_enable_msix:
+                       nr = op->value;
                        op->err = xen_pcibk_enable_msix(pdev, dev, op);
                        break;
                case XEN_PCI_OP_disable_msix:
@@ -382,7 +387,7 @@ void xen_pcibk_do_op(struct work_struct *data)
        if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
                unsigned int i;
 
-               for (i = 0; i < op->value; i++)
+               for (i = 0; i < nr; i++)
                        pdev->sh_info->op.msix_entries[i].vector =
                                op->msix_entries[i].vector;
        }
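
Latching nr = op->value before the enable call is an instance of the general rule for data an untrusted frontend can influence: snapshot it once, bound it, and never re-read it, so the peer cannot inflate the copy-back loop after validation. A generic sketch of that rule:

    unsigned int nr = op->value;    /* snapshot before anything changes it */

    if (nr > ARRAY_SIZE(op->msix_entries))
            nr = ARRAY_SIZE(op->msix_entries);      /* bound the loop */

    for (i = 0; i < nr; i++)        /* immune to later changes of op->value */
            pdev->sh_info->op.msix_entries[i].vector =
                    op->msix_entries[i].vector;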
index ad4eb1024d1ffb7663db4e770fdd4c5c2663e821..c46ee189466f81ab584282651494fad765a38417 100644 (file)
@@ -848,6 +848,24 @@ static int scsiback_map(struct vscsibk_info *info)
        return scsiback_init_sring(info, ring_ref, evtchn);
 }
 
+/*
+  Check for a translation entry being present
+*/
+static struct v2p_entry *scsiback_chk_translation_entry(
+       struct vscsibk_info *info, struct ids_tuple *v)
+{
+       struct list_head *head = &(info->v2p_entry_lists);
+       struct v2p_entry *entry;
+
+       list_for_each_entry(entry, head, l)
+               if ((entry->v.chn == v->chn) &&
+                   (entry->v.tgt == v->tgt) &&
+                   (entry->v.lun == v->lun))
+                       return entry;
+
+       return NULL;
+}
+
 /*
   Add a new translation entry
 */
@@ -855,9 +873,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info,
                                          char *phy, struct ids_tuple *v)
 {
        int err = 0;
-       struct v2p_entry *entry;
        struct v2p_entry *new;
-       struct list_head *head = &(info->v2p_entry_lists);
        unsigned long flags;
        char *lunp;
        unsigned long long unpacked_lun;
@@ -917,15 +933,10 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info,
        spin_lock_irqsave(&info->v2p_lock, flags);
 
        /* Check double assignment to identical virtual ID */
-       list_for_each_entry(entry, head, l) {
-               if ((entry->v.chn == v->chn) &&
-                   (entry->v.tgt == v->tgt) &&
-                   (entry->v.lun == v->lun)) {
-                       pr_warn("Virtual ID is already used. Assignment was not performed.\n");
-                       err = -EEXIST;
-                       goto out;
-               }
-
+       if (scsiback_chk_translation_entry(info, v)) {
+               pr_warn("Virtual ID is already used. Assignment was not performed.\n");
+               err = -EEXIST;
+               goto out;
        }
 
        /* Create a new translation entry and add to the list */
@@ -933,18 +944,18 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info,
        new->v = *v;
        new->tpg = tpg;
        new->lun = unpacked_lun;
-       list_add_tail(&new->l, head);
+       list_add_tail(&new->l, &info->v2p_entry_lists);
 
 out:
        spin_unlock_irqrestore(&info->v2p_lock, flags);
 
 out_free:
-       mutex_lock(&tpg->tv_tpg_mutex);
-       tpg->tv_tpg_fe_count--;
-       mutex_unlock(&tpg->tv_tpg_mutex);
-
-       if (err)
+       if (err) {
+               mutex_lock(&tpg->tv_tpg_mutex);
+               tpg->tv_tpg_fe_count--;
+               mutex_unlock(&tpg->tv_tpg_mutex);
                kfree(new);
+       }
 
        return err;
 }
@@ -956,39 +967,40 @@ static void __scsiback_del_translation_entry(struct v2p_entry *entry)
 }
 
 /*
-  Delete the translation entry specfied
+  Delete the translation entry specified
 */
 static int scsiback_del_translation_entry(struct vscsibk_info *info,
                                          struct ids_tuple *v)
 {
        struct v2p_entry *entry;
-       struct list_head *head = &(info->v2p_entry_lists);
        unsigned long flags;
+       int ret = 0;
 
        spin_lock_irqsave(&info->v2p_lock, flags);
        /* Find out the translation entry specified */
-       list_for_each_entry(entry, head, l) {
-               if ((entry->v.chn == v->chn) &&
-                   (entry->v.tgt == v->tgt) &&
-                   (entry->v.lun == v->lun)) {
-                       goto found;
-               }
-       }
-
-       spin_unlock_irqrestore(&info->v2p_lock, flags);
-       return 1;
-
-found:
-       /* Delete the translation entry specfied */
-       __scsiback_del_translation_entry(entry);
+       entry = scsiback_chk_translation_entry(info, v);
+       if (entry)
+               __scsiback_del_translation_entry(entry);
+       else
+               ret = -ENOENT;
 
        spin_unlock_irqrestore(&info->v2p_lock, flags);
-       return 0;
+       return ret;
 }
 
 static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
                                char *phy, struct ids_tuple *vir, int try)
 {
+       struct v2p_entry *entry;
+       unsigned long flags;
+
+       if (try) {
+               spin_lock_irqsave(&info->v2p_lock, flags);
+               entry = scsiback_chk_translation_entry(info, vir);
+               spin_unlock_irqrestore(&info->v2p_lock, flags);
+               if (entry)
+                       return;
+       }
        if (!scsiback_add_translation_entry(info, phy, vir)) {
                if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
                                  "%d", XenbusStateInitialised)) {
index 9433e46518c8dc8680fd1e5a44d823aa7f65fdb0..912b64edb42b4a016201b4e7d1a695ca25056610 100644 (file)
@@ -188,6 +188,8 @@ static int queue_reply(struct list_head *queue, const void *data, size_t len)
 
        if (len == 0)
                return 0;
+       if (len > XENSTORE_PAYLOAD_MAX)
+               return -EINVAL;
 
        rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
        if (rb == NULL)
index 7bf835f85bc822ef1119b639be82619af066d326..eadc894faea2ead1f720ad22da63a42889967ebe 100644 (file)
@@ -449,14 +449,14 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
        if (retval)
                return retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
 
        fid = filp->private_data;
        v9fs_blank_wstat(&wstat);
 
        retval = p9_client_wstat(fid, &wstat);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return retval;
 }
@@ -472,13 +472,13 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
        if (retval)
                return retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
 
        fid = filp->private_data;
 
        retval = p9_client_fsync(fid, datasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return retval;
 }
index 659c579c4588b2a5e844d0e82a0195494d70a073..22fc7c802d698766fb5b274e686e89df4e0d76d1 100644 (file)
@@ -33,11 +33,11 @@ affs_file_release(struct inode *inode, struct file *filp)
                 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 
        if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (inode->i_size != AFFS_I(inode)->mmu_private)
                        affs_truncate(inode);
                affs_free_prealloc(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        return 0;
@@ -511,8 +511,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
        pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
                 page->index, to);
        BUG_ON(to > PAGE_CACHE_SIZE);
-       kmap(page);
-       data = page_address(page);
        bsize = AFFS_SB(sb)->s_data_blksize;
        tmp = page->index << PAGE_CACHE_SHIFT;
        bidx = tmp / bsize;
@@ -524,14 +522,15 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
                        return PTR_ERR(bh);
                tmp = min(bsize - boff, to - pos);
                BUG_ON(pos + tmp > to || tmp > bsize);
+               data = kmap_atomic(page);
                memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
+               kunmap_atomic(data);
                affs_brelse(bh);
                bidx++;
                pos += tmp;
                boff = 0;
        }
        flush_dcache_page(page);
-       kunmap(page);
        return 0;
 }
 
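The affs change narrows the page mapping to just the memcpy: an atomic kmap must not be held across anything that can sleep, and fetching the data block can block on I/O, so each block is read first and the page mapped only around the copy. The per-iteration shape, with a hypothetical read helper:

    bh = read_data_block(sb, bidx);         /* may sleep: stay unmapped */
    if (IS_ERR(bh))
            return PTR_ERR(bh);

    data = kmap_atomic(page);               /* enter atomic context */
    memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
    kunmap_atomic(data);                    /* leave atomic context */

    affs_brelse(bh);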
@@ -958,12 +957,12 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = write_inode_now(inode, 0);
        err = sync_blockdev(inode->i_sb->s_bdev);
        if (!ret)
                ret = err;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 const struct file_operations affs_file_operations = {
index 4baf1d2b39e410ffb501c2aba457fdceadcc2cdb..d91a9c9cfbd0a9aa0770ad394af0c156057a8a85 100644 (file)
@@ -483,7 +483,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 
        fl->fl_type = F_UNLCK;
 
-       mutex_lock(&vnode->vfs_inode.i_mutex);
+       inode_lock(&vnode->vfs_inode);
 
        /* check local lock records first */
        ret = 0;
@@ -505,7 +505,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
        }
 
 error:
-       mutex_unlock(&vnode->vfs_inode.i_mutex);
+       inode_unlock(&vnode->vfs_inode);
        _leave(" = %d [%hd]", ret, fl->fl_type);
        return ret;
 }
index 0714abcd7f32321754287e46aec129196832e2ef..dfef94f70667cde167dad60d71d1e23622c24cf7 100644 (file)
@@ -693,7 +693,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret)
                return ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* use a writeback record as a marker in the queue - when this reaches
         * the front of the queue, all the outstanding writes are either
@@ -735,7 +735,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        afs_put_writeback(wb);
        _leave(" = %d", ret);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 6530ced19697d49a9189fa289c9112187448fee3..25b24d0f6c8810c86ef79319e091fc65231733d0 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -195,7 +195,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
        struct timespec now;
        unsigned int ia_valid = attr->ia_valid;
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
 
        if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
                if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
index 3a93755e880fee23fa6d8370916caefb21f2c9eb..7d914c67a9d07f7ebb8a3f8f0bea8200d739ee9f 100644 (file)
@@ -491,6 +491,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
  * arch_check_elf() - check an ELF executable
  * @ehdr:      The main ELF header
  * @has_interp:        True if the ELF has an interpreter, else false.
+ * @interp_ehdr: The interpreter's ELF header
  * @state:     Architecture-specific state preserved throughout the process
  *             of loading the ELF.
  *
@@ -502,6 +503,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
  *         with that return code.
  */
 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
+                                struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
 {
        /* Dummy implementation, always proceed */
@@ -651,7 +653,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
 
        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
-               random_variable = (unsigned long) get_random_int();
+               random_variable = get_random_long();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
@@ -829,7 +831,9 @@ static int load_elf_binary(struct linux_binprm *bprm)
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
-       retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
+       retval = arch_check_elf(&loc->elf_ex,
+                               !!interpreter, &loc->interp_elf_ex,
+                               &arch_state);
        if (retval)
                goto out_free_dentry;
 
index 78f005f378476011a43c41e7d0eaa8d99ac1af21..3a3ced779fc738199cb5bb3d25a8f8c76edc754d 100644 (file)
@@ -638,11 +638,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
        case 3:
                /* Delete this handler. */
                root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&d_inode(root)->i_mutex);
+               inode_lock(d_inode(root));
 
                kill_node(e);
 
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                dput(root);
                break;
        default:
@@ -675,7 +675,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
                return PTR_ERR(e);
 
        root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        dentry = lookup_one_len(e->name, root, strlen(e->name));
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -711,7 +711,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 out2:
        dput(dentry);
 out:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        dput(root);
 
        if (err) {
@@ -754,12 +754,12 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
        case 3:
                /* Delete all handlers. */
                root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&d_inode(root)->i_mutex);
+               inode_lock(d_inode(root));
 
                while (!list_empty(&entries))
                        kill_node(list_entry(entries.next, Node, list));
 
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                dput(root);
                break;
        default:
index ba762ea07f679c9bafd078bb316270a38e9a7d80..826b164a4b5b1faa9719aefc9cceccec20eda479 100644 (file)
@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
 {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
 
-       if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+       if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
                return;
 
        invalidate_bh_lrus();
@@ -346,9 +346,9 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
        struct inode *bd_inode = bdev_file_inode(file);
        loff_t retval;
 
-       mutex_lock(&bd_inode->i_mutex);
+       inode_lock(bd_inode);
        retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
-       mutex_unlock(&bd_inode->i_mutex);
+       inode_unlock(bd_inode);
        return retval;
 }
        
@@ -1142,9 +1142,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 {
        unsigned bsize = bdev_logical_block_size(bdev);
 
-       mutex_lock(&bdev->bd_inode->i_mutex);
+       inode_lock(bdev->bd_inode);
        i_size_write(bdev->bd_inode, size);
-       mutex_unlock(&bdev->bd_inode->i_mutex);
+       inode_unlock(bdev->bd_inode);
        while (bsize < PAGE_CACHE_SIZE) {
                if (size & bsize)
                        break;
@@ -1201,7 +1201,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                bdev->bd_disk = disk;
                bdev->bd_queue = disk->queue;
                bdev->bd_contains = bdev;
-               bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
+               if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
+                       bdev->bd_inode->i_flags = S_DAX;
+               else
+                       bdev->bd_inode->i_flags = 0;
+
                if (!partno) {
                        ret = -ENXIO;
                        bdev->bd_part = disk_get_part(disk, partno);
@@ -1693,13 +1697,24 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
        return try_to_free_buffers(page);
 }
 
+static int blkdev_writepages(struct address_space *mapping,
+                            struct writeback_control *wbc)
+{
+       if (dax_mapping(mapping)) {
+               struct block_device *bdev = I_BDEV(mapping->host);
+
+               return dax_writeback_mapping_range(mapping, bdev, wbc);
+       }
+       return generic_writepages(mapping, wbc);
+}
+
 static const struct address_space_operations def_blk_aops = {
        .readpage       = blkdev_readpage,
        .readpages      = blkdev_readpages,
        .writepage      = blkdev_writepage,
        .write_begin    = blkdev_write_begin,
        .write_end      = blkdev_write_end,
-       .writepages     = generic_writepages,
+       .writepages     = blkdev_writepages,
        .releasepage    = blkdev_releasepage,
        .direct_IO      = blkdev_direct_IO,
        .is_dirty_writeback = buffer_check_dirty_writeback,
@@ -1730,43 +1745,25 @@ static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return __dax_fault(vma, vmf, blkdev_get_block, NULL);
 }
 
-static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-               pmd_t *pmd, unsigned int flags)
-{
-       return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
-}
-
-static void blkdev_vm_open(struct vm_area_struct *vma)
+static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
+               struct vm_fault *vmf)
 {
-       struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-       struct block_device *bdev = I_BDEV(bd_inode);
-
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count++;
-       mutex_unlock(&bd_inode->i_mutex);
+       return dax_pfn_mkwrite(vma, vmf);
 }
 
-static void blkdev_vm_close(struct vm_area_struct *vma)
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+               pmd_t *pmd, unsigned int flags)
 {
-       struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-       struct block_device *bdev = I_BDEV(bd_inode);
-
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count--;
-       mutex_unlock(&bd_inode->i_mutex);
+       return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
 }
 
 static const struct vm_operations_struct blkdev_dax_vm_ops = {
-       .open           = blkdev_vm_open,
-       .close          = blkdev_vm_close,
        .fault          = blkdev_dax_fault,
        .pmd_fault      = blkdev_dax_pmd_fault,
-       .pfn_mkwrite    = blkdev_dax_fault,
+       .pfn_mkwrite    = blkdev_dax_pfn_mkwrite,
 };
 
 static const struct vm_operations_struct blkdev_default_vm_ops = {
-       .open           = blkdev_vm_open,
-       .close          = blkdev_vm_close,
        .fault          = filemap_fault,
        .map_pages      = filemap_map_pages,
 };
@@ -1774,18 +1771,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
 static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct inode *bd_inode = bdev_file_inode(file);
-       struct block_device *bdev = I_BDEV(bd_inode);
 
        file_accessed(file);
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count++;
        if (IS_DAX(bd_inode)) {
                vma->vm_ops = &blkdev_dax_vm_ops;
                vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
        } else {
                vma->vm_ops = &blkdev_default_vm_ops;
        }
-       mutex_unlock(&bd_inode->i_mutex);
 
        return 0;
 }
index 88d9af3d4581f7c45aa0065fa467d097625be5c1..5fb60ea7eee2b2c28e067d11b54c2fad7127c7d8 100644 (file)
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
                list_add_tail(&work->ordered_list, &wq->ordered_list);
                spin_unlock_irqrestore(&wq->list_lock, flags);
        }
-       queue_work(wq->normal_wq, &work->normal_work);
        trace_btrfs_work_queued(work);
+       queue_work(wq->normal_wq, &work->normal_work);
 }
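
/*
 * Why the two lines above were swapped: once queue_work() returns, the
 * work may already be running on another worker thread and may have
 * freed its containing btrfs_work (ordered works free themselves on
 * completion), so emitting the tracepoint after queueing can dereference
 * freed memory. Tracing first, while this path still owns the object,
 * closes that use-after-free window.
 */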
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
index 08405a3da6b1baf9a382327d72dc5a5a85d0c68d..f6dac40f87ff5d4022559e6701b506589aad0353 100644 (file)
@@ -560,13 +560,13 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
  */
 static void __merge_refs(struct list_head *head, int mode)
 {
-       struct __prelim_ref *ref1;
+       struct __prelim_ref *pos1;
 
-       list_for_each_entry(ref1, head, list) {
-               struct __prelim_ref *ref2 = ref1, *tmp;
+       list_for_each_entry(pos1, head, list) {
+               struct __prelim_ref *pos2 = pos1, *tmp;
 
-               list_for_each_entry_safe_continue(ref2, tmp, head, list) {
-                       struct __prelim_ref *xchg;
+               list_for_each_entry_safe_continue(pos2, tmp, head, list) {
+                       struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
                        struct extent_inode_elem *eie;
 
                        if (!ref_for_same_block(ref1, ref2))
@@ -1406,7 +1406,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                        read_extent_buffer(eb, dest + bytes_left,
                                           name_off, name_len);
                if (eb != eb_in) {
-                       btrfs_tree_read_unlock_blocking(eb);
+                       if (!path->skip_locking)
+                               btrfs_tree_read_unlock_blocking(eb);
                        free_extent_buffer(eb);
                }
                ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1426,9 +1427,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                eb = path->nodes[0];
                /* make sure we can use eb after releasing the path */
                if (eb != eb_in) {
-                       atomic_inc(&eb->refs);
-                       btrfs_tree_read_lock(eb);
-                       btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+                       if (!path->skip_locking)
+                               btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+                       path->nodes[0] = NULL;
+                       path->locks[0] = 0;
                }
                btrfs_release_path(path);
                iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
index c473c42d7d6c4d559dbe1587491cd2fb93804145..3346cd8f991032f2457b4c48a8e3440780d53a9a 100644 (file)
@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        faili = nr_pages - 1;
        cb->nr_pages = nr_pages;
 
-       /* In the parent-locked case, we only locked the range we are
-        * interested in.  In all other cases, we can opportunistically
-        * cache decompressed data that goes beyond the requested range. */
-       if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
-               add_ra_bio_pages(inode, em_start + em_len, cb);
+       add_ra_bio_pages(inode, em_start + em_len, cb);
 
        /* include any pages we added in add_ra_bio_pages */
        uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
index 97ad9bbeb35d24ec0ad228aa6f040b61bffb33a5..bfe4a337fb4d13a058446265b7baf4a1437aa602 100644 (file)
@@ -1614,7 +1614,7 @@ struct btrfs_fs_info {
 
        spinlock_t delayed_iput_lock;
        struct list_head delayed_iputs;
-       struct rw_semaphore delayed_iput_sem;
+       struct mutex cleaner_delayed_iput_mutex;
 
        /* this protects tree_mod_seq_list */
        spinlock_t tree_mod_seq_lock;
@@ -3641,6 +3641,7 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 int __get_raid_index(u64 flags);
 int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
 void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
 void check_system_chunk(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        const u64 type);
index 0be47e4b813672996b490515ef88af357c40a276..b57daa895cea8340fd7b3ed491bb730196b6b5e6 100644 (file)
@@ -1689,7 +1689,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
  *
  */
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
-                                   struct list_head *ins_list)
+                                   struct list_head *ins_list, bool *emitted)
 {
        struct btrfs_dir_item *di;
        struct btrfs_delayed_item *curr, *next;
@@ -1733,6 +1733,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 
                if (over)
                        return 1;
+               *emitted = true;
        }
        return 0;
 }
index f70119f254216583f3c7317085ec209eceb03d0f..0167853c84aea2d93a8fdd0314342e0a1ef42d1b 100644 (file)
@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
 int btrfs_should_delete_dir_index(struct list_head *del_list,
                                  u64 index);
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
-                                   struct list_head *ins_list);
+                                   struct list_head *ins_list, bool *emitted);
 
 /* for init */
 int __init btrfs_delayed_inode_init(void);
index 1e668fb7dd4c73dbc03b7f642660f2af20bd2ddb..cbb7dbfb3fffffc54f621b8dc43a7b1ec9a1185b 100644 (file)
@@ -614,7 +614,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
                em = lookup_extent_mapping(em_tree, start, (u64)-1);
                if (!em)
                        break;
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
                for (i = 0; i < map->num_stripes; i++)
                        if (srcdev == map->stripes[i].dev)
                                map->stripes[i].dev = tgtdev;
index e99ccd6ffb2c14f58bf38f548e202f0e0f86e9ea..4545e2e2ad45a556113eecf30b1a7d49ee7ca260 100644 (file)
 #include <asm/cpufeature.h>
 #endif
 
+#define BTRFS_SUPER_FLAG_SUPP  (BTRFS_HEADER_FLAG_WRITTEN |\
+                                BTRFS_HEADER_FLAG_RELOC |\
+                                BTRFS_SUPER_FLAG_ERROR |\
+                                BTRFS_SUPER_FLAG_SEEDING |\
+                                BTRFS_SUPER_FLAG_METADUMP)
+
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
@@ -176,6 +182,7 @@ static struct btrfs_lockdep_keyset {
        { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
        { .id = BTRFS_UUID_TREE_OBJECTID,       .name_stem = "uuid"     },
+       { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
        { .id = 0,                              .name_stem = "tree"     },
 };
 
@@ -1583,8 +1590,23 @@ int btrfs_init_fs_root(struct btrfs_root *root)
        ret = get_anon_bdev(&root->anon_dev);
        if (ret)
                goto free_writers;
+
+       mutex_lock(&root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(root,
+                                       &root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&root->objectid_mutex);
+               goto free_root_dev;
+       }
+
+       ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&root->objectid_mutex);
+
        return 0;
 
+free_root_dev:
+       free_anon_bdev(root->anon_dev);
 free_writers:
        btrfs_free_subvolume_writers(root->subv_writers);
 fail:
@@ -1766,7 +1788,6 @@ static int cleaner_kthread(void *arg)
        int again;
        struct btrfs_trans_handle *trans;
 
-       set_freezable();
        do {
                again = 0;
 
@@ -1786,7 +1807,10 @@ static int cleaner_kthread(void *arg)
                        goto sleep;
                }
 
+               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
                btrfs_run_delayed_iputs(root);
+               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
+
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&root->fs_info->cleaner_mutex);
 
@@ -2556,8 +2580,8 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->delete_unused_bgs_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
+       mutex_init(&fs_info->cleaner_delayed_iput_mutex);
        seqlock_init(&fs_info->profiles_lock);
-       init_rwsem(&fs_info->delayed_iput_sem);
 
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
@@ -2742,26 +2766,6 @@ int open_ctree(struct super_block *sb,
                goto fail_alloc;
        }
 
-       /*
-        * Leafsize and nodesize were always equal, this is only a sanity check.
-        */
-       if (le32_to_cpu(disk_super->__unused_leafsize) !=
-           btrfs_super_nodesize(disk_super)) {
-               printk(KERN_ERR "BTRFS: couldn't mount because metadata "
-                      "blocksizes don't match.  node %d leaf %d\n",
-                      btrfs_super_nodesize(disk_super),
-                      le32_to_cpu(disk_super->__unused_leafsize));
-               err = -EINVAL;
-               goto fail_alloc;
-       }
-       if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
-               printk(KERN_ERR "BTRFS: couldn't mount because metadata "
-                      "blocksize (%d) was too large\n",
-                      btrfs_super_nodesize(disk_super));
-               err = -EINVAL;
-               goto fail_alloc;
-       }
-
        features = btrfs_super_incompat_flags(disk_super);
        features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
        if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
@@ -2833,17 +2837,6 @@ int open_ctree(struct super_block *sb,
        sb->s_blocksize = sectorsize;
        sb->s_blocksize_bits = blksize_bits(sectorsize);
 
-       if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-               printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
-               goto fail_sb_buffer;
-       }
-
-       if (sectorsize != PAGE_SIZE) {
-               printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
-                      "found on %s\n", (unsigned long)sectorsize, sb->s_id);
-               goto fail_sb_buffer;
-       }
-
        mutex_lock(&fs_info->chunk_mutex);
        ret = btrfs_read_sys_array(tree_root);
        mutex_unlock(&fs_info->chunk_mutex);
@@ -2915,6 +2908,18 @@ retry_root_backup:
        tree_root->commit_root = btrfs_root_node(tree_root);
        btrfs_set_root_refs(&tree_root->root_item, 1);
 
+       mutex_lock(&tree_root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(tree_root,
+                                       &tree_root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&tree_root->objectid_mutex);
+               goto recovery_tree_root;
+       }
+
+       ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&tree_root->objectid_mutex);
+
        ret = btrfs_read_roots(fs_info, tree_root);
        if (ret)
                goto recovery_tree_root;
@@ -4018,8 +4023,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                              int read_only)
 {
        struct btrfs_super_block *sb = fs_info->super_copy;
+       u64 nodesize = btrfs_super_nodesize(sb);
+       u64 sectorsize = btrfs_super_sectorsize(sb);
        int ret = 0;
 
+       if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+               printk(KERN_ERR "BTRFS: no valid FS found\n");
+               ret = -EINVAL;
+       }
+       if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
+               printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+                               btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
        if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
                printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
                                btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
@@ -4037,31 +4051,46 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
        }
 
        /*
-        * The common minimum, we don't know if we can trust the nodesize/sectorsize
-        * items yet, they'll be verified later. Issue just a warning.
+        * Check sectorsize and nodesize first, the other checks will need them.
+        * Check all possible sector sizes (4K, 8K, 16K, 32K, 64K) here.
         */
-       if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
+       if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+           sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+               printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+               ret = -EINVAL;
+       }
+       /* Only PAGE_SIZE is supported for now */
+       if (sectorsize != PAGE_CACHE_SIZE) {
+               printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
+                               sectorsize, PAGE_CACHE_SIZE);
+               ret = -EINVAL;
+       }
+       if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+           nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+               printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+               ret = -EINVAL;
+       }
+       if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+               printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
+                               le32_to_cpu(sb->__unused_leafsize),
+                               nodesize);
+               ret = -EINVAL;
+       }
+
+       /* Root alignment check */
+       if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
                printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
                                btrfs_super_root(sb));
-       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
+               ret = -EINVAL;
+       }
+       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
                printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
                                btrfs_super_chunk_root(sb));
-       if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
-               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
-                               btrfs_super_log_root(sb));
-
-       /*
-        * Check the lower bound, the alignment and other constraints are
-        * checked later.
-        */
-       if (btrfs_super_nodesize(sb) < 4096) {
-               printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
-                               btrfs_super_nodesize(sb));
                ret = -EINVAL;
        }
-       if (btrfs_super_sectorsize(sb) < 4096) {
-               printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
-                               btrfs_super_sectorsize(sb));
+       if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+                               btrfs_super_log_root(sb));
                ret = -EINVAL;
        }
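
/*
 * The validation pattern introduced above, in isolation: a sector size is
 * acceptable only when it is a power of two within [4K, 64K], and the
 * on-disk root block numbers must be aligned to it. A hedged sketch
 * (illustrative helper name, assumes linux/log2.h):
 */
static bool btrfs_sectorsize_valid(u64 sectorsize)
{
	return is_power_of_2(sectorsize) &&
	       sectorsize >= 4096 &&
	       sectorsize <= BTRFS_MAX_METADATA_BLOCKSIZE;	/* 64K */
}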
 
index 60cc1399c64f9de26ab9fa6efcaf3e8b0e067829..e2287c7c10bebbbb6dc68f0c4aed056a97bdaafe 100644 (file)
@@ -4139,8 +4139,10 @@ commit_trans:
                    !atomic_read(&root->fs_info->open_ioctl_trans)) {
                        need_commit--;
 
-                       if (need_commit > 0)
+                       if (need_commit > 0) {
+                               btrfs_start_delalloc_roots(fs_info, 0, -1);
                                btrfs_wait_ordered_roots(fs_info, -1);
+                       }
 
                        trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans))
@@ -4153,11 +4155,12 @@ commit_trans:
                                if (ret)
                                        return ret;
                                /*
-                                * make sure that all running delayed iput are
-                                * done
+                                * The cleaner kthread might still be doing iput
+                                * operations. Wait for it to finish so that
+                                * more space is released.
                                 */
-                               down_write(&root->fs_info->delayed_iput_sem);
-                               up_write(&root->fs_info->delayed_iput_sem);
+                               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
+                               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
                                goto again;
                        } else {
                                btrfs_end_transaction(trans, root);
@@ -10399,7 +10402,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
         * more device items and remove one chunk item), but this is done at
         * btrfs_remove_chunk() through a call to check_system_chunk().
         */
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        num_items = 3 + map->num_stripes;
        free_extent_map(em);
 
@@ -10586,7 +10589,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
 
        disk_super = fs_info->super_copy;
        if (!btrfs_super_root(disk_super))
-               return 1;
+               return -EINVAL;
 
        features = btrfs_super_incompat_flags(disk_super);
        if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
@@ -10816,3 +10819,23 @@ int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
        }
        return 1;
 }
+
+static int wait_snapshoting_atomic_t(atomic_t *a)
+{
+       schedule();
+       return 0;
+}
+
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
+{
+       while (true) {
+               int ret;
+
+               ret = btrfs_start_write_no_snapshoting(root);
+               if (ret)
+                       break;
+               wait_on_atomic_t(&root->will_be_snapshoted,
+                                wait_snapshoting_atomic_t,
+                                TASK_UNINTERRUPTIBLE);
+       }
+}
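
/*
 * How the wait above behaves: wait_on_atomic_t() blocks until the atomic
 * counter reads zero, invoking the supplied callback (here just a plain
 * schedule()) to perform the actual sleep. Snapshot creation keeps
 * root->will_be_snapshoted elevated for its duration; once the count
 * drops to zero the waiter is woken (the waker side pairs its decrement
 * with a wake_up_atomic_t()) and this loop retries
 * btrfs_start_write_no_snapshoting().
 */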
index 2e7c97a3f3444aec33a688a4d1f705b0486026cf..392592dc70106e72f61a76b9e2924b5aa99363af 100644 (file)
@@ -2897,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree,
        struct block_device *bdev;
        int ret;
        int nr = 0;
-       int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
        size_t pg_offset = 0;
        size_t iosize;
        size_t disk_io_size;
        size_t blocksize = inode->i_sb->s_blocksize;
-       unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
+       unsigned long this_bio_flag = 0;
 
        set_page_extent_mapped(page);
 
@@ -2942,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree,
                        kunmap_atomic(userpage);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            &cached, GFP_NOFS);
-                       if (!parent_locked)
-                               unlock_extent_cached(tree, cur,
-                                                    cur + iosize - 1,
-                                                    &cached, GFP_NOFS);
+                       unlock_extent_cached(tree, cur,
+                                            cur + iosize - 1,
+                                            &cached, GFP_NOFS);
                        break;
                }
                em = __get_extent_map(inode, page, pg_offset, cur,
                                      end - cur + 1, get_extent, em_cached);
                if (IS_ERR_OR_NULL(em)) {
                        SetPageError(page);
-                       if (!parent_locked)
-                               unlock_extent(tree, cur, end);
+                       unlock_extent(tree, cur, end);
                        break;
                }
                extent_offset = cur - em->start;
@@ -3038,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree,
 
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            &cached, GFP_NOFS);
-                       if (parent_locked)
-                               free_extent_state(cached);
-                       else
-                               unlock_extent_cached(tree, cur,
-                                                    cur + iosize - 1,
-                                                    &cached, GFP_NOFS);
+                       unlock_extent_cached(tree, cur,
+                                            cur + iosize - 1,
+                                            &cached, GFP_NOFS);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
@@ -3052,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree,
                if (test_range_bit(tree, cur, cur_end,
                                   EXTENT_UPTODATE, 1, NULL)) {
                        check_page_uptodate(tree, page);
-                       if (!parent_locked)
-                               unlock_extent(tree, cur, cur + iosize - 1);
+                       unlock_extent(tree, cur, cur + iosize - 1);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
@@ -3063,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree,
                 */
                if (block_start == EXTENT_MAP_INLINE) {
                        SetPageError(page);
-                       if (!parent_locked)
-                               unlock_extent(tree, cur, cur + iosize - 1);
+                       unlock_extent(tree, cur, cur + iosize - 1);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
@@ -3083,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
                        *bio_flags = this_bio_flag;
                } else {
                        SetPageError(page);
-                       if (!parent_locked)
-                               unlock_extent(tree, cur, cur + iosize - 1);
+                       unlock_extent(tree, cur, cur + iosize - 1);
                }
                cur = cur + iosize;
                pg_offset += iosize;
@@ -3213,20 +3204,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
        return ret;
 }
 
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
-                                get_extent_t *get_extent, int mirror_num)
-{
-       struct bio *bio = NULL;
-       unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
-       int ret;
-
-       ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
-                           &bio_flags, READ, NULL);
-       if (bio)
-               ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
-       return ret;
-}
-
 static noinline void update_nr_written(struct page *page,
                                      struct writeback_control *wbc,
                                      unsigned long nr_written)
index 0377413bd4b98257eae07d12041d4dce45ac2c4f..880d5292e9721f6f2c59dfd0f85d3f6fd2cde3bb 100644 (file)
@@ -29,7 +29,6 @@
  */
 #define EXTENT_BIO_COMPRESSED 1
 #define EXTENT_BIO_TREE_LOG 2
-#define EXTENT_BIO_PARENT_LOCKED 4
 #define EXTENT_BIO_FLAG_SHIFT 16
 
 /* these are bit numbers for test/set bit */
@@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
                          get_extent_t *get_extent, int mirror_num);
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
-                                get_extent_t *get_extent, int mirror_num);
 int __init extent_io_init(void);
 void extent_io_exit(void);
 
index 6a98bddd8f33a96e82d0fe856763e53b67e9b064..84fb56d5c018dbfea2a4b1970e4275ada5daa0fb 100644 (file)
@@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em)
                WARN_ON(extent_map_in_tree(em));
                WARN_ON(!list_empty(&em->list));
                if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
-                       kfree(em->bdev);
+                       kfree(em->map_lookup);
                kmem_cache_free(extent_map_cache, em);
        }
 }
index b2991fd8583efe8ed92de32904eb510542bc1f66..eb8b8fae036bc3c67ceea03220cdca503626546f 100644 (file)
@@ -32,7 +32,15 @@ struct extent_map {
        u64 block_len;
        u64 generation;
        unsigned long flags;
-       struct block_device *bdev;
+       union {
+               struct block_device *bdev;
+
+               /*
+                * Used for chunk mappings;
+                * flags & EXTENT_FLAG_FS_MAPPING must be set.
+                */
+               struct map_lookup *map_lookup;
+       };
        atomic_t refs;
        unsigned int compress_type;
        struct list_head list;
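
/*
 * The anonymous union above makes an existing overload explicit: chunk
 * mapping extent_maps have no real block device, so the same slot holds
 * a struct map_lookup, discriminated by EXTENT_FLAG_FS_MAPPING. Access
 * pattern, as converted elsewhere in this series:
 *
 *	if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
 *		map = em->map_lookup;	(chunk mapping)
 *	else
 *		bdev = em->bdev;	(regular file extent)
 */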
index 83d7859d76199d96ec882c35c87dea98526d229e..098bb8f690c992e1ebd01270f49ff2d37e6658bd 100644 (file)
@@ -406,8 +406,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 /* simple helper to fault in pages and copy.  This should go away
  * and be replaced with calls into generic code.
  */
-static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
-                                        size_t write_bytes,
+static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
                                         struct page **prepared_pages,
                                         struct iov_iter *i)
 {
@@ -1588,8 +1587,7 @@ again:
                        ret = 0;
                }
 
-               copied = btrfs_copy_from_user(pos, num_pages,
-                                          write_bytes, pages, i);
+               copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
 
                /*
                 * if we have trouble faulting in the pages, fall
@@ -1764,17 +1762,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        loff_t pos;
        size_t count;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = generic_write_checks(iocb, from);
        if (err <= 0) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return err;
        }
 
        current->backing_dev_info = inode_to_bdi(inode);
        err = file_remove_privs(file);
        if (err) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
 
@@ -1785,7 +1783,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
         * to stop this write operation to ensure FS consistency.
         */
        if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                err = -EROFS;
                goto out;
        }
@@ -1806,7 +1804,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                end_pos = round_up(pos + count, root->sectorsize);
                err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
                if (err) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out;
                }
        }
@@ -1822,7 +1820,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                        iocb->ki_pos = pos + num_written;
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /*
         * We also have to set last_sub_trans to the current log transid,
@@ -1911,7 +1909,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        atomic_inc(&root->log_batch);
        full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                             &BTRFS_I(inode)->runtime_flags);
@@ -1963,7 +1961,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                ret = start_ordered_ops(inode, start, end);
        }
        if (ret) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
        atomic_inc(&root->log_batch);
@@ -2009,7 +2007,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                 */
                clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                          &BTRFS_I(inode)->runtime_flags);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
 
@@ -2033,7 +2031,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
        trans->sync = true;
@@ -2056,7 +2054,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         * file again, but that will end up using the synchronization
         * inside btrfs_sync_log to keep things safe.
         */
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /*
         * If any of the ordered extents had an error, just return it to user
@@ -2305,7 +2303,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
        ret = find_first_non_hole(inode, &offset, &len);
        if (ret < 0)
@@ -2345,7 +2343,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                truncated_page = true;
                ret = btrfs_truncate_page(inode, offset, 0, 0);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
@@ -2421,7 +2419,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                ret = btrfs_wait_ordered_range(inode, lockstart,
                                               lockend - lockstart + 1);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
@@ -2576,7 +2574,7 @@ out_only_mutex:
                        ret = btrfs_end_transaction(trans, root);
                }
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret && !err)
                err = ret;
        return err;
@@ -2660,7 +2658,7 @@ static long btrfs_fallocate(struct file *file, int mode,
        if (ret < 0)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = inode_newsize_ok(inode, alloc_end);
        if (ret)
                goto out;
@@ -2818,7 +2816,7 @@ out:
         * So this is completely used as cleanup.
         */
        btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        /* Let go of our reservation. */
        btrfs_free_reserved_data_space(inode, alloc_start,
                                       alloc_end - alloc_start);
@@ -2894,7 +2892,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file->f_mapping->host;
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
        case SEEK_END:
        case SEEK_CUR:
@@ -2903,20 +2901,20 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
        case SEEK_DATA:
        case SEEK_HOLE:
                if (offset >= i_size_read(inode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return -ENXIO;
                }
 
                ret = find_desired_extent(inode, &offset, whence);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
 
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
index 393e36bd5845d996d216e14ef2c6ac6866fce746..53dbeaf6ce941cc7a6a8bf06c6a81df354e9e364 100644 (file)
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+       void *mem;
+
+       /*
+        * The allocation size varies; observed sizes range from under 4K
+        * up to 16K. Using vmalloc unconditionally would be too heavy, so
+        * try a contiguous allocation first.
+        */
+       if (bitmap_size <= PAGE_SIZE)
+               return kzalloc(bitmap_size, GFP_NOFS);
+
+       mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+       if (mem)
+               return mem;
+
        return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
                         PAGE_KERNEL);
 }
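
/*
 * The fallback above is the classic kmalloc-then-vmalloc idiom: try a
 * physically contiguous allocation first (cheap, no page-table work) and
 * only fall back to vmalloc space for large bitmaps. __GFP_NOWARN
 * suppresses the allocation-failure warning on the attempt that is
 * expected to fail sometimes, and the callers below switch to kvfree(),
 * which releases either kind correctly. Later kernels package this exact
 * pattern as kvzalloc().
 */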
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
index 8b57c17b3fb3194bfff6ae1e487fe0fa7a4fa887..e50316c4af157183d3862fac8e318abe7eb66ecd 100644 (file)
@@ -515,7 +515,7 @@ out:
        return ret;
 }
 
-static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
 {
        struct btrfs_path *path;
        int ret;
@@ -555,13 +555,6 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
        int ret;
        mutex_lock(&root->objectid_mutex);
 
-       if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
-               ret = btrfs_find_highest_objectid(root,
-                                                 &root->highest_objectid);
-               if (ret)
-                       goto out;
-       }
-
        if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
                ret = -ENOSPC;
                goto out;
index ddb347bfee232ec26543055fbf408bfb92f8028f..c8e864b2d530f88c7e65350ca557e36e6061700e 100644 (file)
@@ -9,5 +9,6 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
                         struct btrfs_trans_handle *trans);
 
 int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
 
 #endif
index 24783010768680bea28f4f0e5e9aaa2c6a62a41c..d96f5cf38a2dd2a8e227ad93ff22f7a678611034 100644 (file)
@@ -3134,7 +3134,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
 
-       down_read(&fs_info->delayed_iput_sem);
        spin_lock(&fs_info->delayed_iput_lock);
        while (!list_empty(&fs_info->delayed_iputs)) {
                struct btrfs_inode *inode;
@@ -3153,7 +3152,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
                spin_lock(&fs_info->delayed_iput_lock);
        }
        spin_unlock(&fs_info->delayed_iput_lock);
-       up_read(&root->fs_info->delayed_iput_sem);
 }
 
 /*
@@ -4874,26 +4872,6 @@ next:
        return err;
 }
 
-static int wait_snapshoting_atomic_t(atomic_t *a)
-{
-       schedule();
-       return 0;
-}
-
-static void wait_for_snapshot_creation(struct btrfs_root *root)
-{
-       while (true) {
-               int ret;
-
-               ret = btrfs_start_write_no_snapshoting(root);
-               if (ret)
-                       break;
-               wait_on_atomic_t(&root->will_be_snapshoted,
-                                wait_snapshoting_atomic_t,
-                                TASK_UNINTERRUPTIBLE);
-       }
-}
-
 static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4925,7 +4903,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                 * truncation, it must capture all writes that happened before
                 * this truncation.
                 */
-               wait_for_snapshot_creation(root);
+               btrfs_wait_for_snapshot_creation(root);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
                if (ret) {
                        btrfs_end_write_no_snapshoting(root);
@@ -5739,6 +5717,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        char *name_ptr;
        int name_len;
        int is_curr = 0;        /* ctx->pos points to the current index? */
+       bool emitted;
 
        /* FIXME, use a real flag for deciding about the key type */
        if (root->fs_info->tree_root == root)
@@ -5767,6 +5746,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        if (ret < 0)
                goto err;
 
+       emitted = false;
        while (1) {
                leaf = path->nodes[0];
                slot = path->slots[0];
@@ -5846,6 +5826,7 @@ skip:
 
                        if (over)
                                goto nopos;
+                       emitted = true;
                        di_len = btrfs_dir_name_len(leaf, di) +
                                 btrfs_dir_data_len(leaf, di) + sizeof(*di);
                        di_cur += di_len;
@@ -5858,11 +5839,20 @@ next:
        if (key_type == BTRFS_DIR_INDEX_KEY) {
                if (is_curr)
                        ctx->pos++;
-               ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
+               ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
                if (ret)
                        goto nopos;
        }
 
+       /*
+        * If we haven't emitted any dir entry, we must not touch ctx->pos as
+        * it was set to the termination value in a previous call. We assume
+        * that "." and ".." were emitted if we reach this point and set the
+        * termination value as well for an empty directory.
+        */
+       if (ctx->pos > 2 && !emitted)
+               goto nopos;
+
        /* Reached end of directory/root. Bump pos past the last item. */
        ctx->pos++;
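
/*
 * Rationale for the emitted guard above: once the directory has been
 * fully read, ctx->pos holds a termination value. Bumping it again on
 * every subsequent getdents() call can push the offset beyond what
 * 32-bit d_off consumers (NFS clients, notably) can represent, sending
 * them into an endless readdir loop.
 */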
 
@@ -7138,21 +7128,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        if (ret)
                return ERR_PTR(ret);
 
-       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                             ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em)) {
-               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               return em;
-       }
-
+       /*
+        * Create the ordered extent before the extent map. This is to avoid
+        * races with the fast fsync path that would lead to it logging file
+        * extent items that point to disk extents that were not yet written to.
+        * The fast fsync path collects ordered extents into a local list and
+        * then collects all the new extent maps, so we must create the ordered
+        * extent first and make sure the fast fsync path collects any new
+        * ordered extents after collecting new extent maps as well.
+        * The fsync path simply cannot rely on inode_dio_wait() because it
+        * causes a deadlock with AIO.
+        */
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
        if (ret) {
                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               free_extent_map(em);
                return ERR_PTR(ret);
        }
 
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, ins.offset, 0);
+       if (IS_ERR(em)) {
+               struct btrfs_ordered_extent *oe;
+
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+               oe = btrfs_lookup_ordered_extent(inode, start);
+               ASSERT(oe);
+               if (WARN_ON(!oe))
+                       return em;
+               set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+               set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+               btrfs_remove_ordered_extent(inode, oe);
+               /* Once for our lookup and once for the ordered extents tree. */
+               btrfs_put_ordered_extent(oe);
+               btrfs_put_ordered_extent(oe);
+       }
        return em;
 }
 
@@ -7976,6 +7986,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
 
        kfree(dip);
 
+       dio_bio->bi_error = bio->bi_error;
        dio_end_io(dio_bio, bio->bi_error);
 
        if (io_bio->end_io)
@@ -8030,6 +8041,7 @@ static void btrfs_endio_direct_write(struct bio *bio)
 
        kfree(dip);
 
+       dio_bio->bi_error = bio->bi_error;
        dio_end_io(dio_bio, bio->bi_error);
        bio_put(bio);
 }
@@ -8469,7 +8481,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                 * not unlock the i_mutex at this case.
                 */
                if (offset + count <= inode->i_size) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        relock = true;
                }
                ret = btrfs_delalloc_reserve_space(inode, offset, count);
@@ -8526,7 +8538,7 @@ out:
        if (wakeup)
                inode_dio_end(inode);
        if (relock)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        return ret;
 }
index 2a47a3148ec80df57150e3f5aa7d321abb8d1ccd..48aee9846329c526260aa09f5751335c351ea1e3 100644 (file)
@@ -240,7 +240,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ip_oldflags = ip->flags;
        i_oldflags = inode->i_flags;
@@ -358,7 +358,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        }
 
  out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(file);
        return ret;
 }
@@ -568,6 +568,10 @@ static noinline int create_subvol(struct inode *dir,
                goto fail;
        }
 
+       mutex_lock(&new_root->objectid_mutex);
+       new_root->highest_objectid = new_dirid;
+       mutex_unlock(&new_root->objectid_mutex);
+
        /*
         * insert the directory item
         */
@@ -877,7 +881,7 @@ out_up_read:
 out_dput:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return error;
 }
 
@@ -1389,18 +1393,18 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        ra_index += cluster;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
                        BTRFS_I(inode)->force_compress = compress_type;
                ret = cluster_pages_for_defrag(inode, pages, i, cluster);
                if (ret < 0) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out_ra;
                }
 
                defrag_count += ret;
                balance_dirty_pages_ratelimited(inode->i_mapping);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                if (newer_than) {
                        if (newer_off == (u64)-1)
@@ -1461,9 +1465,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 
 out_ra:
        if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        if (!file)
                kfree(ra);
@@ -2426,7 +2430,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                goto out_dput;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Don't allow to delete a subvolume with send in progress. This is
@@ -2539,7 +2543,7 @@ out_up_write:
                spin_unlock(&dest->root_item_lock);
        }
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!err) {
                d_invalidate(dentry);
                btrfs_invalidate_inodes(dest);
@@ -2555,7 +2559,7 @@ out_unlock_inode:
 out_dput:
        dput(dentry);
 out_unlock_dir:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 out_drop_write:
        mnt_drop_write_file(file);
 out:
@@ -2790,24 +2794,29 @@ out:
 static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
 {
        struct page *page;
-       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
        page = grab_cache_page(inode->i_mapping, index);
        if (!page)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        if (!PageUptodate(page)) {
-               if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
-                                                0))
-                       return NULL;
+               int ret;
+
+               ret = btrfs_readpage(NULL, page);
+               if (ret)
+                       return ERR_PTR(ret);
                lock_page(page);
                if (!PageUptodate(page)) {
                        unlock_page(page);
                        page_cache_release(page);
-                       return NULL;
+                       return ERR_PTR(-EIO);
+               }
+               if (page->mapping != inode->i_mapping) {
+                       unlock_page(page);
+                       page_cache_release(page);
+                       return ERR_PTR(-EAGAIN);
                }
        }
-       unlock_page(page);
 
        return page;
 }
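
/*
 * Note the error-return convention adopted above: ERR_PTR() encodes the
 * failure reason instead of a bare NULL, with -EAGAIN meaning "the page
 * was invalidated under us, retry the lookup". The caller decodes it the
 * usual way, as gather_extent_pages() below does:
 *
 *	page = extent_same_get_page(inode, index);
 *	if (IS_ERR(page))
 *		err = PTR_ERR(page);
 *
 * Also note the page is now returned locked; the ASSERT(PageLocked())
 * checks and the unlock_page() calls in btrfs_cmp_data_free() later in
 * this patch rely on that.
 */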
@@ -2819,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
        pgoff_t index = off >> PAGE_CACHE_SHIFT;
 
        for (i = 0; i < num_pages; i++) {
+again:
                pages[i] = extent_same_get_page(inode, index + i);
-               if (!pages[i])
-                       return -ENOMEM;
+               if (IS_ERR(pages[i])) {
+                       int err = PTR_ERR(pages[i]);
+
+                       if (err == -EAGAIN)
+                               goto again;
+                       pages[i] = NULL;
+                       return err;
+               }
        }
        return 0;
 }
 
-static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+static int lock_extent_range(struct inode *inode, u64 off, u64 len,
+                            bool retry_range_locking)
 {
-       /* do any pending delalloc/csum calc on src, one way or
-          another, and lock file content */
+       /*
+        * Do any pending delalloc/csum calculations on inode, one way or
+        * another, and lock file content.
+        * The locking order is:
+        *
+        *   1) pages
+        *   2) range in the inode's io tree
+        */
        while (1) {
                struct btrfs_ordered_extent *ordered;
                lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
@@ -2847,14 +2870,17 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
                unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
                if (ordered)
                        btrfs_put_ordered_extent(ordered);
+               if (!retry_range_locking)
+                       return -EAGAIN;
                btrfs_wait_ordered_range(inode, off, len);
        }
+       return 0;
 }
 
 static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
 {
-       mutex_unlock(&inode1->i_mutex);
-       mutex_unlock(&inode2->i_mutex);
+       inode_unlock(inode1);
+       inode_unlock(inode2);
 }
 
 static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
@@ -2862,8 +2888,8 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
        if (inode1 < inode2)
                swap(inode1, inode2);
 
-       mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
-       mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(inode1, I_MUTEX_PARENT);
+       inode_lock_nested(inode2, I_MUTEX_CHILD);
 }
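
Two details in btrfs_double_inode_lock() are easy to miss. The address
comparison gives every inode pair one global locking order, which is
what actually prevents an ABBA deadlock when two tasks lock the same
pair concurrently. The I_MUTEX_PARENT/I_MUTEX_CHILD arguments matter
only to lockdep: both mutexes share one lock class, and distinct
subclasses mark the double acquisition as intentional. A sketch of the
interleaving that the ordering rules out (hypothetical tasks):

	/*
	 * Without a fixed order this can deadlock:
	 *   task A: inode_lock(i1);        task B: inode_lock(i2);
	 *   task A: inode_lock(i2); ...    task B: inode_lock(i1); ...
	 * With the address order both tasks take min(i1, i2) first,
	 * so the loser simply waits instead of holding half the pair.
	 */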
 
 static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
@@ -2873,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
        unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
 }
 
-static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
-                                    struct inode *inode2, u64 loff2, u64 len)
+static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+                                   struct inode *inode2, u64 loff2, u64 len,
+                                   bool retry_range_locking)
 {
+       int ret;
+
        if (inode1 < inode2) {
                swap(inode1, inode2);
                swap(loff1, loff2);
        }
-       lock_extent_range(inode1, loff1, len);
-       lock_extent_range(inode2, loff2, len);
+       ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
+       if (ret)
+               return ret;
+       ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
+       if (ret)
+               unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
+                             loff1 + len - 1);
+       return ret;
 }
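
btrfs_double_extent_lock() now follows the usual two-phase pattern:
take the first range, attempt the second, and on failure undo the first
before propagating the error, so no caller ever sees a half-locked
state. Distilled, with hypothetical lock_range()/unlock_range() helpers
standing in for lock_extent_range()/unlock_extent():

	static int lock_both(struct range *a, struct range *b)
	{
		int ret;

		ret = lock_range(a);		/* hypothetical helper */
		if (ret)
			return ret;
		ret = lock_range(b);
		if (ret)
			unlock_range(a);	/* undo before reporting */
		return ret;
	}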
 
 struct cmp_pages {
@@ -2897,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
 
        for (i = 0; i < cmp->num_pages; i++) {
                pg = cmp->src_pages[i];
-               if (pg)
+               if (pg) {
+                       unlock_page(pg);
                        page_cache_release(pg);
+               }
                pg = cmp->dst_pages[i];
-               if (pg)
+               if (pg) {
+                       unlock_page(pg);
                        page_cache_release(pg);
+               }
        }
        kfree(cmp->src_pages);
        kfree(cmp->dst_pages);
@@ -2962,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
 
                src_page = cmp->src_pages[i];
                dst_page = cmp->dst_pages[i];
+               ASSERT(PageLocked(src_page));
+               ASSERT(PageLocked(dst_page));
 
                addr = kmap_atomic(src_page);
                dst_addr = kmap_atomic(dst_page);
@@ -3022,7 +3063,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                return 0;
 
        if (same_inode) {
-               mutex_lock(&src->i_mutex);
+               inode_lock(src);
 
                ret = extent_same_check_offsets(src, loff, &len, olen);
                if (ret)
@@ -3074,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                goto out_unlock;
        }
 
+again:
        ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
        if (ret)
                goto out_unlock;
 
        if (same_inode)
-               lock_extent_range(src, same_lock_start, same_lock_len);
+               ret = lock_extent_range(src, same_lock_start, same_lock_len,
+                                       false);
        else
-               btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+               ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
+                                              false);
+       /*
+        * If one of the inodes has dirty pages in the respective range or
+        * ordered extents, we need to flush delalloc and wait for all ordered
+        * extents in the range. We must unlock the pages and the ranges in the
+        * io trees to avoid deadlocks when flushing delalloc (requires locking
+        * pages) and when waiting for ordered extents to complete (they require
+        * range locking).
+        */
+       if (ret == -EAGAIN) {
+               /*
+                * Ranges in the io trees already unlocked. Now unlock all
+                * pages before waiting for all IO to complete.
+                */
+               btrfs_cmp_data_free(&cmp);
+               if (same_inode) {
+                       btrfs_wait_ordered_range(src, same_lock_start,
+                                                same_lock_len);
+               } else {
+                       btrfs_wait_ordered_range(src, loff, len);
+                       btrfs_wait_ordered_range(dst, dst_loff, len);
+               }
+               goto again;
+       }
+       ASSERT(ret == 0);
+       if (WARN_ON(ret)) {
+               /* ranges in the io trees already unlocked */
+               btrfs_cmp_data_free(&cmp);
+               return ret;
+       }
 
        /* pass original length for comparison so we stay within i_size */
        ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
@@ -3097,7 +3170,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
        btrfs_cmp_data_free(&cmp);
 out_unlock:
        if (same_inode)
-               mutex_unlock(&src->i_mutex);
+               inode_unlock(src);
        else
                btrfs_double_inode_unlock(src, dst);
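
Taken together, the dedupe changes above implement a flush-and-retry
dance: page locks are taken first, io tree range locks second, and if
the range lock would have to wait for ordered extents the whole attempt
is unwound so delalloc can be flushed without any page locks held. The
control flow of btrfs_extent_same(), distilled with hypothetical helper
names (an assumed simplification, not the literal code):

	for (;;) {
		lock_all_pages();		/* 1) page locks */
		ret = try_lock_ranges();	/* 2) ranges, no waiting */
		if (ret != -EAGAIN)
			break;			/* locked, or hard error */
		unlock_all_pages();		/* drop before sleeping */
		wait_for_ordered_extents();	/* flushing is safe now */
	}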
 
@@ -3745,7 +3818,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
        if (!same_inode) {
                btrfs_double_inode_lock(src, inode);
        } else {
-               mutex_lock(&src->i_mutex);
+               inode_lock(src);
        }
 
        /* determine range to clone */
@@ -3791,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
                u64 lock_start = min_t(u64, off, destoff);
                u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
 
-               lock_extent_range(src, lock_start, lock_len);
+               ret = lock_extent_range(src, lock_start, lock_len, true);
        } else {
-               btrfs_double_extent_lock(src, off, inode, destoff, len);
+               ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
+                                              true);
+       }
+       ASSERT(ret == 0);
+       if (WARN_ON(ret)) {
+               /* ranges in the io trees already unlocked */
+               goto out_unlock;
        }
 
        ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
@@ -3816,7 +3895,7 @@ out_unlock:
        if (!same_inode)
                btrfs_double_inode_unlock(src, inode);
        else
-               mutex_unlock(&src->i_mutex);
+               inode_unlock(src);
        return ret;
 }
 
index 6d707545f775119e4883378636b5255a9dcc9144..55161369fab14d7d2381c2c3950a576c895be2e8 100644 (file)
@@ -609,13 +609,28 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
        return 1;
 }
 
+static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
+                                 int index)
+{
+       return stripe * rbio->stripe_npages + index;
+}
+
+/*
+ * these are just the pages from the rbio array, not from anything
+ * the FS sent down to us
+ */
+static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
+                                    int index)
+{
+       return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
+}
+
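
rbio_stripe_page_index() makes the stripe_pages[] layout explicit:
every stripe owns a fixed run of stripe_npages slots. A worked example
with assumed geometry, 64K stripes on 4K pages (stripe_npages == 16):

	/* page 3 of stripe 2 lives at slot 2 * 16 + 3 == 35 */
	int idx = rbio_stripe_page_index(rbio, 2, 3);

	/*
	 * the old pstripe helper derived the same slot from bytes:
	 * (nr_data * 64K) >> PAGE_CACHE_SHIFT == nr_data * 16
	 */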
 /*
  * helper to index into the pstripe
  */
 static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
 {
-       index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
-       return rbio->stripe_pages[index];
+       return rbio_stripe_page(rbio, rbio->nr_data, index);
 }
 
 /*
@@ -626,10 +641,7 @@ static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
 {
        if (rbio->nr_data + 1 == rbio->real_stripes)
                return NULL;
-
-       index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
-               PAGE_CACHE_SHIFT;
-       return rbio->stripe_pages[index];
+       return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
 }
 
 /*
@@ -889,6 +901,7 @@ static void raid_write_end_io(struct bio *bio)
 {
        struct btrfs_raid_bio *rbio = bio->bi_private;
        int err = bio->bi_error;
+       int max_errors;
 
        if (err)
                fail_bio_stripe(rbio, bio);
@@ -901,7 +914,9 @@ static void raid_write_end_io(struct bio *bio)
        err = 0;
 
        /* OK, we have read all the stripes we need to. */
-       if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+       max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
+                    0 : rbio->bbio->max_errors;
+       if (atomic_read(&rbio->error) > max_errors)
                err = -EIO;
 
        rbio_orig_end_io(rbio, err);
@@ -947,8 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
  */
 static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
 {
-       unsigned long nr = stripe_len * nr_stripes;
-       return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
+       return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes;
 }
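
The rbio_nr_pages() change keeps the allocation in agreement with that
per-stripe indexing: rounding must happen per stripe, since each stripe
is handed a full stripe_npages slots even when its tail page is partial.
With the usual page-aligned 64K stripes the two formulas agree; a worked
example with an assumed unaligned stripe_len of 6K on 4K pages and 3
stripes shows where they diverge:

	/* old: DIV_ROUND_UP(6K * 3, 4K)  == 5 slots -- one too few */
	/* new: DIV_ROUND_UP(6K, 4K) * 3  == 2 * 3 == 6 slots       */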
 
 /*
@@ -966,8 +980,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
        void *p;
 
        rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
-                      DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
-                       GFP_NOFS);
+                      DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
+                      sizeof(long), GFP_NOFS);
        if (!rbio)
                return ERR_PTR(-ENOMEM);
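
The bitmap sizing fix above is more than cosmetic: the old divisor
BITS_PER_LONG / 8 yielded the byte count needed to hold stripe_npages
bits, but the bitmap_*() helpers read and write whole unsigned longs.
A worked example on 64-bit with an assumed stripe_npages of 16:

	/*
	 * old: DIV_ROUND_UP(16, BITS_PER_LONG / 8)
	 *    = DIV_ROUND_UP(16, 8)                    ==  2 bytes
	 * new: DIV_ROUND_UP(16, BITS_PER_LONG) * sizeof(long)
	 *    = 1 * 8                                  ==  8 bytes
	 *
	 * bitmap operations touch the trailing area one long at a
	 * time, so the allocation must be padded to a long boundary.
	 */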
 
@@ -1021,18 +1035,17 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
                if (!page)
                        return -ENOMEM;
                rbio->stripe_pages[i] = page;
-               ClearPageUptodate(page);
        }
        return 0;
 }
 
-/* allocate pages for just the p/q stripes */
+/* only allocate pages for p/q stripes */
 static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
 {
        int i;
        struct page *page;
 
-       i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
+       i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
 
        for (; i < rbio->nr_pages; i++) {
                if (rbio->stripe_pages[i])
@@ -1120,18 +1133,6 @@ static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
        }
 }
 
-/*
- * these are just the pages from the rbio array, not from anything
- * the FS sent down to us
- */
-static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
-{
-       int index;
-       index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
-       index += page;
-       return rbio->stripe_pages[index];
-}
-
 /*
  * helper function to walk our bio list and populate the bio_pages array with
  * the result.  This seems expensive, but it is faster than constantly
@@ -1175,7 +1176,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 {
        struct btrfs_bio *bbio = rbio->bbio;
        void *pointers[rbio->real_stripes];
-       int stripe_len = rbio->stripe_len;
        int nr_data = rbio->nr_data;
        int stripe;
        int pagenr;
@@ -1183,7 +1183,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
        int q_stripe = -1;
        struct bio_list bio_list;
        struct bio *bio;
-       int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
        int ret;
 
        bio_list_init(&bio_list);
@@ -1226,7 +1225,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
        else
                clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
-       for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+       for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                struct page *p;
                /* first collect one page from each data stripe */
                for (stripe = 0; stripe < nr_data; stripe++) {
@@ -1268,7 +1267,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
         * everything else.
         */
        for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
-               for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        if (stripe < rbio->nr_data) {
                                page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1292,7 +1291,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
                if (!bbio->tgtdev_map[stripe])
                        continue;
 
-               for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        if (stripe < rbio->nr_data) {
                                page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1506,7 +1505,6 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
        int bios_to_read = 0;
        struct bio_list bio_list;
        int ret;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        int pagenr;
        int stripe;
        struct bio *bio;
@@ -1525,7 +1523,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
         * stripe
         */
        for (stripe = 0; stripe < rbio->nr_data; stripe++) {
-               for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        /*
                         * we want to find all the pages missing from
@@ -1801,7 +1799,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
        int pagenr, stripe;
        void **pointers;
        int faila = -1, failb = -1;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        struct page *page;
        int err;
        int i;
@@ -1824,7 +1821,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 
        index_rbio_pages(rbio);
 
-       for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+       for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                /*
                 * Now we just use bitmap to mark the horizontal stripes in
                 * which we have data when doing parity scrub.
@@ -1935,7 +1932,7 @@ pstripe:
                 * other endio functions will fiddle the uptodate bits
                 */
                if (rbio->operation == BTRFS_RBIO_WRITE) {
-                       for (i = 0;  i < nr_pages; i++) {
+                       for (i = 0;  i < rbio->stripe_npages; i++) {
                                if (faila != -1) {
                                        page = rbio_stripe_page(rbio, faila, i);
                                        SetPageUptodate(page);
@@ -2031,7 +2028,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
        int bios_to_read = 0;
        struct bio_list bio_list;
        int ret;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        int pagenr;
        int stripe;
        struct bio *bio;
@@ -2055,7 +2051,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
                        continue;
                }
 
-               for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *p;
 
                        /*
@@ -2279,37 +2275,11 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
                        if (!page)
                                return -ENOMEM;
                        rbio->stripe_pages[index] = page;
-                       ClearPageUptodate(page);
                }
        }
        return 0;
 }
 
-/*
- * end io function used by finish_rmw.  When we finally
- * get here, we've written a full stripe
- */
-static void raid_write_parity_end_io(struct bio *bio)
-{
-       struct btrfs_raid_bio *rbio = bio->bi_private;
-       int err = bio->bi_error;
-
-       if (bio->bi_error)
-               fail_bio_stripe(rbio, bio);
-
-       bio_put(bio);
-
-       if (!atomic_dec_and_test(&rbio->stripes_pending))
-               return;
-
-       err = 0;
-
-       if (atomic_read(&rbio->error))
-               err = -EIO;
-
-       rbio_orig_end_io(rbio, err);
-}
-
 static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
                                         int need_check)
 {
@@ -2462,7 +2432,7 @@ submit_write:
                        break;
 
                bio->bi_private = rbio;
-               bio->bi_end_io = raid_write_parity_end_io;
+               bio->bi_end_io = raid_write_end_io;
                submit_bio(WRITE, bio);
        }
        return;
index ef6d8fc85853851da0da03923b75421982d65d8f..2bd0011450df2715ca926a6ccd2d99cb7d40f8b4 100644 (file)
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
            root_objectid == BTRFS_TREE_LOG_OBJECTID ||
            root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
            root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-           root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+           root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+           root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
                return 1;
        return 0;
 }
@@ -3030,7 +3031,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
        int ret = 0;
 
        BUG_ON(cluster->start != cluster->boundary[0]);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = btrfs_check_data_free_space(inode, cluster->start,
                                          cluster->end + 1 - cluster->start);
@@ -3057,7 +3058,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
        btrfs_free_reserved_data_space(inode, cluster->start,
                                       cluster->end + 1 - cluster->start);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 0c981ebe2acb427d5684bca46cc1281e61753905..92bf5ee732fb0e14f7e330dddaadccb4b70b35cc 100644 (file)
@@ -2813,7 +2813,7 @@ out:
 
 static inline int scrub_calc_parity_bitmap_len(int nsectors)
 {
-       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
 }
 
 static void scrub_parity_get(struct scrub_parity *sparity)
@@ -3458,7 +3458,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
                return ret;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (em->start != chunk_offset)
                goto out;
 
@@ -4279,7 +4279,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
                return PTR_ERR(inode);
 
        /* Avoid truncate/dio/punch hole.. */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        inode_dio_wait(inode);
 
        physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
@@ -4358,7 +4358,7 @@ next_page:
        }
        ret = COPY_COMPLETE;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        iput(inode);
        return ret;
 }
index 9b9eab6d048e93d32963b0c66a6d9ab6c022ee63..d41e09fe8e38d77674862c7fe61f610a81d1159d 100644 (file)
@@ -383,6 +383,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        int ret = 0;
        char *compress_type;
        bool compress_force = false;
+       enum btrfs_compression_type saved_compress_type;
+       bool saved_compress_force;
+       int no_compress = 0;
 
        cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
        if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
@@ -462,6 +465,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        /* Fallthrough */
                case Opt_compress:
                case Opt_compress_type:
+                       saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+                               info->compress_type : BTRFS_COMPRESS_NONE;
+                       saved_compress_force =
+                               btrfs_test_opt(root, FORCE_COMPRESS);
                        if (token == Opt_compress ||
                            token == Opt_compress_force ||
                            strcmp(args[0].from, "zlib") == 0) {
@@ -470,6 +477,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                btrfs_set_opt(info->mount_opt, COMPRESS);
                                btrfs_clear_opt(info->mount_opt, NODATACOW);
                                btrfs_clear_opt(info->mount_opt, NODATASUM);
+                               no_compress = 0;
                        } else if (strcmp(args[0].from, "lzo") == 0) {
                                compress_type = "lzo";
                                info->compress_type = BTRFS_COMPRESS_LZO;
@@ -477,25 +485,21 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                btrfs_clear_opt(info->mount_opt, NODATACOW);
                                btrfs_clear_opt(info->mount_opt, NODATASUM);
                                btrfs_set_fs_incompat(info, COMPRESS_LZO);
+                               no_compress = 0;
                        } else if (strncmp(args[0].from, "no", 2) == 0) {
                                compress_type = "no";
                                btrfs_clear_opt(info->mount_opt, COMPRESS);
                                btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                                compress_force = false;
+                               no_compress++;
                        } else {
                                ret = -EINVAL;
                                goto out;
                        }
 
                        if (compress_force) {
-                               btrfs_set_and_info(root, FORCE_COMPRESS,
-                                                  "force %s compression",
-                                                  compress_type);
+                               btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
                        } else {
-                               if (!btrfs_test_opt(root, COMPRESS))
-                                       btrfs_info(root->fs_info,
-                                                  "btrfs: use %s compression",
-                                                  compress_type);
                                /*
                                 * If we remount from compress-force=xxx to
                                 * compress=xxx, we need clear FORCE_COMPRESS
@@ -504,6 +508,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                 */
                                btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                        }
+                       if ((btrfs_test_opt(root, COMPRESS) &&
+                            (info->compress_type != saved_compress_type ||
+                             compress_force != saved_compress_force)) ||
+                           (!btrfs_test_opt(root, COMPRESS) &&
+                            no_compress == 1)) {
+                               btrfs_info(root->fs_info,
+                                          "%s %s compression",
+                                          (compress_force) ? "force" : "use",
+                                          compress_type);
+                       }
+                       compress_force = false;
                        break;
                case Opt_ssd:
                        btrfs_set_and_info(root, SSD,
index e0ac85949067c30191ce3635e2a65edbe8a56361..539e7b5e3f86a83059f070409fdb36749a958ce4 100644 (file)
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(raid56),
        BTRFS_FEAT_ATTR_PTR(skinny_metadata),
        BTRFS_FEAT_ATTR_PTR(no_holes),
+       BTRFS_FEAT_ATTR_PTR(free_space_tree),
        NULL
 };
 
@@ -780,6 +782,39 @@ failure:
        return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set)
+{
+       struct btrfs_fs_devices *fs_devs;
+       struct kobject *fsid_kobj;
+       u64 features;
+       int ret;
+
+       if (!fs_info)
+               return;
+
+       features = get_features(fs_info, set);
+       ASSERT(bit & supported_feature_masks[set]);
+
+       fs_devs = fs_info->fs_devices;
+       fsid_kobj = &fs_devs->fsid_kobj;
+
+       if (!fsid_kobj->state_initialized)
+               return;
+
+       /*
+        * FIXME: this is too heavy to update just one value, ideally we'd like
+        * to use sysfs_update_group but some refactoring is needed first.
+        */
+       sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+       ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
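
A sketch of what the FIXME is pointing at: sysfs_update_group()
re-evaluates an attribute group in place rather than tearing it down
and recreating it, but it decides visibility through the group's
.is_visible callback, which would first have to learn to consult the
live superblock flags -- hence "some refactoring is needed". Assuming
that refactoring, the remove/create pair above would shrink to roughly:

	ret = sysfs_update_group(fsid_kobj, &btrfs_feature_attr_group);
	if (ret)
		btrfs_warn(fs_info, "sysfs: failed to update feature group");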
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
index 9c09522125a6b26d0577e139c38ebad7ca5c8331..d7da1a4c2f6c12d04a8f29e4d8f503943058d46d 100644 (file)
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {                            \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
                                struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
index b1d920b3007017c2014d28217cb5efa1d7ed4d96..0e1e61a7ec23265d292e165fd39f1f8d2080f604 100644 (file)
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
        struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-                                               GFP_NOFS);
+                                               GFP_KERNEL);
 
        if (!fs_info)
                return fs_info;
        fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->fs_devices) {
                kfree(fs_info);
                return NULL;
        }
        fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->super_copy) {
                kfree(fs_info->fs_devices);
                kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
        struct btrfs_block_group_cache *cache;
 
-       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       cache = kzalloc(sizeof(*cache), GFP_KERNEL);
        if (!cache)
                return NULL;
        cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                       GFP_NOFS);
+                                       GFP_KERNEL);
        if (!cache->free_space_ctl) {
                kfree(cache);
                return NULL;
index e29fa297e053951a2d2d178a8793c9d49e021023..669b58201e36881fb1434d76fd787a45329f042e 100644 (file)
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
         * test.
         */
        for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
                if (!page) {
                        test_msg("Failed to allocate test page\n");
                        ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
         * |--- delalloc ---|
         * |---  search  ---|
         */
-       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
        start = 0;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
                test_msg("Couldn't find the locked page\n");
                goto out_bits;
        }
-       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
         *
         * We are re-using our test_start from above since it works out well.
         */
-       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
        }
        ret = 0;
 out_bits:
-       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
        if (locked_page)
                page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
        test_msg("Running extent buffer bitmap tests\n");
 
-       bitmap = kmalloc(len, GFP_NOFS);
+       bitmap = kmalloc(len, GFP_KERNEL);
        if (!bitmap) {
                test_msg("Couldn't allocate test bitmap\n");
                return -ENOMEM;
index 5de55fdd28bcda6aca3d0a2becf5bed8e96f1b63..e2d3da02deee4b73b9f9c96ed1e6cc9f4c1b5700 100644 (file)
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
                               (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
                               EXTENT_DELALLOC | EXTENT_DIRTY |
                               EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
                               BTRFS_MAX_EXTENT_SIZE+8191,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1096,7 +1096,7 @@ out:
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
                                 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                                NULL, GFP_NOFS);
+                                NULL, GFP_KERNEL);
        iput(inode);
        btrfs_free_dummy_root(root);
        return ret;
index 323e12cc9d2f522388fe929ed66d4dd946b5881d..978c3a8108936381309de4681e90400aeb86874f 100644 (file)
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct inode *inode,
                                     struct btrfs_path *path,
                                     struct list_head *logged_list,
-                                    struct btrfs_log_ctx *ctx)
+                                    struct btrfs_log_ctx *ctx,
+                                    const u64 start,
+                                    const u64 end)
 {
        struct extent_map *em, *n;
        struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        }
 
        list_sort(NULL, &extents, extent_cmp);
-
+       /*
+        * Collect any new ordered extents within the range. This is to
+        * prevent logging file extent items without waiting for the disk
+        * location they point to being written. We do this only to deal
+        * with races against concurrent lockless direct IO writes.
+        */
+       btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
        while (!list_empty(&extents)) {
                em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
                        goto out_unlock;
                }
                ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-                                               &logged_list, ctx);
+                                               &logged_list, ctx, start, end);
                if (ret) {
                        err = ret;
                        goto out_unlock;
index c32abbca9d77f65684b85d137fa41bec8bac005c..366b335946fae763e380ed884792df5efe07a2c9 100644 (file)
@@ -108,7 +108,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        },
 };
 
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
        [BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
        [BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
@@ -233,6 +233,7 @@ static struct btrfs_device *__alloc_device(void)
        spin_lock_init(&dev->reada_lock);
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
+       btrfs_device_data_ordered_init(dev);
        INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
@@ -1183,7 +1184,7 @@ again:
                struct map_lookup *map;
                int i;
 
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
                for (i = 0; i < map->num_stripes; i++) {
                        u64 end;
 
@@ -2755,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                        free_extent_map(em);
                return -EINVAL;
        }
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        lock_chunks(root->fs_info->chunk_root);
        check_system_chunk(trans, extent_root, map->type);
        unlock_chunks(root->fs_info->chunk_root);
@@ -3751,7 +3752,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
                btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
                btrfs_warn(fs_info,
-       "metatdata profile 0x%llx has lower redundancy than data profile 0x%llx",
+       "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
                        bctl->meta.target, bctl->data.target);
        }
 
@@ -4718,7 +4719,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                goto error;
        }
        set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
-       em->bdev = (struct block_device *)map;
+       em->map_lookup = map;
        em->start = start;
        em->len = num_bytes;
        em->block_start = 0;
@@ -4813,7 +4814,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                return -EINVAL;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        item_size = btrfs_chunk_item_size(map->num_stripes);
        stripe_size = em->orig_block_len;
 
@@ -4968,7 +4969,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
        if (!em)
                return 1;
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        for (i = 0; i < map->num_stripes; i++) {
                if (map->stripes[i].dev->missing) {
                        miss_ndevs++;
@@ -5048,7 +5049,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
                return 1;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
                ret = map->num_stripes;
        else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5084,7 +5085,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                len = map->stripe_len * nr_data_stripes(map);
        free_extent_map(em);
@@ -5105,7 +5106,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                ret = 1;
        free_extent_map(em);
@@ -5264,7 +5265,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                return -EINVAL;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        offset = logical - em->start;
 
        stripe_len = map->stripe_len;
@@ -5378,35 +5379,33 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                 * target drive.
                 */
                for (i = 0; i < tmp_num_stripes; i++) {
-                       if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
-                               /*
-                                * In case of DUP, in order to keep it
-                                * simple, only add the mirror with the
-                                * lowest physical address
-                                */
-                               if (found &&
-                                   physical_of_found <=
-                                    tmp_bbio->stripes[i].physical)
-                                       continue;
-                               index_srcdev = i;
-                               found = 1;
-                               physical_of_found =
-                                       tmp_bbio->stripes[i].physical;
-                       }
+                       if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
+                               continue;
+
+                       /*
+                        * In case of DUP, in order to keep it simple, only add
+                        * the mirror with the lowest physical address
+                        */
+                       if (found &&
+                           physical_of_found <= tmp_bbio->stripes[i].physical)
+                               continue;
+
+                       index_srcdev = i;
+                       found = 1;
+                       physical_of_found = tmp_bbio->stripes[i].physical;
                }
 
-               if (found) {
-                       mirror_num = index_srcdev + 1;
-                       patch_the_first_stripe_for_dev_replace = 1;
-                       physical_to_patch_in_first_stripe = physical_of_found;
-               } else {
+               btrfs_put_bbio(tmp_bbio);
+
+               if (!found) {
                        WARN_ON(1);
                        ret = -EIO;
-                       btrfs_put_bbio(tmp_bbio);
                        goto out;
                }
 
-               btrfs_put_bbio(tmp_bbio);
+               mirror_num = index_srcdev + 1;
+               patch_the_first_stripe_for_dev_replace = 1;
+               physical_to_patch_in_first_stripe = physical_of_found;
        } else if (mirror_num > map->num_stripes) {
                mirror_num = 0;
        }
@@ -5806,7 +5805,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
                free_extent_map(em);
                return -EIO;
        }
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
 
        length = em->len;
        rmap_len = map->stripe_len;
@@ -6069,7 +6068,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
        bbio->fs_info = root->fs_info;
        atomic_set(&bbio->stripes_pending, bbio->num_stripes);
 
-       if (bbio->raid_map) {
+       if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+           ((rw & WRITE) || (mirror_num > 1))) {
                /* In this case, map_length has been set to the length of
                   a single stripe; not the whole write */
                if (rw & WRITE) {
@@ -6210,6 +6210,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        struct extent_map *em;
        u64 logical;
        u64 length;
+       u64 stripe_len;
        u64 devid;
        u8 uuid[BTRFS_UUID_SIZE];
        int num_stripes;
@@ -6218,6 +6219,37 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
+       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+       /* Validation check */
+       if (!num_stripes) {
+               btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+                         num_stripes);
+               return -EIO;
+       }
+       if (!IS_ALIGNED(logical, root->sectorsize)) {
+               btrfs_err(root->fs_info,
+                         "invalid chunk logical %llu", logical);
+               return -EIO;
+       }
+       if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+               btrfs_err(root->fs_info,
+                       "invalid chunk length %llu", length);
+               return -EIO;
+       }
+       if (!is_power_of_2(stripe_len)) {
+               btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+                         stripe_len);
+               return -EIO;
+       }
+       if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+           btrfs_chunk_type(leaf, chunk)) {
+               btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+                         ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+                           BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+                         btrfs_chunk_type(leaf, chunk));
+               return -EIO;
+       }
 
        read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
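
The new validation block rejects on-disk chunk items whose values would
poison later arithmetic: a zero num_stripes is used as a divisor by the
stripe mapping code, a zero or unaligned length breaks extent map
lookups, and stripe_len must be a power of two because the mapping code
uses shift-and-mask math on it. A few concrete rejected values, assuming
a 4K sectorsize:

	/*
	 *   num_stripes == 0                        -> -EIO
	 *   logical     == 4097  (unaligned)        -> -EIO
	 *   length      == 0                        -> -EIO
	 *   stripe_len  == 12345 (not a power of 2) -> -EIO
	 */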
@@ -6234,7 +6266,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        em = alloc_extent_map();
        if (!em)
                return -ENOMEM;
-       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
        map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
        if (!map) {
                free_extent_map(em);
@@ -6242,7 +6273,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        }
 
        set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
-       em->bdev = (struct block_device *)map;
+       em->map_lookup = map;
        em->start = logical;
        em->len = length;
        em->orig_start = 0;
@@ -6944,7 +6975,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
        /* In order to kick the device replace finish process */
        lock_chunks(root);
        list_for_each_entry(em, &transaction->pending_chunks, list) {
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
 
                for (i = 0; i < map->num_stripes; i++) {
                        dev = map->stripes[i].dev;
index fd953c361a43c7c7f0faf3e100df12b052c06552..6c68d6356197afb630443f383100894227b07e65 100644 (file)
@@ -126,7 +126,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
         * locks the inode's i_mutex before calling setxattr or removexattr.
         */
        if (flags & XATTR_REPLACE) {
-               ASSERT(mutex_is_locked(&inode->i_mutex));
+               ASSERT(inode_is_locked(inode));
                di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
                                        name, name_len, 0);
                if (!di)
index afa023dded5be937ddd713847b37f2d165f586be..675a3332d72fad1eed4cba643959dbb9dabde051 100644 (file)
@@ -446,7 +446,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
                return 0;
 
        cachefiles_begin_secure(cache, &saved_cred);
-       mutex_lock(&d_inode(object->backer)->i_mutex);
+       inode_lock(d_inode(object->backer));
 
        /* if there's an extension to a partial page at the end of the backing
         * file, we need to discard the partial page so that we pick up new
@@ -465,7 +465,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
        ret = notify_change(object->backer, &newattrs, NULL);
 
 truncate_failed:
-       mutex_unlock(&d_inode(object->backer)->i_mutex);
+       inode_unlock(d_inode(object->backer));
        cachefiles_end_secure(cache, saved_cred);
 
        if (ret == -EIO) {
index c4b893453e0eefda8475b3537e934f2bfc46d268..1c2334c163ddea17f99298b89ec58e09c51d3bd0 100644 (file)
@@ -295,7 +295,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
                                cachefiles_mark_object_buried(cache, rep, why);
                }
 
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
 
                if (ret == -EIO)
                        cachefiles_io_error(cache, "Unlink failed");
@@ -306,7 +306,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 
        /* directories have to be moved to the graveyard */
        _debug("move stale object to graveyard");
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
 try_again:
        /* first step is to make up a grave dentry in the graveyard */
@@ -423,13 +423,13 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 
        dir = dget_parent(object->dentry);
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) {
                /* object allocation for the same key preemptively deleted this
                 * object's file so that it could create its own file */
                _debug("object preemptively buried");
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                ret = 0;
        } else {
                /* we need to check that our parent is _still_ our parent - it
@@ -442,7 +442,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
                        /* it got moved, presumably by cachefilesd culling it,
                         * so it's no longer in the key path and we can ignore
                         * it */
-                       mutex_unlock(&d_inode(dir)->i_mutex);
+                       inode_unlock(d_inode(dir));
                        ret = 0;
                }
        }
@@ -501,7 +501,7 @@ lookup_again:
        /* search the current directory for the element name */
        _debug("lookup '%s'", name);
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        start = jiffies;
        next = lookup_one_len(name, dir, nlen);
@@ -585,7 +585,7 @@ lookup_again:
        /* process the next component */
        if (key) {
                _debug("advance");
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                dput(dir);
                dir = next;
                next = NULL;
@@ -623,7 +623,7 @@ lookup_again:
        /* note that we're now using this object */
        ret = cachefiles_mark_object_active(cache, object);
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(dir);
        dir = NULL;
 
@@ -705,7 +705,7 @@ lookup_error:
                cachefiles_io_error(cache, "Lookup failed");
        next = NULL;
 error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(next);
 error_out2:
        dput(dir);
@@ -729,7 +729,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
        _enter(",,%s", dirname);
 
        /* search the current directory for the element name */
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
 
        start = jiffies;
        subdir = lookup_one_len(dirname, dir, strlen(dirname));
@@ -768,7 +768,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
                       d_backing_inode(subdir)->i_ino);
        }
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        /* we need to make sure the subdir is a directory */
        ASSERT(d_backing_inode(subdir));
@@ -800,19 +800,19 @@ check_error:
        return ERR_PTR(ret);
 
 mkdir_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(subdir);
        pr_err("mkdir %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
 lookup_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = PTR_ERR(subdir);
        pr_err("Lookup %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
 nomem_d_alloc:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        _leave(" = -ENOMEM");
        return ERR_PTR(-ENOMEM);
 }
@@ -837,7 +837,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
        //       dir, filename);
 
        /* look up the victim */
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        start = jiffies;
        victim = lookup_one_len(filename, dir, strlen(filename));
@@ -852,7 +852,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
         * at the netfs's request whilst the cull was in progress
         */
        if (d_is_negative(victim)) {
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                dput(victim);
                _leave(" = -ENOENT [absent]");
                return ERR_PTR(-ENOENT);
@@ -881,13 +881,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
 
 object_in_use:
        read_unlock(&cache->active_lock);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(victim);
        //_leave(" = -EBUSY [in use]");
        return ERR_PTR(-EBUSY);
 
 lookup_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = PTR_ERR(victim);
        if (ret == -ENOENT) {
                /* file or dir now absent - probably retired by netfs */
@@ -947,7 +947,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
        return 0;
 
 error_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 error:
        dput(victim);
        if (ret == -ENOENT) {
@@ -982,7 +982,7 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
        if (IS_ERR(victim))
                return PTR_ERR(victim);
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(victim);
        //_leave(" = 0");
        return 0;
index b7d218a168fb81c2c028ea38ffccf2a17a1d68d5..c22213789090f975b3680b10e022a1e5f2f9fdb4 100644 (file)
@@ -1108,7 +1108,7 @@ retry_locked:
                return 0;
 
        /* past end of file? */
-       i_size = inode->i_size;   /* caller holds i_mutex */
+       i_size = i_size_read(inode);
 
        if (page_off >= i_size ||
            (pos_in_page == 0 && (pos+len) >= i_size &&
@@ -1149,7 +1149,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
                page = grab_cache_page_write_begin(mapping, index, 0);
                if (!page)
                        return -ENOMEM;
-               *pagep = page;
 
                dout("write_begin file %p inode %p page %p %d~%d\n", file,
                     inode, page, (int)pos, (int)len);
@@ -1184,8 +1183,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
                zero_user_segment(page, from+copied, len);
 
        /* did file size increase? */
-       /* (no need for i_size_read(); we caller holds i_mutex */
-       if (pos+copied > inode->i_size)
+       if (pos+copied > i_size_read(inode))
                check_cap = ceph_inode_set_size(inode, pos+copied);
 
        if (!PageUptodate(page))
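
The i_size conversions in these ceph hunks (and the fscache ones below)
are needed because the paths no longer necessarily run under the inode
lock. i_size_read() exists because loff_t is 64 bits wide: on 64-bit
kernels it is a plain load, while on 32-bit SMP it retries under a
seqcount so a reader never observes a torn half-update. Usage is just:

	loff_t size = i_size_read(inode);	/* safe without i_mutex */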
@@ -1378,11 +1376,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        ret = VM_FAULT_NOPAGE;
        if ((off > size) ||
-           (page->mapping != inode->i_mapping))
+           (page->mapping != inode->i_mapping)) {
+               unlock_page(page);
                goto out;
+       }
 
        ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
-       if (ret == 0) {
+       if (ret >= 0) {
                /* success.  we'll keep the page locked. */
                set_page_dirty(page);
                ret = VM_FAULT_LOCKED;
@@ -1393,8 +1393,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                        ret = VM_FAULT_SIGBUS;
        }
 out:
-       if (ret != VM_FAULT_LOCKED)
-               unlock_page(page);
        if (ret == VM_FAULT_LOCKED ||
            ci->i_inline_version != CEPH_INLINE_NONE) {
                int dirty;
index a4766ded1ba78e8bbbff70d4c5e65b3f9060d687..a351480dbabc95891e4b61f83fb92485f8ea7b18 100644 (file)
@@ -106,7 +106,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
 
        memset(&aux, 0, sizeof(aux));
        aux.mtime = inode->i_mtime;
-       aux.size = inode->i_size;
+       aux.size = i_size_read(inode);
 
        memcpy(buffer, &aux, sizeof(aux));
 
@@ -117,9 +117,7 @@ static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
                                        uint64_t *size)
 {
        const struct ceph_inode_info* ci = cookie_netfs_data;
-       const struct inode* inode = &ci->vfs_inode;
-
-       *size = inode->i_size;
+       *size = i_size_read(&ci->vfs_inode);
 }
 
 static enum fscache_checkaux ceph_fscache_inode_check_aux(
@@ -134,7 +132,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
 
        memset(&aux, 0, sizeof(aux));
        aux.mtime = inode->i_mtime;
-       aux.size = inode->i_size;
+       aux.size = i_size_read(inode);
 
        if (memcmp(data, &aux, sizeof(aux)) != 0)
                return FSCACHE_CHECKAUX_OBSOLETE;
@@ -197,7 +195,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
                return;
 
        /* Avoid multiple racing open requests */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (ci->fscache)
                goto done;
@@ -207,7 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
                                             ci, true);
        fscache_check_consistency(ci->fscache);
 done:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
 }
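
The many mutex_lock(&inode->i_mutex) to inode_lock(inode) conversions in this diff are mechanical. At this point in the tree the helpers in include/linux/fs.h are, from memory, thin wrappers over the same mutex, so behaviour is unchanged and the call sites become insulated from the later switch to a rwsem:

static inline void inode_lock(struct inode *inode)
{
	mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
	mutex_unlock(&inode->i_mutex);
}
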
 
index c69e1253b47bfbefb2eb2cb1564a5c4b9d87f9a3..cdbf8cf3d52c3354569b7c86a709f4ce3fc09e2c 100644 (file)
@@ -2030,7 +2030,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (datasync)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        dirty = try_flush_caps(inode, &flush_tid);
        dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -2046,7 +2046,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = wait_event_interruptible(ci->i_cap_wq,
                                        caps_are_flushed(inode, flush_tid));
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
        return ret;
index 9314b4ea2375145647aa16446a1f33ae2c8106b2..fd11fb231a2ea796e86eae933265a488ad6774e5 100644 (file)
@@ -507,7 +507,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
        loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
        loff_t retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        retval = -EINVAL;
        switch (whence) {
        case SEEK_CUR:
@@ -542,7 +542,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
                }
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return retval;
 }
 
index fe02ae7f056a332220929ce653e822a0a39e2605..3b31723573268e924346c1bffddb7361390b2a8d 100644 (file)
@@ -215,7 +215,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        req->r_inode = d_inode(child);
        ihold(d_inode(child));
@@ -224,7 +224,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
 
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        if (!err) {
                struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
index 3c68e6aee2f0531bdd8afb0125bbae48419d0dfb..eb9028e8cfc5197ef1ea99524fcbc67d67b4df91 100644 (file)
@@ -397,8 +397,9 @@ int ceph_release(struct inode *inode, struct file *file)
 }
 
 enum {
-       CHECK_EOF = 1,
-       READ_INLINE = 2,
+       HAVE_RETRIED = 1,
+       CHECK_EOF =    2,
+       READ_INLINE =  3,
 };
 
 /*
@@ -411,17 +412,15 @@ enum {
 static int striped_read(struct inode *inode,
                        u64 off, u64 len,
                        struct page **pages, int num_pages,
-                       int *checkeof, bool o_direct,
-                       unsigned long buf_align)
+                       int *checkeof)
 {
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 pos, this_len, left;
-       int io_align, page_align;
-       int pages_left;
-       int read;
+       loff_t i_size;
+       int page_align, pages_left;
+       int read, ret;
        struct page **page_pos;
-       int ret;
        bool hit_stripe, was_short;
 
        /*
@@ -432,13 +431,9 @@ static int striped_read(struct inode *inode,
        page_pos = pages;
        pages_left = num_pages;
        read = 0;
-       io_align = off & ~PAGE_MASK;
 
 more:
-       if (o_direct)
-               page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
-       else
-               page_align = pos & ~PAGE_MASK;
+       page_align = pos & ~PAGE_MASK;
        this_len = left;
        ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
                                  &ci->i_layout, pos, &this_len,
@@ -452,13 +447,12 @@ more:
        dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
             ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
 
+       i_size = i_size_read(inode);
        if (ret >= 0) {
                int didpages;
-               if (was_short && (pos + ret < inode->i_size)) {
-                       int zlen = min(this_len - ret,
-                                      inode->i_size - pos - ret);
-                       int zoff = (o_direct ? buf_align : io_align) +
-                                   read + ret;
+               if (was_short && (pos + ret < i_size)) {
+                       int zlen = min(this_len - ret, i_size - pos - ret);
+                       int zoff = (off & ~PAGE_MASK) + read + ret;
                        dout(" zero gap %llu to %llu\n",
                                pos + ret, pos + ret + zlen);
                        ceph_zero_page_vector_range(zoff, zlen, pages);
@@ -473,14 +467,14 @@ more:
                pages_left -= didpages;
 
                /* hit a stripe boundary and need to continue */
-               if (left && hit_stripe && pos < inode->i_size)
+               if (left && hit_stripe && pos < i_size)
                        goto more;
        }
 
        if (read > 0) {
                ret = read;
                /* did we bounce off eof? */
-               if (pos + left > inode->i_size)
+               if (pos + left > i_size)
                        *checkeof = CHECK_EOF;
        }
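
A toy model of the short-read handling in the hunk above: when the OSD returns fewer bytes than requested but the file extends past the end of the read, the hole is zero-filled up to min(requested length, i_size). Values are illustrative:

#include <stdio.h>

int main(void)
{
        long long pos = 4096, this_len = 8192, ret = 1024, i_size = 6144;
        long long zlen = this_len - ret;        /* bytes requested but */
        if (i_size - pos - ret < zlen)          /* not returned        */
                zlen = i_size - pos - ret;
        printf("zero gap %lld to %lld (%lld bytes)\n",
               pos + ret, pos + ret + zlen, zlen);  /* 5120 to 6144 (1024) */
        return 0;
}
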
 
@@ -521,54 +515,28 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
        if (ret < 0)
                return ret;
 
-       if (iocb->ki_flags & IOCB_DIRECT) {
-               while (iov_iter_count(i)) {
-                       size_t start;
-                       ssize_t n;
-
-                       n = dio_get_pagev_size(i);
-                       pages = dio_get_pages_alloc(i, n, &start, &num_pages);
-                       if (IS_ERR(pages))
-                               return PTR_ERR(pages);
-
-                       ret = striped_read(inode, off, n,
-                                          pages, num_pages, checkeof,
-                                          1, start);
-
-                       ceph_put_page_vector(pages, num_pages, true);
-
-                       if (ret <= 0)
-                               break;
-                       off += ret;
-                       iov_iter_advance(i, ret);
-                       if (ret < n)
+       num_pages = calc_pages_for(off, len);
+       pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+       if (IS_ERR(pages))
+               return PTR_ERR(pages);
+       ret = striped_read(inode, off, len, pages,
+                               num_pages, checkeof);
+       if (ret > 0) {
+               int l, k = 0;
+               size_t left = ret;
+
+               while (left) {
+                       size_t page_off = off & ~PAGE_MASK;
+                       size_t copy = min_t(size_t, left,
+                                           PAGE_SIZE - page_off);
+                       l = copy_page_to_iter(pages[k++], page_off, copy, i);
+                       off += l;
+                       left -= l;
+                       if (l < copy)
                                break;
                }
-       } else {
-               num_pages = calc_pages_for(off, len);
-               pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
-               if (IS_ERR(pages))
-                       return PTR_ERR(pages);
-               ret = striped_read(inode, off, len, pages,
-                                       num_pages, checkeof, 0, 0);
-               if (ret > 0) {
-                       int l, k = 0;
-                       size_t left = ret;
-
-                       while (left) {
-                               size_t page_off = off & ~PAGE_MASK;
-                               size_t copy = min_t(size_t,
-                                                   PAGE_SIZE - page_off, left);
-                               l = copy_page_to_iter(pages[k++], page_off,
-                                                     copy, i);
-                               off += l;
-                               left -= l;
-                               if (l < copy)
-                                       break;
-                       }
-               }
-               ceph_release_page_vector(pages, num_pages);
        }
+       ceph_release_page_vector(pages, num_pages);
 
        if (off > iocb->ki_pos) {
                ret = off - iocb->ki_pos;
@@ -579,6 +547,193 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
        return ret;
 }
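
A runnable walk of the copy loop that ceph_sync_read() now uses unconditionally: starting at an arbitrary file offset, each iteration copies at most one page worth of data, bounded by the offset within the current page, which is exactly how the copy_page_to_iter() chunks are sized above:

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
        unsigned long off = 5000, left = 10000;
        int k = 0;
        while (left) {
                unsigned long page_off = off & (PAGE_SIZE - 1);
                unsigned long copy = PAGE_SIZE - page_off;
                if (copy > left)
                        copy = left;
                printf("page %d: page_off %lu, %lu bytes\n",
                       k++, page_off, copy);
                off += copy;
                left -= copy;
        }
        return 0;
}
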
 
+struct ceph_aio_request {
+       struct kiocb *iocb;
+       size_t total_len;
+       int write;
+       int error;
+       struct list_head osd_reqs;
+       unsigned num_reqs;
+       atomic_t pending_reqs;
+       struct timespec mtime;
+       struct ceph_cap_flush *prealloc_cf;
+};
+
+struct ceph_aio_work {
+       struct work_struct work;
+       struct ceph_osd_request *req;
+};
+
+static void ceph_aio_retry_work(struct work_struct *work);
+
+static void ceph_aio_complete(struct inode *inode,
+                             struct ceph_aio_request *aio_req)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       int ret;
+
+       if (!atomic_dec_and_test(&aio_req->pending_reqs))
+               return;
+
+       ret = aio_req->error;
+       if (!ret)
+               ret = aio_req->total_len;
+
+       dout("ceph_aio_complete %p rc %d\n", inode, ret);
+
+       if (ret >= 0 && aio_req->write) {
+               int dirty;
+
+               loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
+               if (endoff > i_size_read(inode)) {
+                       if (ceph_inode_set_size(inode, endoff))
+                               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+               }
+
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_inline_version = CEPH_INLINE_NONE;
+               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+                                              &aio_req->prealloc_cf);
+               spin_unlock(&ci->i_ceph_lock);
+               if (dirty)
+                       __mark_inode_dirty(inode, dirty);
+
+       }
+
+       ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
+                                               CEPH_CAP_FILE_RD));
+
+       aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
+
+       ceph_free_cap_flush(aio_req->prealloc_cf);
+       kfree(aio_req);
+}
+
+static void ceph_aio_complete_req(struct ceph_osd_request *req,
+                                 struct ceph_msg *msg)
+{
+       int rc = req->r_result;
+       struct inode *inode = req->r_inode;
+       struct ceph_aio_request *aio_req = req->r_priv;
+       struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+       int num_pages = calc_pages_for((u64)osd_data->alignment,
+                                      osd_data->length);
+
+       dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
+            inode, rc, osd_data->length);
+
+       if (rc == -EOLDSNAPC) {
+               struct ceph_aio_work *aio_work;
+               BUG_ON(!aio_req->write);
+
+               aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
+               if (aio_work) {
+                       INIT_WORK(&aio_work->work, ceph_aio_retry_work);
+                       aio_work->req = req;
+                       queue_work(ceph_inode_to_client(inode)->wb_wq,
+                                  &aio_work->work);
+                       return;
+               }
+               rc = -ENOMEM;
+       } else if (!aio_req->write) {
+               if (rc == -ENOENT)
+                       rc = 0;
+               if (rc >= 0 && osd_data->length > rc) {
+                       int zoff = osd_data->alignment + rc;
+                       int zlen = osd_data->length - rc;
+                       /*
+                        * If the read is satisfied by a single OSD request,
+                        * it may extend past EOF. Otherwise the read is
+                        * bounded by i_size.
+                        */
+                       if (aio_req->num_reqs == 1) {
+                               loff_t i_size = i_size_read(inode);
+                               loff_t endoff = aio_req->iocb->ki_pos + rc;
+                               if (endoff < i_size)
+                                       zlen = min_t(size_t, zlen,
+                                                    i_size - endoff);
+                               aio_req->total_len = rc + zlen;
+                       }
+
+                       if (zlen > 0)
+                               ceph_zero_page_vector_range(zoff, zlen,
+                                                           osd_data->pages);
+               }
+       }
+
+       ceph_put_page_vector(osd_data->pages, num_pages, false);
+       ceph_osdc_put_request(req);
+
+       if (rc < 0)
+               cmpxchg(&aio_req->error, 0, rc);
+
+       ceph_aio_complete(inode, aio_req);
+       return;
+}
+
+static void ceph_aio_retry_work(struct work_struct *work)
+{
+       struct ceph_aio_work *aio_work =
+               container_of(work, struct ceph_aio_work, work);
+       struct ceph_osd_request *orig_req = aio_work->req;
+       struct ceph_aio_request *aio_req = orig_req->r_priv;
+       struct inode *inode = orig_req->r_inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_snap_context *snapc;
+       struct ceph_osd_request *req;
+       int ret;
+
+       spin_lock(&ci->i_ceph_lock);
+       if (__ceph_have_pending_cap_snap(ci)) {
+               struct ceph_cap_snap *capsnap =
+                       list_last_entry(&ci->i_cap_snaps,
+                                       struct ceph_cap_snap,
+                                       ci_item);
+               snapc = ceph_get_snap_context(capsnap->context);
+       } else {
+               BUG_ON(!ci->i_head_snapc);
+               snapc = ceph_get_snap_context(ci->i_head_snapc);
+       }
+       spin_unlock(&ci->i_ceph_lock);
+
+       req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
+                       false, GFP_NOFS);
+       if (!req) {
+               ret = -ENOMEM;
+               req = orig_req;
+               goto out;
+       }
+
+       req->r_flags =  CEPH_OSD_FLAG_ORDERSNAP |
+                       CEPH_OSD_FLAG_ONDISK |
+                       CEPH_OSD_FLAG_WRITE;
+       req->r_base_oloc = orig_req->r_base_oloc;
+       req->r_base_oid = orig_req->r_base_oid;
+
+       req->r_ops[0] = orig_req->r_ops[0];
+       osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+
+       ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
+                               snapc, CEPH_NOSNAP, &aio_req->mtime);
+
+       ceph_osdc_put_request(orig_req);
+
+       req->r_callback = ceph_aio_complete_req;
+       req->r_inode = inode;
+       req->r_priv = aio_req;
+
+       ret = ceph_osdc_start_request(req->r_osdc, req, false);
+out:
+       if (ret < 0) {
+               BUG_ON(ret == -EOLDSNAPC);
+               req->r_result = ret;
+               ceph_aio_complete_req(req, NULL);
+       }
+
+       ceph_put_snap_context(snapc);
+       kfree(aio_work);
+}
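
The accounting in ceph_aio_complete() is a reference-count completion: the submitter takes one reference per sub-request, each completion drops one, and only the last completion reports the result, with cmpxchg() keeping the first error. A minimal standalone model of that pattern (names hypothetical, C11 atomics standing in for the kernel primitives):

#include <stdatomic.h>
#include <stdio.h>

struct aio_demo {
        atomic_int pending;     /* one ref per sub-request */
        atomic_int error;       /* first failure wins */
        int total_len;
};

static void complete_one(struct aio_demo *a, int rc)
{
        if (rc < 0) {
                int zero = 0;   /* mirrors cmpxchg(&aio_req->error, 0, rc) */
                atomic_compare_exchange_strong(&a->error, &zero, rc);
        }
        if (atomic_fetch_sub(&a->pending, 1) != 1)
                return;         /* other sub-requests still in flight */
        int err = atomic_load(&a->error);
        printf("complete: %d\n", err ? err : a->total_len);
}

int main(void)
{
        struct aio_demo a = { .pending = 3, .error = 0, .total_len = 300 };
        complete_one(&a, 0);
        complete_one(&a, 0);
        complete_one(&a, 0);    /* last drop prints "complete: 300" */
        return 0;
}
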
+
 /*
  * Write commit request unsafe callback, called to tell us when a
  * request is unsafe (that is, in flight--has been handed to the
@@ -612,16 +767,10 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
 }
 
 
-/*
- * Synchronous write, straight from __user pointer or user pages.
- *
- * If write spans object boundary, just do multiple writes.  (For a
- * correct atomic write, we should e.g. take write locks on all
- * objects, rollback on failure, etc.)
- */
 static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
-                      struct ceph_snap_context *snapc)
+ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+                      struct ceph_snap_context *snapc,
+                      struct ceph_cap_flush **pcf)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
@@ -630,44 +779,52 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
        struct ceph_vino vino;
        struct ceph_osd_request *req;
        struct page **pages;
-       int num_pages;
-       int written = 0;
+       struct ceph_aio_request *aio_req = NULL;
+       int num_pages = 0;
        int flags;
-       int check_caps = 0;
        int ret;
        struct timespec mtime = CURRENT_TIME;
-       size_t count = iov_iter_count(from);
+       size_t count = iov_iter_count(iter);
+       loff_t pos = iocb->ki_pos;
+       bool write = iov_iter_rw(iter) == WRITE;
 
-       if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+       if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                return -EROFS;
 
-       dout("sync_direct_write on file %p %lld~%u\n", file, pos,
-            (unsigned)count);
+       dout("sync_direct_read_write (%s) on file %p %lld~%u\n",
+            (write ? "write" : "read"), file, pos, (unsigned)count);
 
        ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
        if (ret < 0)
                return ret;
 
-       ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                           pos >> PAGE_CACHE_SHIFT,
-                                           (pos + count) >> PAGE_CACHE_SHIFT);
-       if (ret < 0)
-               dout("invalidate_inode_pages2_range returned %d\n", ret);
+       if (write) {
+               ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                       pos >> PAGE_CACHE_SHIFT,
+                                       (pos + count) >> PAGE_CACHE_SHIFT);
+               if (ret < 0)
+                       dout("invalidate_inode_pages2_range returned %d\n", ret);
 
-       flags = CEPH_OSD_FLAG_ORDERSNAP |
-               CEPH_OSD_FLAG_ONDISK |
-               CEPH_OSD_FLAG_WRITE;
+               flags = CEPH_OSD_FLAG_ORDERSNAP |
+                       CEPH_OSD_FLAG_ONDISK |
+                       CEPH_OSD_FLAG_WRITE;
+       } else {
+               flags = CEPH_OSD_FLAG_READ;
+       }
 
-       while (iov_iter_count(from) > 0) {
-               u64 len = dio_get_pagev_size(from);
-               size_t start;
-               ssize_t n;
+       while (iov_iter_count(iter) > 0) {
+               u64 size = dio_get_pagev_size(iter);
+               size_t start = 0;
+               ssize_t len;
 
                vino = ceph_vino(inode);
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-                                           vino, pos, &len, 0,
-                                           2,/*include a 'startsync' command*/
-                                           CEPH_OSD_OP_WRITE, flags, snapc,
+                                           vino, pos, &size, 0,
+                                           /*include a 'startsync' command*/
+                                           write ? 2 : 1,
+                                           write ? CEPH_OSD_OP_WRITE :
+                                                   CEPH_OSD_OP_READ,
+                                           flags, snapc,
                                            ci->i_truncate_seq,
                                            ci->i_truncate_size,
                                            false);
@@ -676,10 +833,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                        break;
                }
 
-               osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
-
-               n = len;
-               pages = dio_get_pages_alloc(from, len, &start, &num_pages);
+               len = size;
+               pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
                if (IS_ERR(pages)) {
                        ceph_osdc_put_request(req);
                        ret = PTR_ERR(pages);
@@ -687,47 +842,128 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                }
 
                /*
-                * throw out any page cache pages in this range. this
-                * may block.
+                * To simplify error handling, allow AIO only when the IO is
+                * within i_size or can be satisfied by a single OSD request.
                 */
-               truncate_inode_pages_range(inode->i_mapping, pos,
-                                  (pos+n) | (PAGE_CACHE_SIZE-1));
-               osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
-                                               false, false);
+               if (pos == iocb->ki_pos && !is_sync_kiocb(iocb) &&
+                   (len == count || pos + count <= i_size_read(inode))) {
+                       aio_req = kzalloc(sizeof(*aio_req), GFP_KERNEL);
+                       if (aio_req) {
+                               aio_req->iocb = iocb;
+                               aio_req->write = write;
+                               INIT_LIST_HEAD(&aio_req->osd_reqs);
+                               if (write) {
+                                       aio_req->mtime = mtime;
+                                       swap(aio_req->prealloc_cf, *pcf);
+                               }
+                       }
+                       /* ignore error */
+               }
+
+               if (write) {
+                       /*
+                        * throw out any page cache pages in this range. this
+                        * may block.
+                        */
+                       truncate_inode_pages_range(inode->i_mapping, pos,
+                                       (pos+len) | (PAGE_CACHE_SIZE - 1));
+
+                       osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+               }
+
+
+               osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
+                                                false, false);
 
-               /* BUG_ON(vino.snap != CEPH_NOSNAP); */
                ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
 
-               ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+               if (aio_req) {
+                       aio_req->total_len += len;
+                       aio_req->num_reqs++;
+                       atomic_inc(&aio_req->pending_reqs);
+
+                       req->r_callback = ceph_aio_complete_req;
+                       req->r_inode = inode;
+                       req->r_priv = aio_req;
+                       list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
+
+                       pos += len;
+                       iov_iter_advance(iter, len);
+                       continue;
+               }
+
+               ret = ceph_osdc_start_request(req->r_osdc, req, false);
                if (!ret)
                        ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
 
+               size = i_size_read(inode);
+               if (!write) {
+                       if (ret == -ENOENT)
+                               ret = 0;
+                       if (ret >= 0 && ret < len && pos + ret < size) {
+                               int zlen = min_t(size_t, len - ret,
+                                                size - pos - ret);
+                               ceph_zero_page_vector_range(start + ret, zlen,
+                                                           pages);
+                               ret += zlen;
+                       }
+                       if (ret >= 0)
+                               len = ret;
+               }
+
                ceph_put_page_vector(pages, num_pages, false);
 
                ceph_osdc_put_request(req);
-               if (ret)
+               if (ret < 0)
+                       break;
+
+               pos += len;
+               iov_iter_advance(iter, len);
+
+               if (!write && pos >= size)
                        break;
-               pos += n;
-               written += n;
-               iov_iter_advance(from, n);
 
-               if (pos > i_size_read(inode)) {
-                       check_caps = ceph_inode_set_size(inode, pos);
-                       if (check_caps)
+               if (write && pos > size) {
+                       if (ceph_inode_set_size(inode, pos))
                                ceph_check_caps(ceph_inode(inode),
                                                CHECK_CAPS_AUTHONLY,
                                                NULL);
                }
        }
 
-       if (ret != -EOLDSNAPC && written > 0) {
+       if (aio_req) {
+               if (aio_req->num_reqs == 0) {
+                       kfree(aio_req);
+                       return ret;
+               }
+
+               ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR :
+                                             CEPH_CAP_FILE_RD);
+
+               while (!list_empty(&aio_req->osd_reqs)) {
+                       req = list_first_entry(&aio_req->osd_reqs,
+                                              struct ceph_osd_request,
+                                              r_unsafe_item);
+                       list_del_init(&req->r_unsafe_item);
+                       if (ret >= 0)
+                               ret = ceph_osdc_start_request(req->r_osdc,
+                                                             req, false);
+                       if (ret < 0) {
+                               BUG_ON(ret == -EOLDSNAPC);
+                               req->r_result = ret;
+                               ceph_aio_complete_req(req, NULL);
+                       }
+               }
+               return -EIOCBQUEUED;
+       }
+
+       if (ret != -EOLDSNAPC && pos > iocb->ki_pos) {
+               ret = pos - iocb->ki_pos;
                iocb->ki_pos = pos;
-               ret = written;
        }
        return ret;
 }
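
The AIO-eligibility test inside the loop above can be read in isolation: async completion is attempted only for the first request of the iteration (pos == ki_pos) and only when the whole IO either fits in one OSD request or stays inside i_size, which is what keeps the error handling simple. A toy version of the predicate with illustrative values:

#include <stdbool.h>
#include <stdio.h>

static bool can_aio(long pos, long ki_pos, long len, long count,
                    long i_size, bool is_sync)
{
        return pos == ki_pos && !is_sync &&
               (len == count || pos + count <= i_size);
}

int main(void)
{
        printf("%d\n", can_aio(0, 0, 4096, 8192, 16384, false)); /* 1 */
        printf("%d\n", can_aio(0, 0, 4096, 8192, 4096, false));  /* 0 */
        return 0;
}
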
 
-
 /*
  * Synchronous write, straight from __user pointer or user pages.
  *
@@ -897,8 +1133,14 @@ again:
                     ceph_cap_string(got));
 
                if (ci->i_inline_version == CEPH_INLINE_NONE) {
-                       /* hmm, this isn't really async... */
-                       ret = ceph_sync_read(iocb, to, &retry_op);
+                       if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+                               ret = ceph_direct_read_write(iocb, to,
+                                                            NULL, NULL);
+                               if (ret >= 0 && ret < len)
+                                       retry_op = CHECK_EOF;
+                       } else {
+                               ret = ceph_sync_read(iocb, to, &retry_op);
+                       }
                } else {
                        retry_op = READ_INLINE;
                }
@@ -916,7 +1158,7 @@ again:
                pinned_page = NULL;
        }
        ceph_put_cap_refs(ci, got);
-       if (retry_op && ret >= 0) {
+       if (retry_op > HAVE_RETRIED && ret >= 0) {
                int statret;
                struct page *page = NULL;
                loff_t i_size;
@@ -968,12 +1210,11 @@ again:
                if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
                    ret < len) {
                        dout("sync_read hit hole, ppos %lld < size %lld"
-                            ", reading more\n", iocb->ki_pos,
-                            inode->i_size);
+                            ", reading more\n", iocb->ki_pos, i_size);
 
                        read += ret;
                        len -= ret;
-                       retry_op = 0;
+                       retry_op = HAVE_RETRIED;
                        goto again;
                }
        }
@@ -1014,7 +1255,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (!prealloc_cf)
                return -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
@@ -1052,7 +1293,7 @@ retry_snap:
        }
 
        dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
-            inode, ceph_vinop(inode), pos, count, inode->i_size);
+            inode, ceph_vinop(inode), pos, count, i_size_read(inode));
        if (fi->fmode & CEPH_FILE_MODE_LAZY)
                want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
        else
@@ -1070,7 +1311,7 @@ retry_snap:
            (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
                struct ceph_snap_context *snapc;
                struct iov_iter data;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                spin_lock(&ci->i_ceph_lock);
                if (__ceph_have_pending_cap_snap(ci)) {
@@ -1088,8 +1329,8 @@ retry_snap:
                /* we might need to revert back to that point */
                data = *from;
                if (iocb->ki_flags & IOCB_DIRECT)
-                       written = ceph_sync_direct_write(iocb, &data, pos,
-                                                        snapc);
+                       written = ceph_direct_read_write(iocb, &data, snapc,
+                                                        &prealloc_cf);
                else
                        written = ceph_sync_write(iocb, &data, pos, snapc);
                if (written == -EOLDSNAPC) {
@@ -1097,14 +1338,14 @@ retry_snap:
                                "got EOLDSNAPC, retrying\n",
                                inode, ceph_vinop(inode),
                                pos, (unsigned)count);
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        goto retry_snap;
                }
                if (written > 0)
                        iov_iter_advance(from, written);
                ceph_put_snap_context(snapc);
        } else {
-               loff_t old_size = inode->i_size;
+               loff_t old_size = i_size_read(inode);
                /*
                 * No need to acquire the i_truncate_mutex. Because
                 * the MDS revokes Fwb caps before sending truncate
@@ -1115,9 +1356,9 @@ retry_snap:
                written = generic_perform_write(file, from, pos);
                if (likely(written >= 0))
                        iocb->ki_pos = pos + written;
-               if (inode->i_size > old_size)
+               if (i_size_read(inode) > old_size)
                        ceph_fscache_update_objectsize(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        if (written >= 0) {
@@ -1147,7 +1388,7 @@ retry_snap:
        goto out_unlocked;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out_unlocked:
        ceph_free_cap_flush(prealloc_cf);
        current->backing_dev_info = NULL;
@@ -1160,9 +1401,10 @@ out_unlocked:
 static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
 {
        struct inode *inode = file->f_mapping->host;
+       loff_t i_size;
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
                ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
@@ -1172,9 +1414,10 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
                }
        }
 
+       i_size = i_size_read(inode);
        switch (whence) {
        case SEEK_END:
-               offset += inode->i_size;
+               offset += i_size;
                break;
        case SEEK_CUR:
                /*
@@ -1190,24 +1433,24 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
                offset += file->f_pos;
                break;
        case SEEK_DATA:
-               if (offset >= inode->i_size) {
+               if (offset >= i_size) {
                        ret = -ENXIO;
                        goto out;
                }
                break;
        case SEEK_HOLE:
-               if (offset >= inode->i_size) {
+               if (offset >= i_size) {
                        ret = -ENXIO;
                        goto out;
                }
-               offset = inode->i_size;
+               offset = i_size;
                break;
        }
 
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -1363,7 +1606,7 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!prealloc_cf)
                return -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (ceph_snap(inode) != CEPH_NOSNAP) {
                ret = -EROFS;
@@ -1418,7 +1661,7 @@ static long ceph_fallocate(struct file *file, int mode,
 
        ceph_put_cap_refs(ci, got);
 unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        ceph_free_cap_flush(prealloc_cf);
        return ret;
 }
index da55eb8bcffab89755baf5229b92ededf49dd484..fb4ba2e4e2a5fa5c5d62afa3b94f758c7906687b 100644 (file)
@@ -548,7 +548,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
        if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
            (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
                dout("size %lld -> %llu\n", inode->i_size, size);
-               inode->i_size = size;
+               i_size_write(inode, size);
                inode->i_blocks = (size + (1<<9) - 1) >> 9;
                ci->i_reported_size = size;
                if (truncate_seq != ci->i_truncate_seq) {
@@ -808,7 +808,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
                        spin_unlock(&ci->i_ceph_lock);
 
                        err = -EINVAL;
-                       if (WARN_ON(symlen != inode->i_size))
+                       if (WARN_ON(symlen != i_size_read(inode)))
                                goto out;
 
                        err = -ENOMEM;
@@ -1549,7 +1549,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
 
        spin_lock(&ci->i_ceph_lock);
        dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
-       inode->i_size = size;
+       i_size_write(inode, size);
        inode->i_blocks = (size + (1 << 9) - 1) >> 9;
 
        /* tell the MDS if we are approaching max_size */
@@ -1911,7 +1911,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
                     inode->i_size, attr->ia_size);
                if ((issued & CEPH_CAP_FILE_EXCL) &&
                    attr->ia_size > inode->i_size) {
-                       inode->i_size = attr->ia_size;
+                       i_size_write(inode, attr->ia_size);
                        inode->i_blocks =
                                (attr->ia_size + (1 << 9) - 1) >> 9;
                        inode->i_ctime = attr->ia_ctime;
index 7febcf2475c5ab675c04dfd2fddaa3ed574522a0..50b268483302922ee63d205a2fa560ec27676eed 100644 (file)
@@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...)
        vaf.fmt = fmt;
        vaf.va = &args;
 
-       pr_err("CIFS VFS: %pV", &vaf);
+       pr_err_ratelimited("CIFS VFS: %pV", &vaf);
 
        va_end(args);
 }
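
The switch to pr_err_ratelimited() caps log volume at roughly the kernel's printk ratelimit defaults, 10 messages per 5-second window, with the rest dropped. A userspace sketch of that behaviour (the windowing here is simplified, not the kernel's ___ratelimit()):

#include <stdarg.h>
#include <stdio.h>
#include <time.h>

static int ratelimited_err(const char *fmt, ...)
{
        static time_t window;
        static int used;
        time_t now = time(NULL);

        if (now - window >= 5) {        /* new 5s window */
                window = now;
                used = 0;
        }
        if (++used > 10)
                return 0;               /* suppressed */

        va_list ap;
        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        va_end(ap);
        return 1;
}

int main(void)
{
        for (int i = 0; i < 25; i++)
                ratelimited_err("CIFS VFS: error %d\n", i); /* prints 10 */
        return 0;
}
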
index f40fbaca1b2a2c1d7457d71bd1a5e7dd4b477859..66cf0f9fff8984cb12eed1c89d91bc9c6ccc6a9d 100644 (file)
@@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...);
 /* information message: e.g., configuration, major event */
 #define cifs_dbg(type, fmt, ...)                                       \
 do {                                                                   \
-       if (type == FYI) {                                              \
-               if (cifsFYI & CIFS_INFO) {                              \
-                       pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__);  \
-               }                                                       \
+       if (type == FYI && cifsFYI & CIFS_INFO) {                       \
+               pr_debug_ratelimited("%s: "                             \
+                           fmt, __FILE__, ##__VA_ARGS__);              \
        } else if (type == VFS) {                                       \
                cifs_vfs_err(fmt, ##__VA_ARGS__);                       \
        } else if (type == NOISY && type != 0) {                        \
-               pr_debug(fmt, ##__VA_ARGS__);                           \
+               pr_debug_ratelimited(fmt, ##__VA_ARGS__);               \
        }                                                               \
 } while (0)
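
The do { ... } while (0) wrapper the macro keeps is what lets the simplified cifs_dbg() still compose safely with unbraced if/else at call sites; without it the trailing semicolon would terminate the if. A self-contained demonstration:

#include <stdio.h>

#define LOG_BAD(x)  { puts("log"); puts(#x); }
#define LOG_GOOD(x) do { puts("log"); puts(#x); } while (0)

int main(void)
{
        int cond = 0;
        /* Using LOG_BAD(hello); here would not compile: the `;` after
         * the `}` ends the if, orphaning the else below. */
        if (cond)
                LOG_GOOD(hello);
        else
                puts("else branch still parses");
        return 0;
}
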
 
index 7dc886c9a78fc428b368a1c911b8c1ad745f48a5..e956cba94338184416c96925690f21493fc61ab6 100644 (file)
@@ -175,7 +175,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
         * string to the length of the original string to allow for worst case.
         */
        md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
-       mountdata = kzalloc(md_len + 1, GFP_KERNEL);
+       mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL);
        if (mountdata == NULL) {
                rc = -ENOMEM;
                goto compose_mount_options_err;
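
The sizing fix above accounts for the "ip=" prefix that cifs_compose_mount_options() later prepends to the address; the old worst-case estimate of strlen(old) + INET6_ADDRSTRLEN could come up a few bytes short. A toy reproduction of the arithmetic (string contents hypothetical):

#include <stdio.h>
#include <string.h>

#define INET6_ADDRSTRLEN 46

int main(void)
{
        const char *old = "user=foo";
        size_t short_est = strlen(old) + INET6_ADDRSTRLEN + 1;
        size_t safe_est  = short_est + sizeof("ip=");
        printf("allocate %zu bytes, not %zu\n", safe_est, short_est);
        return 0;
}
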
index afa09fce81515e4caf7500b04c16dfb96a71cfd1..e682b36a210f236561817bd11a799d3a8f57caf2 100644 (file)
@@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
 
        ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
        if (!ses->auth_key.response) {
-               rc = ENOMEM;
+               rc = -ENOMEM;
                ses->auth_key.len = 0;
                goto setup_ntlmv2_rsp_ret;
        }
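
The one-character fix above matters because kernel callers test for failure with `if (rc < 0)` or similar: a positive ENOMEM (12) would sail through as success. The convention in miniature:

#include <errno.h>
#include <stdio.h>

static int alloc_key(int fail)
{
        if (fail)
                return -ENOMEM;         /* negative errno signals failure */
        return 0;
}

int main(void)
{
        int rc = alloc_key(1);
        if (rc < 0)
                printf("failed: %d\n", rc);     /* failed: -12 */
        return 0;
}
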
index c4c1169814b21d15463410ca474ef8a8608c0ed0..c48ca13673e37f2eba7c4045f140e6e8b740e499 100644 (file)
@@ -507,6 +507,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 
        seq_printf(s, ",rsize=%u", cifs_sb->rsize);
        seq_printf(s, ",wsize=%u", cifs_sb->wsize);
+       seq_printf(s, ",echo_interval=%lu",
+                       tcon->ses->server->echo_interval / HZ);
        /* convert actimeo and display it in seconds */
        seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
 
@@ -640,9 +642,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
                while (*s && *s != sep)
                        s++;
 
-               mutex_lock(&dir->i_mutex);
+               inode_lock(dir);
                child = lookup_one_len(p, dentry, s - p);
-               mutex_unlock(&dir->i_mutex);
+               inode_unlock(dir);
                dput(dentry);
                dentry = child;
        } while (!IS_ERR(dentry));
@@ -752,6 +754,9 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
        ssize_t rc;
        struct inode *inode = file_inode(iocb->ki_filp);
 
+       if (iocb->ki_filp->f_flags & O_DIRECT)
+               return cifs_user_readv(iocb, iter);
+
        rc = cifs_revalidate_mapping(inode);
        if (rc)
                return rc;
@@ -766,6 +771,18 @@ static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        ssize_t written;
        int rc;
 
+       if (iocb->ki_filp->f_flags & O_DIRECT) {
+               written = cifs_user_writev(iocb, from);
+               if (written > 0 && CIFS_CACHE_READ(cinode)) {
+                       cifs_zap_mapping(inode);
+                       cifs_dbg(FYI,
+                                "Set no oplock for inode=%p after a write operation\n",
+                                inode);
+                       cinode->oplock = 0;
+               }
+               return written;
+       }
+
        written = cifs_get_writer(cinode);
        if (written)
                return written;
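
The new fast paths key off the O_DIRECT flag on the open file, routing reads and writes around the page cache. A small userspace check of that flag (some filesystems refuse O_DIRECT at open time, which the demo tolerates):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
        int fd = open("/tmp/demo", O_CREAT | O_WRONLY | O_DIRECT, 0600);
        if (fd < 0) {
                perror("open O_DIRECT");        /* e.g. tmpfs rejects it */
                return 1;
        }
        int flags = fcntl(fd, F_GETFL);
        printf("O_DIRECT %s\n", (flags & O_DIRECT) ? "set" : "clear");
        return 0;
}
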
index 2b510c537a0d588c532fe9200cdd4a9b03a4c9f4..a25b2513f1460608596f9c9fa7a9cb8e8383e5f2 100644 (file)
 #define SERVER_NAME_LENGTH 40
 #define SERVER_NAME_LEN_WITH_NULL     (SERVER_NAME_LENGTH + 1)
 
-/* SMB echo "timeout" -- FIXME: tunable? */
-#define SMB_ECHO_INTERVAL (60 * HZ)
+/* echo interval in seconds */
+#define SMB_ECHO_INTERVAL_MIN 1
+#define SMB_ECHO_INTERVAL_MAX 600
+#define SMB_ECHO_INTERVAL_DEFAULT 60
 
 #include "cifspdu.h"
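
The three new constants are consumed in cifs_get_tcp_session() further down: a user-supplied echo_interval in seconds is range-checked and stored in jiffies, with anything out of range falling back to the 60-second default. A toy version of that validation (HZ value illustrative):

#include <stdio.h>

#define HZ 250
#define SMB_ECHO_INTERVAL_MIN     1
#define SMB_ECHO_INTERVAL_MAX     600
#define SMB_ECHO_INTERVAL_DEFAULT 60

static unsigned long echo_jiffies(unsigned int secs)
{
        if (secs >= SMB_ECHO_INTERVAL_MIN && secs <= SMB_ECHO_INTERVAL_MAX)
                return secs * HZ;
        return SMB_ECHO_INTERVAL_DEFAULT * HZ;
}

int main(void)
{
        printf("%lu %lu\n", echo_jiffies(120), echo_jiffies(0));
        return 0;       /* 30000 15000: out-of-range 0 got the default */
}
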
 
@@ -225,7 +227,7 @@ struct smb_version_operations {
        void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
        void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
        /* verify the message */
-       int (*check_message)(char *, unsigned int);
+       int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
        bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
        void (*downgrade_oplock)(struct TCP_Server_Info *,
                                        struct cifsInodeInfo *, bool);
@@ -507,6 +509,7 @@ struct smb_vol {
        struct sockaddr_storage dstaddr; /* destination address */
        struct sockaddr_storage srcaddr; /* allow binding to a local IP */
        struct nls_table *local_nls;
+       unsigned int echo_interval; /* echo interval in secs */
 };
 
 #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
@@ -627,7 +630,9 @@ struct TCP_Server_Info {
 #ifdef CONFIG_CIFS_SMB2
        unsigned int    max_read;
        unsigned int    max_write;
+       __u8            preauth_hash[512];
 #endif /* CONFIG_CIFS_SMB2 */
+       unsigned long echo_interval;
 };
 
 static inline unsigned int
@@ -809,7 +814,10 @@ struct cifs_ses {
        bool need_reconnect:1; /* connection reset, uid now invalid */
 #ifdef CONFIG_CIFS_SMB2
        __u16 session_flags;
-       char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
+       __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+       __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
+       __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+       __u8 preauth_hash[512];
 #endif /* CONFIG_CIFS_SMB2 */
 };
 
index c63fd1dde25b861b011f604522572c5619f177f1..eed7ff50faf016e6714867542953334b199e7627 100644 (file)
@@ -102,7 +102,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
                        struct smb_hdr *out_buf,
                        int *bytes_returned);
 extern int cifs_reconnect(struct TCP_Server_Info *server);
-extern int checkSMB(char *buf, unsigned int length);
+extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
 extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
 extern bool backup_cred(struct cifs_sb_info *);
 extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
@@ -439,7 +439,8 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
 extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
 extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
 extern int calc_seckey(struct cifs_ses *);
-extern int generate_smb3signingkey(struct cifs_ses *);
+extern int generate_smb30signingkey(struct cifs_ses *);
+extern int generate_smb311signingkey(struct cifs_ses *);
 
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern int calc_lanman_hash(const char *password, const char *cryptkey,
index ecb0803bdb0e50479f50b0e874a4e87cb064e2d0..a763cd3d9e7c80589df30ddaa9515906c6fa6138 100644 (file)
@@ -95,6 +95,7 @@ enum {
        Opt_cruid, Opt_gid, Opt_file_mode,
        Opt_dirmode, Opt_port,
        Opt_rsize, Opt_wsize, Opt_actimeo,
+       Opt_echo_interval,
 
        /* Mount options which take string value */
        Opt_user, Opt_pass, Opt_ip,
@@ -188,6 +189,7 @@ static const match_table_t cifs_mount_option_tokens = {
        { Opt_rsize, "rsize=%s" },
        { Opt_wsize, "wsize=%s" },
        { Opt_actimeo, "actimeo=%s" },
+       { Opt_echo_interval, "echo_interval=%s" },
 
        { Opt_blank_user, "user=" },
        { Opt_blank_user, "username=" },
@@ -368,7 +370,6 @@ cifs_reconnect(struct TCP_Server_Info *server)
        server->session_key.response = NULL;
        server->session_key.len = 0;
        server->lstrp = jiffies;
-       mutex_unlock(&server->srv_mutex);
 
        /* mark submitted MIDs for retry and issue callback */
        INIT_LIST_HEAD(&retry_list);
@@ -381,6 +382,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                list_move(&mid_entry->qhead, &retry_list);
        }
        spin_unlock(&GlobalMid_Lock);
+       mutex_unlock(&server->srv_mutex);
 
        cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
        list_for_each_safe(tmp, tmp2, &retry_list) {
@@ -418,6 +420,7 @@ cifs_echo_request(struct work_struct *work)
        int rc;
        struct TCP_Server_Info *server = container_of(work,
                                        struct TCP_Server_Info, echo.work);
+       unsigned long echo_interval = server->echo_interval;
 
        /*
         * We cannot send an echo if it is disabled or until the
@@ -427,7 +430,7 @@ cifs_echo_request(struct work_struct *work)
         */
        if (!server->ops->need_neg || server->ops->need_neg(server) ||
            (server->ops->can_echo && !server->ops->can_echo(server)) ||
-           time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+           time_before(jiffies, server->lstrp + echo_interval - HZ))
                goto requeue_echo;
 
        rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
@@ -436,7 +439,7 @@ cifs_echo_request(struct work_struct *work)
                         server->hostname);
 
 requeue_echo:
-       queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
+       queue_delayed_work(cifsiod_wq, &server->echo, echo_interval);
 }
 
 static bool
@@ -487,9 +490,9 @@ server_unresponsive(struct TCP_Server_Info *server)
         *     a response in >60s.
         */
        if (server->tcpStatus == CifsGood &&
-           time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
-               cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
-                        server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
+           time_after(jiffies, server->lstrp + 2 * server->echo_interval)) {
+               cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n",
+                        server->hostname, (2 * server->echo_interval) / HZ);
                cifs_reconnect(server);
                wake_up(&server->response_q);
                return true;
@@ -828,7 +831,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
         * 48 bytes is enough to display the header and a little bit
         * into the payload for debugging purposes.
         */
-       length = server->ops->check_message(buf, server->total_read);
+       length = server->ops->check_message(buf, server->total_read, server);
        if (length != 0)
                cifs_dump_mem("Bad SMB: ", buf,
                        min_t(unsigned int, server->total_read, 48));
@@ -1624,6 +1627,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                                goto cifs_parse_mount_err;
                        }
                        break;
+               case Opt_echo_interval:
+                       if (get_option_ul(args, &option)) {
+                               cifs_dbg(VFS, "%s: Invalid echo interval value\n",
+                                        __func__);
+                               goto cifs_parse_mount_err;
+                       }
+                       vol->echo_interval = option;
+                       break;
 
                /* String Arguments */
 
@@ -2089,6 +2100,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
        if (!match_security(server, vol))
                return 0;
 
+       if (server->echo_interval != vol->echo_interval)
+               return 0;
+
        return 1;
 }
 
@@ -2208,6 +2222,12 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        tcp_ses->tcpStatus = CifsNew;
        ++tcp_ses->srv_count;
 
+       if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+               volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX)
+               tcp_ses->echo_interval = volume_info->echo_interval * HZ;
+       else
+               tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
+
        rc = ip_connect(tcp_ses);
        if (rc < 0) {
                cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
@@ -2237,7 +2257,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        cifs_fscache_get_client_cookie(tcp_ses);
 
        /* queue echo request delayed work */
-       queue_delayed_work(cifsiod_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
+       queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
 
        return tcp_ses;
 
@@ -2979,8 +2999,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
        if (ses_init_buf) {
                ses_init_buf->trailer.session_req.called_len = 32;
 
-               if (server->server_RFC1001_name &&
-                   server->server_RFC1001_name[0] != 0)
+               if (server->server_RFC1001_name[0] != 0)
                        rfc1002mangle(ses_init_buf->trailer.
                                      session_req.called_name,
                                      server->server_RFC1001_name,
index 0a2752b79e72cc2b7a083894843a8b3ae1dea23d..ff882aeaccc67c404a289fff472a26d8537e1128 100644 (file)
@@ -2267,7 +2267,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        xid = get_xid();
 
@@ -2292,7 +2292,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
        }
 
        free_xid(xid);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
@@ -2309,7 +2309,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        xid = get_xid();
 
@@ -2326,7 +2326,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        }
 
        free_xid(xid);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
@@ -2672,7 +2672,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
         * with a brlock that prevents writing.
         */
        down_read(&cinode->lock_sem);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        rc = generic_write_checks(iocb, from);
        if (rc <= 0)
@@ -2685,7 +2685,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
        else
                rc = -EACCES;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (rc > 0) {
                ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
index a329f5ba35aad8649ce3f644b9b11913c57cc5f8..aeb26dbfa1bf2dbfb2d111316064d240ef807336 100644 (file)
@@ -814,8 +814,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                        }
                } else
                        fattr.cf_uniqueid = iunique(sb, ROOT_I);
-       } else
-               fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+       } else {
+               if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+                   validinum == false && server->ops->get_srv_inum) {
+                       /*
+                        * Pass a NULL tcon to ensure we don't make a round
+                        * trip to the server. This only works for SMB2+.
+                        */
+                       tmprc = server->ops->get_srv_inum(xid,
+                               NULL, cifs_sb, full_path,
+                               &fattr.cf_uniqueid, data);
+                       if (tmprc)
+                               fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+               } else
+                       fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+       }
 
        /* query for SFU type info if supported and needed */
        if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
@@ -856,6 +869,13 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
        } else {
                /* we already have inode, update it */
 
+               /* if uniqueid is different, return error */
+               if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+                   CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+                       rc = -ESTALE;
+                       goto cgii_exit;
+               }
+
                /* if filetype is different, return error */
                if (unlikely(((*inode)->i_mode & S_IFMT) !=
                    (fattr.cf_mode & S_IFMT))) {
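
Both ESTALE-style checks above compare only the file-type bits: masking with S_IFMT strips the permission bits so a mode change does not masquerade as a different file, while a regular file turning into a directory does. In miniature:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
        mode_t cached = S_IFREG | 0644, fresh = S_IFDIR | 0644;
        if ((cached & S_IFMT) != (fresh & S_IFMT))
                puts("type changed under us -> return -ESTALE");
        return 0;
}
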
index 8442b8b8e0be145e7f68055e0025f1909442d156..813fe13c2ae175869cdc6db06a19392d09d7f05f 100644 (file)
@@ -310,7 +310,7 @@ check_smb_hdr(struct smb_hdr *smb)
 }
 
 int
-checkSMB(char *buf, unsigned int total_read)
+checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
 {
        struct smb_hdr *smb = (struct smb_hdr *)buf;
        __u32 rfclen = be32_to_cpu(smb->smb_buf_length);
index 0557c45e9c3308a368f7ff3e73f9423c81eedea4..b30a4a6d98a0f002c235bf3128012aff06ba2bc3 100644 (file)
@@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
                 * if buggy server returns . and .. late do we want to
                 * check for that here?
                 */
+               *tmp_buf = 0;
                rc = cifs_filldir(current_entry, file, ctx,
                                  tmp_buf, max_len);
                if (rc) {
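
The added `*tmp_buf = 0;` guards the scratch buffer across loop iterations: if the name-conversion path bails out early, cifs_filldir() must not see a stale entry left over from the previous directory entry. The reset in miniature:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char tmp_buf[16];
        strcpy(tmp_buf, "old-entry");
        tmp_buf[0] = 0;                         /* reset before reuse */
        printf("len now %zu\n", strlen(tmp_buf));       /* 0 */
        return 0;
}
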
index 1c5907019045517ac302daf4c04e8caea21f1fda..389fb9f8c84e22308ac5fcb44c27f005bee6e27c 100644 (file)
@@ -38,7 +38,7 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
         * Make sure that this really is an SMB, that it is a response,
         * and that the message ids match.
         */
-       if ((*(__le32 *)hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
+       if ((hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
            (mid == wire_mid)) {
                if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
                        return 0;
@@ -50,9 +50,9 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
                                cifs_dbg(VFS, "Received Request not response\n");
                }
        } else { /* bad signature or mid */
-               if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER)
+               if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
                        cifs_dbg(VFS, "Bad protocol string signature header %x\n",
-                                *(unsigned int *) hdr->ProtocolId);
+                                le32_to_cpu(hdr->ProtocolId));
                if (mid != wire_mid)
                        cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
                                 mid, wire_mid);
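
Declaring ProtocolId as __le32 lets the code above compare the field against a little-endian constant directly on any host, and convert with le32_to_cpu() only when printing. A userspace approximation of why that comparison is endian-safe, assembling the little-endian value explicitly from wire bytes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t wire[4] = { 0xfe, 'S', 'M', 'B' };      /* bytes on the wire */
        uint32_t le = (uint32_t)wire[0] | wire[1] << 8 |
                      wire[2] << 16 | (uint32_t)wire[3] << 24;
        printf("%s\n", le == 0x424d53fe ? "SMB2 header" : "bad header");
        return 0;
}
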
@@ -93,11 +93,11 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
 };
 
 int
-smb2_check_message(char *buf, unsigned int length)
+smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
 {
        struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
        struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
-       __u64 mid = le64_to_cpu(hdr->MessageId);
+       __u64 mid;
        __u32 len = get_rfc1002_length(buf);
        __u32 clc_len;  /* calculated length */
        int command;
@@ -111,6 +111,30 @@ smb2_check_message(char *buf, unsigned int length)
         * ie Validate the wct via smb2_struct_sizes table above
         */
 
+       if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+               struct smb2_transform_hdr *thdr =
+                       (struct smb2_transform_hdr *)buf;
+               struct cifs_ses *ses = NULL;
+               struct list_head *tmp;
+
+               /* decrypt frame now that it is completely read in */
+               spin_lock(&cifs_tcp_ses_lock);
+               list_for_each(tmp, &srvr->smb_ses_list) {
+                       ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+                       if (ses->Suid == thdr->SessionId)
+                               break;
+
+                       ses = NULL;
+               }
+               spin_unlock(&cifs_tcp_ses_lock);
+               if (ses == NULL) {
+                       cifs_dbg(VFS, "no decryption - session id not found\n");
+                       return 1;
+               }
+       }
+
+
+       mid = le64_to_cpu(hdr->MessageId);
        if (length < sizeof(struct smb2_pdu)) {
                if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
                        pdu->StructureSize2 = 0;
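
With this hunk smb2_check_message() recognizes encrypted frames before any
further header validation: a frame that begins 0xFD 'S' 'M' 'B' carries a
transform header whose SessionId selects the session holding the decryption
key. A rough user-space sketch of that dispatch (the constants mirror
SMB2_PROTO_NUMBER and SMB2_TRANSFORM_PROTO_NUM from the patch; the session
lookup itself is elided):

#include <stdint.h>
#include <string.h>

#define SMB2_PROTO           0x424d53feu  /* 0xFE 'S' 'M' 'B' on the wire */
#define SMB2_TRANSFORM_PROTO 0x424d53fdu  /* 0xFD 'S' 'M' 'B' on the wire */

enum frame_kind { FRAME_PLAIN, FRAME_ENCRYPTED, FRAME_BAD };

/* Classify a fully read frame by its first four bytes. */
static enum frame_kind classify_frame(const unsigned char *buf, size_t len)
{
        uint32_t id;

        if (len < 4)
                return FRAME_BAD;
        memcpy(&id, buf, sizeof(id));   /* assumes a little-endian host */
        if (id == SMB2_TRANSFORM_PROTO)
                return FRAME_ENCRYPTED; /* SessionId picks the key */
        if (id == SMB2_PROTO)
                return FRAME_PLAIN;
        return FRAME_BAD;
}
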
@@ -322,7 +346,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
 
        /* return pointer to beginning of data area, ie offset from SMB start */
        if ((*off != 0) && (*len != 0))
-               return (char *)(&hdr->ProtocolId[0]) + *off;
+               return (char *)(&hdr->ProtocolId) + *off;
        else
                return NULL;
 }
index 53ccdde6ff187c16084e10dd07372d6516d25139..3525ed756173d40009017ca11c9ae04cbd97f34a 100644 (file)
@@ -182,6 +182,11 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
        struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
        __u64 wire_mid = le64_to_cpu(hdr->MessageId);
 
+       if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+               cifs_dbg(VFS, "encrypted frame parsing not supported yet");
+               return NULL;
+       }
+
        spin_lock(&GlobalMid_Lock);
        list_for_each_entry(mid, &server->pending_mid_q, qhead) {
                if ((mid->mid == wire_mid) &&
@@ -1692,7 +1697,7 @@ struct smb_version_operations smb30_operations = {
        .get_lease_key = smb2_get_lease_key,
        .set_lease_key = smb2_set_lease_key,
        .new_lease_key = smb2_new_lease_key,
-       .generate_signingkey = generate_smb3signingkey,
+       .generate_signingkey = generate_smb30signingkey,
        .calc_signature = smb3_calc_signature,
        .set_integrity  = smb3_set_integrity,
        .is_read_op = smb21_is_read_op,
@@ -1779,7 +1784,7 @@ struct smb_version_operations smb311_operations = {
        .get_lease_key = smb2_get_lease_key,
        .set_lease_key = smb2_set_lease_key,
        .new_lease_key = smb2_new_lease_key,
-       .generate_signingkey = generate_smb3signingkey,
+       .generate_signingkey = generate_smb311signingkey,
        .calc_signature = smb3_calc_signature,
        .set_integrity  = smb3_set_integrity,
        .is_read_op = smb21_is_read_op,
@@ -1838,7 +1843,7 @@ struct smb_version_values smb21_values = {
 struct smb_version_values smb30_values = {
        .version_string = SMB30_VERSION_STRING,
        .protocol_id = SMB30_PROT_ID,
-       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -1858,7 +1863,7 @@ struct smb_version_values smb30_values = {
 struct smb_version_values smb302_values = {
        .version_string = SMB302_VERSION_STRING,
        .protocol_id = SMB302_PROT_ID,
-       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
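
Adding SMB2_GLOBAL_CAP_ENCRYPTION to req_capabilities advertises encryption
support in the negotiate request; whether the server agreed reduces to a
bitwise test of the capability mask it returns. A minimal sketch, assuming
the MS-SMB2 value 0x00000040 for the flag (illustrative, not taken from this
patch):

#include <stdint.h>

#define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040u  /* per MS-SMB2 2.2.3 */

/* True when the server's negotiated capabilities include encryption. */
static int server_supports_encryption(uint32_t negotiated_caps)
{
        return (negotiated_caps & SMB2_GLOBAL_CAP_ENCRYPTION) != 0;
}
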
index 767555518d40ccd357b6f5540f2758bb8be6e654..10f8d5cf5681e26b843c0ac15821eab69e6eeb22 100644 (file)
@@ -97,10 +97,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
        hdr->smb2_buf_length = cpu_to_be32(parmsize + sizeof(struct smb2_hdr)
                        - 4 /*  RFC 1001 length field itself not counted */);
 
-       hdr->ProtocolId[0] = 0xFE;
-       hdr->ProtocolId[1] = 'S';
-       hdr->ProtocolId[2] = 'M';
-       hdr->ProtocolId[3] = 'B';
+       hdr->ProtocolId = SMB2_PROTO_NUMBER;
        hdr->StructureSize = cpu_to_le16(64);
        hdr->Command = smb2_cmd;
        hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
@@ -1573,7 +1570,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
                goto ioctl_exit;
        }
 
-       memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
+       memcpy(*out_data,
+              (char *)&rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
               *plen);
 ioctl_exit:
        free_rsp_buf(resp_buftype, rsp);
@@ -2093,7 +2091,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
        }
 
        if (*buf) {
-               memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset,
+               memcpy(*buf, (char *)&rsp->hdr.ProtocolId + rsp->DataOffset,
                       *nbytes);
                free_rsp_buf(resp_buftype, iov[0].iov_base);
        } else if (resp_buftype != CIFS_NO_BUFFER) {
index 4af52780ec350323cd41e993389e7214fb50f97d..ff88d9feb01e7475f75a230c004d5f40a80f14f9 100644 (file)
@@ -86,6 +86,7 @@
 #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
 
 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
+#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
 
 /*
  * SMB2 Header Definition
@@ -102,7 +103,7 @@ struct smb2_hdr {
        __be32 smb2_buf_length; /* big endian on wire */
                                /* length is only two or three bytes - with
                                 one or two byte type preceding it that MBZ */
-       __u8   ProtocolId[4];   /* 0xFE 'S' 'M' 'B' */
+       __le32 ProtocolId;      /* 0xFE 'S' 'M' 'B' */
        __le16 StructureSize;   /* 64 */
        __le16 CreditCharge;    /* MBZ */
        __le32 Status;          /* Error from server */
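
The type change above works because the wire bytes 0xFE 'S' 'M' 'B', read as
a little-endian 32-bit value, are exactly 0x424d53fe, so the old __u8[4]
array can become a __le32 compared directly against cpu_to_le32(0x424d53fe).
A stand-alone check (illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        const uint8_t wire[4] = { 0xFE, 'S', 'M', 'B' };
        uint32_t v;

        memcpy(&v, wire, sizeof(v));    /* assumes a little-endian host */
        printf("0x%08x\n", v);          /* prints 0x424d53fe */
        return 0;
}
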
@@ -128,11 +129,10 @@ struct smb2_transform_hdr {
                                 one or two byte type preceding it that MBZ */
        __u8   ProtocolId[4];   /* 0xFD 'S' 'M' 'B' */
        __u8   Signature[16];
-       __u8   Nonce[11];
-       __u8   Reserved[5];
+       __u8   Nonce[16];
        __le32 OriginalMessageSize;
        __u16  Reserved1;
-       __le16 EncryptionAlgorithm;
+       __le16 Flags; /* EncryptionAlgorithm */
        __u64  SessionId;
 } __packed;
 
index 79dc650c18b26baf2e9a4ca8ac2c67df5ebdc63e..4f07dc93608db3fc8723f53d86219087b7cc76c8 100644 (file)
@@ -34,7 +34,8 @@ struct smb_rqst;
  *****************************************************************
  */
 extern int map_smb2_to_linux_error(char *buf, bool log_err);
-extern int smb2_check_message(char *buf, unsigned int length);
+extern int smb2_check_message(char *buf, unsigned int length,
+                             struct TCP_Server_Info *server);
 extern unsigned int smb2_calc_size(void *buf);
 extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
 extern __le16 *cifs_convert_path_to_utf16(const char *from,
index d4c5b6f109a7feaa6f2c99f21ca332ff41a2673f..8732a43b10084bf787a8410498e3ec51c69ea93c 100644 (file)
@@ -222,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
        return rc;
 }
 
-int
-generate_smb3signingkey(struct cifs_ses *ses)
+static int generate_key(struct cifs_ses *ses, struct kvec label,
+                       struct kvec context, __u8 *key, unsigned int key_size)
 {
        unsigned char zero = 0x0;
        __u8 i[4] = {0, 0, 0, 1};
@@ -233,7 +233,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        unsigned char *hashptr = prfhash;
 
        memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
-       memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
+       memset(key, 0x0, key_size);
 
        rc = smb3_crypto_shash_allocate(ses->server);
        if (rc) {
@@ -262,7 +262,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        }
 
        rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
-                               "SMB2AESCMAC", 12);
+                               label.iov_base, label.iov_len);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
                goto smb3signkey_ret;
@@ -276,7 +276,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        }
 
        rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
-                               "SmbSign", 8);
+                               context.iov_base, context.iov_len);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
                goto smb3signkey_ret;
@@ -296,12 +296,102 @@ generate_smb3signingkey(struct cifs_ses *ses)
                goto smb3signkey_ret;
        }
 
-       memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
+       memcpy(key, hashptr, key_size);
 
 smb3signkey_ret:
        return rc;
 }
 
+struct derivation {
+       struct kvec label;
+       struct kvec context;
+};
+
+struct derivation_triplet {
+       struct derivation signing;
+       struct derivation encryption;
+       struct derivation decryption;
+};
+
+static int
+generate_smb3signingkey(struct cifs_ses *ses,
+                       const struct derivation_triplet *ptriplet)
+{
+       int rc;
+
+       rc = generate_key(ses, ptriplet->signing.label,
+                         ptriplet->signing.context, ses->smb3signingkey,
+                         SMB3_SIGN_KEY_SIZE);
+       if (rc)
+               return rc;
+
+       rc = generate_key(ses, ptriplet->encryption.label,
+                         ptriplet->encryption.context, ses->smb3encryptionkey,
+                         SMB3_SIGN_KEY_SIZE);
+       if (rc)
+               return rc;
+
+       return generate_key(ses, ptriplet->decryption.label,
+                           ptriplet->decryption.context,
+                           ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+}
+
+int
+generate_smb30signingkey(struct cifs_ses *ses)
+
+{
+       struct derivation_triplet triplet;
+       struct derivation *d;
+
+       d = &triplet.signing;
+       d->label.iov_base = "SMB2AESCMAC";
+       d->label.iov_len = 12;
+       d->context.iov_base = "SmbSign";
+       d->context.iov_len = 8;
+
+       d = &triplet.encryption;
+       d->label.iov_base = "SMB2AESCCM";
+       d->label.iov_len = 11;
+       d->context.iov_base = "ServerIn ";
+       d->context.iov_len = 10;
+
+       d = &triplet.decryption;
+       d->label.iov_base = "SMB2AESCCM";
+       d->label.iov_len = 11;
+       d->context.iov_base = "ServerOut";
+       d->context.iov_len = 10;
+
+       return generate_smb3signingkey(ses, &triplet);
+}
+
+int
+generate_smb311signingkey(struct cifs_ses *ses)
+
+{
+       struct derivation_triplet triplet;
+       struct derivation *d;
+
+       d = &triplet.signing;
+       d->label.iov_base = "SMB2AESCMAC";
+       d->label.iov_len = 12;
+       d->context.iov_base = "SmbSign";
+       d->context.iov_len = 8;
+
+       d = &triplet.encryption;
+       d->label.iov_base = "SMB2AESCCM";
+       d->label.iov_len = 11;
+       d->context.iov_base = "ServerIn ";
+       d->context.iov_len = 10;
+
+       d = &triplet.decryption;
+       d->label.iov_base = "SMB2AESCCM";
+       d->label.iov_len = 11;
+       d->context.iov_base = "ServerOut";
+       d->context.iov_len = 10;
+
+       return generate_smb3signingkey(ses, &triplet);
+}
+
 int
 smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
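
generate_key() implements a counter-mode KDF in the style of NIST SP800-108:
HMAC-SHA256 over a 4-byte counter, the NUL-terminated label, a zero
separator, the NUL-terminated context, and the output length in bits. A
rough user-space sketch of the same construction for the SMB 3.0 signing
key, assuming OpenSSL's HMAC() is available (illustrative, not the kernel
implementation):

#include <openssl/hmac.h>
#include <string.h>

/* Derive the 16-byte SMB 3.0 signing key from the session key. */
static void smb30_signing_key_sketch(const unsigned char *session_key,
                                     size_t session_key_len,
                                     unsigned char out[16])
{
        const unsigned char i[4] = { 0, 0, 0, 1 };   /* block counter */
        const unsigned char L[4] = { 0, 0, 0, 128 }; /* output length, bits */
        unsigned char msg[29], hash[32];
        unsigned int hash_len = 0;
        size_t off = 0;

        memcpy(msg + off, i, 4); off += 4;
        memcpy(msg + off, "SMB2AESCMAC", 12); off += 12; /* label + NUL */
        msg[off++] = 0;                                  /* separator */
        memcpy(msg + off, "SmbSign", 8); off += 8;       /* context + NUL */
        memcpy(msg + off, L, 4); off += 4;

        HMAC(EVP_sha256(), session_key, (int)session_key_len,
             msg, off, hash, &hash_len);
        memcpy(out, hash, 16);          /* SMB3_SIGN_KEY_SIZE */
}
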
index 2a24c524fb9a90cedd4187460ecb90c0e5dfcf0c..87abe8ed074c31962a969b178d55757e44bdc0d5 100644 (file)
@@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
        cifs_in_send_dec(server);
        cifs_save_when_sent(mid);
 
-       if (rc < 0)
+       if (rc < 0) {
                server->sequence_number -= 2;
+               cifs_delete_mid(mid);
+       }
+
        mutex_unlock(&server->srv_mutex);
 
        if (rc == 0)
                return 0;
 
-       cifs_delete_mid(mid);
        add_credits_and_wake_if(server, credits, optype);
        return rc;
 }
index f829fe963f5bab9e1de4b81ec9195de90e5da77d..5104d84c4f6425c5ea950a889040a5ca54527c29 100644 (file)
@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
 } while (0)
 
 
-#define CODA_FREE(ptr,size) \
-    do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
+#define CODA_FREE(ptr, size) kvfree((ptr))
 
 /* inode to cnode access functions */
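
The replaced macro picked kfree() or vfree() by comparing the allocation
size to PAGE_SIZE; kvfree() makes the same decision from the pointer itself,
roughly (mirroring mm/util.c of this era):

void kvfree(const void *addr)
{
        if (is_vmalloc_addr(addr))
                vfree(addr);
        else
                kfree(addr);
}
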
 
index fda9f4311212fe4a6d3b884d43dfdb7b5ff51e7e..42e731b8c80a680602d401b6b28239acf74291e0 100644 (file)
@@ -427,13 +427,13 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
        if (host_file->f_op->iterate) {
                struct inode *host_inode = file_inode(host_file);
 
-               mutex_lock(&host_inode->i_mutex);
+               inode_lock(host_inode);
                ret = -ENOENT;
                if (!IS_DEADDIR(host_inode)) {
                        ret = host_file->f_op->iterate(host_file, ctx);
                        file_accessed(host_file);
                }
-               mutex_unlock(&host_inode->i_mutex);
+               inode_unlock(host_inode);
                return ret;
        }
        /* Venus: we must read Venus dirents from a file */
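
This conversion, and the many like it below in configfs, dcache, debugfs,
devpts, direct-io, ecryptfs and efivarfs, is mechanical: inode_lock() and
friends are thin wrappers over the inode's i_mutex introduced by this
series, roughly (per include/linux/fs.h at this point; the underlying
primitive may change later):

static inline void inode_lock(struct inode *inode)
{
        mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
        mutex_unlock(&inode->i_mutex);
}

static inline int inode_trylock(struct inode *inode)
{
        return mutex_trylock(&inode->i_mutex);
}

static inline int inode_is_locked(struct inode *inode)
{
        return mutex_is_locked(&inode->i_mutex);
}

static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
{
        mutex_lock_nested(&inode->i_mutex, subclass);
}
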
index 1da3805f3ddcdb209a6b357e12aacac35809dda9..f47c7483863b5ae55b1160de0f059ec23b3d4cb3 100644 (file)
@@ -71,12 +71,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
 
        host_file = cfi->cfi_container;
        file_start_write(host_file);
-       mutex_lock(&coda_inode->i_mutex);
+       inode_lock(coda_inode);
        ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
        coda_inode->i_size = file_inode(host_file)->i_size;
        coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
        coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
-       mutex_unlock(&coda_inode->i_mutex);
+       inode_unlock(coda_inode);
        file_end_write(host_file);
        return ret;
 }
@@ -203,7 +203,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
        err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&coda_inode->i_mutex);
+       inode_lock(coda_inode);
 
        cfi = CODA_FTOC(coda_file);
        BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
@@ -212,7 +212,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
        err = vfs_fsync(host_file, datasync);
        if (!err && !datasync)
                err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
-       mutex_unlock(&coda_inode->i_mutex);
+       inode_unlock(coda_inode);
 
        return err;
 }
index a5b8eb69a8f42526d4fb4d46d1cc26d33e78e6b2..6402eaf8ab958f27cc3371d0773ad45046dc0b8e 100644 (file)
@@ -1261,6 +1261,9 @@ COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
 COMPATIBLE_IOCTL(HCIINQUIRY)
 COMPATIBLE_IOCTL(HCIUARTSETPROTO)
 COMPATIBLE_IOCTL(HCIUARTGETPROTO)
+COMPATIBLE_IOCTL(HCIUARTGETDEVICE)
+COMPATIBLE_IOCTL(HCIUARTSETFLAGS)
+COMPATIBLE_IOCTL(HCIUARTGETFLAGS)
 COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
 COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
 COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
index cab612b2ae767bbe2ffeb187a6f630f891cd52f3..f419519ec41fb4f3e932fe18561b34f3313e747b 100644 (file)
@@ -640,13 +640,13 @@ static void detach_groups(struct config_group *group)
 
                child = sd->s_dentry;
 
-               mutex_lock(&d_inode(child)->i_mutex);
+               inode_lock(d_inode(child));
 
                configfs_detach_group(sd->s_element);
                d_inode(child)->i_flags |= S_DEAD;
                dont_mount(child);
 
-               mutex_unlock(&d_inode(child)->i_mutex);
+               inode_unlock(d_inode(child));
 
                d_delete(child);
                dput(child);
@@ -834,11 +834,11 @@ static int configfs_attach_item(struct config_item *parent_item,
                         * the VFS may already have hit and used them. Thus,
                         * we must lock them as rmdir() would.
                         */
-                       mutex_lock(&d_inode(dentry)->i_mutex);
+                       inode_lock(d_inode(dentry));
                        configfs_remove_dir(item);
                        d_inode(dentry)->i_flags |= S_DEAD;
                        dont_mount(dentry);
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        d_delete(dentry);
                }
        }
@@ -874,7 +874,7 @@ static int configfs_attach_group(struct config_item *parent_item,
                 * We must also lock the inode to remove it safely in case of
                 * error, as rmdir() would.
                 */
-               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
                configfs_adjust_dir_dirent_depth_before_populate(sd);
                ret = populate_groups(to_config_group(item));
                if (ret) {
@@ -883,7 +883,7 @@ static int configfs_attach_group(struct config_item *parent_item,
                        dont_mount(dentry);
                }
                configfs_adjust_dir_dirent_depth_after_populate(sd);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
                if (ret)
                        d_delete(dentry);
        }
@@ -1135,7 +1135,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
         * subsystem is really registered, and so we need to lock out
         * configfs_[un]register_subsystem().
         */
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        subsys_sd = configfs_find_subsys_dentry(root->d_fsdata, s_item);
        if (!subsys_sd) {
@@ -1147,7 +1147,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
        ret = configfs_do_depend_item(subsys_sd->s_dentry, target);
 
 out_unlock_fs:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        /*
         * If we succeeded, the fs is pinned via other methods.  If not,
@@ -1230,7 +1230,7 @@ int configfs_depend_item_unlocked(struct configfs_subsystem *caller_subsys,
                 * additional locking to prevent other subsystem from being
                 * unregistered
                 */
-               mutex_lock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+               inode_lock(d_inode(root->cg_item.ci_dentry));
 
                /*
                 * As we are trying to depend item from other subsystem
@@ -1254,7 +1254,7 @@ out_root_unlock:
                 * We were called from subsystem other than our target so we
                 * took some locks so now it's time to release them
                 */
-               mutex_unlock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+               inode_unlock(d_inode(root->cg_item.ci_dentry));
 
        return ret;
 }
@@ -1561,7 +1561,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
        down_write(&configfs_rename_sem);
        parent = item->parent->dentry;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
        if (!IS_ERR(new_dentry)) {
@@ -1577,7 +1577,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
                        error = -EEXIST;
                dput(new_dentry);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        up_write(&configfs_rename_sem);
 
        return error;
@@ -1590,7 +1590,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
        struct configfs_dirent * parent_sd = dentry->d_fsdata;
        int err;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        /*
         * Fake invisibility if dir belongs to a group/default groups hierarchy
         * being attached
@@ -1603,7 +1603,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
                else
                        err = 0;
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        return err;
 }
@@ -1613,11 +1613,11 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
        struct dentry * dentry = file->f_path.dentry;
        struct configfs_dirent * cursor = file->private_data;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        spin_lock(&configfs_dirent_lock);
        list_del_init(&cursor->s_sibling);
        spin_unlock(&configfs_dirent_lock);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        release_configfs_dirent(cursor);
 
@@ -1698,7 +1698,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 {
        struct dentry * dentry = file->f_path.dentry;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        switch (whence) {
                case 1:
                        offset += file->f_pos;
@@ -1706,7 +1706,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                        if (offset >= 0)
                                break;
                default:
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -1732,7 +1732,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                        spin_unlock(&configfs_dirent_lock);
                }
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return offset;
 }
 
@@ -1767,14 +1767,14 @@ int configfs_register_group(struct config_group *parent_group,
 
        parent = parent_group->cg_item.ci_dentry;
 
-       mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
        ret = create_default_group(parent_group, group);
        if (!ret) {
                spin_lock(&configfs_dirent_lock);
                configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
                spin_unlock(&configfs_dirent_lock);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return ret;
 }
 EXPORT_SYMBOL(configfs_register_group);
@@ -1791,7 +1791,7 @@ void configfs_unregister_group(struct config_group *group)
        struct dentry *dentry = group->cg_item.ci_dentry;
        struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
 
-       mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
        spin_lock(&configfs_dirent_lock);
        configfs_detach_prep(dentry, NULL);
        spin_unlock(&configfs_dirent_lock);
@@ -1800,7 +1800,7 @@ void configfs_unregister_group(struct config_group *group)
        d_inode(dentry)->i_flags |= S_DEAD;
        dont_mount(dentry);
        d_delete(dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        dput(dentry);
 
@@ -1872,7 +1872,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
        sd = root->d_fsdata;
        link_group(to_config_group(sd->s_element), group);
 
-       mutex_lock_nested(&d_inode(root)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
 
        err = -ENOMEM;
        dentry = d_alloc_name(root, group->cg_item.ci_name);
@@ -1892,7 +1892,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                }
        }
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        if (err) {
                unlink_group(group);
@@ -1913,9 +1913,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
                return;
        }
 
-       mutex_lock_nested(&d_inode(root)->i_mutex,
+       inode_lock_nested(d_inode(root),
                          I_MUTEX_PARENT);
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        mutex_lock(&configfs_symlink_mutex);
        spin_lock(&configfs_dirent_lock);
        if (configfs_detach_prep(dentry, NULL)) {
@@ -1926,11 +1926,11 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
        configfs_detach_group(&group->cg_item);
        d_inode(dentry)->i_flags |= S_DEAD;
        dont_mount(dentry);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        d_delete(dentry);
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        dput(dentry);
 
index 3687187c8ea59e8a6a2795c8e36fa95e897955a7..33b7ee34eda5f135fef480f0bdf55bb4037ab334 100644 (file)
@@ -540,10 +540,10 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib
        umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
        int error = 0;
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_NORMAL);
+       inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL);
        error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
                                     CONFIGFS_ITEM_ATTR);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        return error;
 }
@@ -562,10 +562,10 @@ int configfs_create_bin_file(struct config_item *item,
        umode_t mode = (bin_attr->cb_attr.ca_mode & S_IALLUGO) | S_IFREG;
        int error = 0;
 
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
+       inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL);
        error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode,
                                     CONFIGFS_ITEM_BIN_ATTR);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        return error;
 }
index 0cc810e9dccc15fb764d53a4cb26604b3d16672f..cee087d8f7e02f5b62198a6091a5a7f23cf0109f 100644 (file)
@@ -255,7 +255,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                /* no inode means this hasn't been made visible yet */
                return;
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                if (!sd->s_element)
                        continue;
@@ -268,5 +268,5 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                        break;
                }
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 }
index 7af8797590640e032c74f619b9fd1931fed77e15..711172450da642ccdcf4ac7dcceb7546faf66fad 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,6 +24,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/pagevec.h>
 #include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
@@ -57,16 +58,35 @@ static void dax_unmap_atomic(struct block_device *bdev,
        blk_queue_exit(bdev->bd_queue);
 }
 
+struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+{
+       struct page *page = alloc_pages(GFP_KERNEL, 0);
+       struct blk_dax_ctl dax = {
+               .size = PAGE_SIZE,
+               .sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
+       };
+       long rc;
+
+       if (!page)
+               return ERR_PTR(-ENOMEM);
+
+       rc = dax_map_atomic(bdev, &dax);
+       if (rc < 0)
+               return ERR_PTR(rc);
+       memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
+       dax_unmap_atomic(bdev, &dax);
+       return page;
+}
+
 /*
- * dax_clear_blocks() is called from within transaction context from XFS,
+ * dax_clear_sectors() is called from within transaction context from XFS,
  * and hence this means the stack from this point must follow GFP_NOFS
  * semantics for all operations.
  */
-int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
 {
-       struct block_device *bdev = inode->i_sb->s_bdev;
        struct blk_dax_ctl dax = {
-               .sector = block << (inode->i_blkbits - 9),
+               .sector = _sector,
                .size = _size,
        };
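
A worked example of the sector rounding in read_dax_sector() above, assuming
4K pages: PAGE_SIZE / 512 == 8, so the mask is ~7 and any 512-byte sector
number rounds down to the first sector of its page (illustrative):

#include <assert.h>

int main(void)
{
        long n = 37;                       /* some sector inside a page */
        long mask = ~((4096L / 512) - 1);  /* == ~7 */

        assert((n & mask) == 32);          /* page-aligned start sector */
        return 0;
}
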
 
@@ -88,7 +108,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
        wmb_pmem();
        return 0;
 }
-EXPORT_SYMBOL_GPL(dax_clear_blocks);
+EXPORT_SYMBOL_GPL(dax_clear_sectors);
 
 /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
 static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
@@ -245,13 +265,14 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        loff_t end = pos + iov_iter_count(iter);
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
 
        if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
                struct address_space *mapping = inode->i_mapping;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = filemap_write_and_wait_range(mapping, pos, end - 1);
                if (retval) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out;
                }
        }
@@ -263,7 +284,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        retval = dax_io(inode, iter, pos, end, get_block, &bh);
 
        if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        if ((retval > 0) && end_io)
                end_io(iocb, pos, retval, bh.b_private);
@@ -324,6 +345,202 @@ static int copy_user_bh(struct page *to, struct inode *inode,
        return 0;
 }
 
+#define NO_SECTOR -1
+#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT))
+
+static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
+               sector_t sector, bool pmd_entry, bool dirty)
+{
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+       pgoff_t pmd_index = DAX_PMD_INDEX(index);
+       int type, error = 0;
+       void *entry;
+
+       WARN_ON_ONCE(pmd_entry && !dirty);
+       if (dirty)
+               __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+       spin_lock_irq(&mapping->tree_lock);
+
+       entry = radix_tree_lookup(page_tree, pmd_index);
+       if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
+               index = pmd_index;
+               goto dirty;
+       }
+
+       entry = radix_tree_lookup(page_tree, index);
+       if (entry) {
+               type = RADIX_DAX_TYPE(entry);
+               if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
+                                       type != RADIX_DAX_PMD)) {
+                       error = -EIO;
+                       goto unlock;
+               }
+
+               if (!pmd_entry || type == RADIX_DAX_PMD)
+                       goto dirty;
+
+               /*
+                * We only insert dirty PMD entries into the radix tree.  This
+                * means we don't need to worry about removing a dirty PTE
+                * entry and inserting a clean PMD entry, thus reducing the
+                * range we would flush with a follow-up fsync/msync call.
+                */
+               radix_tree_delete(&mapping->page_tree, index);
+               mapping->nrexceptional--;
+       }
+
+       if (sector == NO_SECTOR) {
+               /*
+                * This can happen during correct operation if our pfn_mkwrite
+                * fault raced against a hole punch operation.  If this
+                * happens the pte that was hole punched will have been
+                * unmapped and the radix tree entry will have been removed by
+                * the time we are called, but the call will still happen.  We
+                * will return all the way up to wp_pfn_shared(), where the
+                * pte_same() check will fail, eventually causing page fault
+                * to be retried by the CPU.
+                */
+               goto unlock;
+       }
+
+       error = radix_tree_insert(page_tree, index,
+                       RADIX_DAX_ENTRY(sector, pmd_entry));
+       if (error)
+               goto unlock;
+
+       mapping->nrexceptional++;
+ dirty:
+       if (dirty)
+               radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ unlock:
+       spin_unlock_irq(&mapping->tree_lock);
+       return error;
+}
+
+static int dax_writeback_one(struct block_device *bdev,
+               struct address_space *mapping, pgoff_t index, void *entry)
+{
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+       int type = RADIX_DAX_TYPE(entry);
+       struct radix_tree_node *node;
+       struct blk_dax_ctl dax;
+       void **slot;
+       int ret = 0;
+
+       spin_lock_irq(&mapping->tree_lock);
+       /*
+        * Regular page slots are stabilized by the page lock even
+        * without the tree itself locked.  These unlocked entries
+        * need verification under the tree lock.
+        */
+       if (!__radix_tree_lookup(page_tree, index, &node, &slot))
+               goto unlock;
+       if (*slot != entry)
+               goto unlock;
+
+       /* another fsync thread may have already written back this entry */
+       if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+               goto unlock;
+
+       if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+               ret = -EIO;
+               goto unlock;
+       }
+
+       dax.sector = RADIX_DAX_SECTOR(entry);
+       dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+       spin_unlock_irq(&mapping->tree_lock);
+
+       /*
+        * We cannot hold tree_lock while calling dax_map_atomic() because it
+        * eventually calls cond_resched().
+        */
+       ret = dax_map_atomic(bdev, &dax);
+       if (ret < 0)
+               return ret;
+
+       if (WARN_ON_ONCE(ret < dax.size)) {
+               ret = -EIO;
+               goto unmap;
+       }
+
+       wb_cache_pmem(dax.addr, dax.size);
+
+       spin_lock_irq(&mapping->tree_lock);
+       radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+       spin_unlock_irq(&mapping->tree_lock);
+ unmap:
+       dax_unmap_atomic(bdev, &dax);
+       return ret;
+
+ unlock:
+       spin_unlock_irq(&mapping->tree_lock);
+       return ret;
+}
+
+/*
+ * Flush the mapping to the persistent domain within the byte range of [start,
+ * end]. This is required by data integrity operations to ensure file data is
+ * on persistent storage prior to completion of the operation.
+ */
+int dax_writeback_mapping_range(struct address_space *mapping,
+               struct block_device *bdev, struct writeback_control *wbc)
+{
+       struct inode *inode = mapping->host;
+       pgoff_t start_index, end_index, pmd_index;
+       pgoff_t indices[PAGEVEC_SIZE];
+       struct pagevec pvec;
+       bool done = false;
+       int i, ret = 0;
+       void *entry;
+
+       if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
+               return -EIO;
+
+       if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
+               return 0;
+
+       start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
+       end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
+       pmd_index = DAX_PMD_INDEX(start_index);
+
+       rcu_read_lock();
+       entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
+       rcu_read_unlock();
+
+       /* see if the start of our range is covered by a PMD entry */
+       if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+               start_index = pmd_index;
+
+       tag_pages_for_writeback(mapping, start_index, end_index);
+
+       pagevec_init(&pvec, 0);
+       while (!done) {
+               pvec.nr = find_get_entries_tag(mapping, start_index,
+                               PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
+                               pvec.pages, indices);
+
+               if (pvec.nr == 0)
+                       break;
+
+               for (i = 0; i < pvec.nr; i++) {
+                       if (indices[i] > end_index) {
+                               done = true;
+                               break;
+                       }
+
+                       ret = dax_writeback_one(bdev, mapping, indices[i],
+                                       pvec.pages[i]);
+                       if (ret < 0)
+                               return ret;
+               }
+       }
+       wmb_pmem();
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
+
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                        struct vm_area_struct *vma, struct vm_fault *vmf)
 {
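
dax_writeback_mapping_range() above is meant to be driven from a
filesystem's ->writepages() so that fsync()/msync() flush CPU caches through
to the persistent domain. A hedged sketch of that wiring (the helper and
dax_mapping() are from this series; the wrapper itself is illustrative,
modeled on the ext2/ext4/xfs conversions):

static int example_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
{
        struct inode *inode = mapping->host;

        if (dax_mapping(mapping))
                return dax_writeback_mapping_range(mapping,
                                inode->i_sb->s_bdev, wbc);

        return generic_writepages(mapping, wbc);  /* non-DAX path */
}
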
@@ -363,6 +580,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        }
        dax_unmap_atomic(bdev, &dax);
 
+       error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
+                       vmf->flags & FAULT_FLAG_WRITE);
+       if (error)
+               goto out;
+
        error = vm_insert_mixed(vma, vaddr, dax.pfn);
 
  out:
@@ -408,6 +630,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
        memset(&bh, 0, sizeof(bh));
        block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
+       bh.b_bdev = inode->i_sb->s_bdev;
        bh.b_size = PAGE_SIZE;
 
  repeat:
@@ -487,6 +710,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                delete_from_page_cache(page);
                unlock_page(page);
                page_cache_release(page);
+               page = NULL;
        }
 
        /*
@@ -590,7 +814,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        struct block_device *bdev;
        pgoff_t size, pgoff;
        sector_t block;
-       int result = 0;
+       int error, result = 0;
+       bool alloc = false;
 
        /* dax pmd mappings require pfn_t_devmap() */
        if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
@@ -624,13 +849,21 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        }
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
        block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
 
        bh.b_size = PMD_SIZE;
-       if (get_block(inode, block, &bh, write) != 0)
+
+       if (get_block(inode, block, &bh, 0) != 0)
                return VM_FAULT_SIGBUS;
+
+       if (!buffer_mapped(&bh) && write) {
+               if (get_block(inode, block, &bh, 1) != 0)
+                       return VM_FAULT_SIGBUS;
+               alloc = true;
+       }
+
        bdev = bh.b_bdev;
-       i_mmap_lock_read(mapping);
 
        /*
         * If the filesystem isn't willing to tell us the length of a hole,
@@ -639,19 +872,22 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         */
        if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
                dax_pmd_dbg(&bh, address, "allocated block too small");
-               goto fallback;
+               return VM_FAULT_FALLBACK;
        }
 
        /*
         * If we allocated new storage, make sure no process has any
         * zero pages covering this hole
         */
-       if (buffer_new(&bh)) {
-               i_mmap_unlock_read(mapping);
-               unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-               i_mmap_lock_read(mapping);
+       if (alloc) {
+               loff_t lstart = pgoff << PAGE_SHIFT;
+               loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
+
+               truncate_pagecache_range(inode, lstart, lend);
        }
 
+       i_mmap_lock_read(mapping);
+
        /*
         * If a truncate happened while we were allocating blocks, we may
         * leave blocks allocated to the file that are beyond EOF.  We can't
@@ -664,7 +900,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                goto out;
        }
        if ((pgoff | PG_PMD_COLOUR) >= size) {
-               dax_pmd_dbg(&bh, address, "pgoff unaligned");
+               dax_pmd_dbg(&bh, address,
+                               "offset + huge page size > file size");
                goto fallback;
        }
 
@@ -732,6 +969,31 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                }
                dax_unmap_atomic(bdev, &dax);
 
+               /*
+                * For PTE faults we insert a radix tree entry for reads, and
+                * leave it clean.  Then on the first write we dirty the radix
+                * tree entry via the dax_pfn_mkwrite() path.  This sequence
+                * allows the dax_pfn_mkwrite() call to be simpler and avoid a
+                * call into get_block() to translate the pgoff to a sector in
+                * order to be able to create a new radix tree entry.
+                *
+                * The PMD path doesn't have an equivalent to
+                * dax_pfn_mkwrite(), though, so for a read followed by a
+                * write we traverse all the way through __dax_pmd_fault()
+                * twice.  This means we can just skip inserting a radix tree
+                * entry completely on the initial read and just wait until
+                * the write to insert a dirty entry.
+                */
+               if (write) {
+                       error = dax_radix_entry(mapping, pgoff, dax.sector,
+                                       true, true);
+                       if (error) {
+                               dax_pmd_dbg(&bh, address,
+                                               "PMD radix insertion failed");
+                               goto fallback;
+                       }
+               }
+
                dev_dbg(part_to_dev(bdev->bd_part),
                                "%s: %s addr: %lx pfn: %lx sect: %llx\n",
                                __func__, current->comm, address,
@@ -790,15 +1052,20 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
  * dax_pfn_mkwrite - handle first write to DAX page
  * @vma: The virtual memory area where the fault occurred
  * @vmf: The description of the fault
- *
  */
 int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+       struct file *file = vma->vm_file;
 
-       sb_start_pagefault(sb);
-       file_update_time(vma->vm_file);
-       sb_end_pagefault(sb);
+       /*
+        * We pass NO_SECTOR to dax_radix_entry() because we expect that a
+        * RADIX_DAX_PTE entry already exists in the radix tree from a
+        * previous call to __dax_fault().  We just want to look up that PTE
+        * entry using vmf->pgoff and make sure the dirty tag is set.  This
+        * saves us from having to make a call to get_block() here to look
+        * up the sector.
+        */
+       dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
        return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -835,6 +1102,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
        BUG_ON((offset + length) > PAGE_CACHE_SIZE);
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
        bh.b_size = PAGE_CACHE_SIZE;
        err = get_block(inode, index, &bh, 0);
        if (err < 0)
index b4539e84e5779a48c470804f9051ca5f3142eb48..92d5140de8516c642b6ea5af81f0ffe89399a416 100644 (file)
@@ -2462,7 +2462,7 @@ EXPORT_SYMBOL(d_rehash);
  */
 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 {
-       BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
+       BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
 
        spin_lock(&dentry->d_lock);
@@ -2738,7 +2738,7 @@ static int __d_unalias(struct inode *inode,
        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
                goto out_err;
        m1 = &dentry->d_sb->s_vfs_rename_mutex;
-       if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+       if (!inode_trylock(alias->d_parent->d_inode))
                goto out_err;
        m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
index b7fcc0de0b2f2771aff08a130eb20564adb3cbd4..bece948b363df5b8d331252cd9468d1bfebead68 100644 (file)
@@ -265,7 +265,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        if (!parent)
                parent = debugfs_mount->mnt_root;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
                dput(dentry);
@@ -273,7 +273,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        }
 
        if (IS_ERR(dentry)) {
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        }
 
@@ -282,7 +282,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 
 static struct dentry *failed_creating(struct dentry *dentry)
 {
-       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+       inode_unlock(d_inode(dentry->d_parent));
        dput(dentry);
        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        return NULL;
@@ -290,7 +290,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
 
 static struct dentry *end_creating(struct dentry *dentry)
 {
-       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+       inode_unlock(d_inode(dentry->d_parent));
        return dentry;
 }
 
@@ -560,9 +560,9 @@ void debugfs_remove(struct dentry *dentry)
        if (!parent || d_really_is_negative(parent))
                return;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        ret = __debugfs_remove(dentry, parent);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        if (!ret)
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
@@ -594,7 +594,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 
        parent = dentry;
  down:
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
  loop:
        /*
         * The parent->d_subdirs is protected by the d_lock. Outside that
@@ -609,7 +609,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
                /* perhaps simple_empty(child) makes more sense */
                if (!list_empty(&child->d_subdirs)) {
                        spin_unlock(&parent->d_lock);
-                       mutex_unlock(&d_inode(parent)->i_mutex);
+                       inode_unlock(d_inode(parent));
                        parent = child;
                        goto down;
                }
@@ -630,10 +630,10 @@ void debugfs_remove_recursive(struct dentry *dentry)
        }
        spin_unlock(&parent->d_lock);
 
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        child = parent;
        parent = parent->d_parent;
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        if (child != dentry)
                /* go up */
@@ -641,7 +641,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 
        if (!__debugfs_remove(child, parent))
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 }
 EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
 
index c35ffdc12bbafd1874dcea43ec5094867daf3e0d..655f21f991606b5bf2bef600a54b814994dbed98 100644 (file)
@@ -255,7 +255,7 @@ static int mknod_ptmx(struct super_block *sb)
        if (!uid_valid(root_uid) || !gid_valid(root_gid))
                return -EINVAL;
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        /* If we have already created ptmx node, return */
        if (fsi->ptmx_dentry) {
@@ -292,7 +292,7 @@ static int mknod_ptmx(struct super_block *sb)
        fsi->ptmx_dentry = dentry;
        rc = 0;
 out:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        return rc;
 }
 
@@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
        mutex_unlock(&allocated_ptys_lock);
 }
 
+/*
+ * pty code needs to hold extra references in case of last /dev/tty close
+ */
+
+void devpts_add_ref(struct inode *ptmx_inode)
+{
+       struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+
+       atomic_inc(&sb->s_active);
+       ihold(ptmx_inode);
+}
+
+void devpts_del_ref(struct inode *ptmx_inode)
+{
+       struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+
+       iput(ptmx_inode);
+       deactivate_super(sb);
+}
+
 /**
  * devpts_pty_new -- create a new inode in /dev/pts/
  * @ptmx_inode: inode of the master
@@ -615,7 +635,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
 
        sprintf(s, "%d", index);
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_alloc_name(root, s);
        if (dentry) {
@@ -626,7 +646,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
                inode = ERR_PTR(-ENOMEM);
        }
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        return inode;
 }
@@ -671,7 +691,7 @@ void devpts_pty_kill(struct inode *inode)
 
        BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_find_alias(inode);
 
@@ -680,7 +700,7 @@ void devpts_pty_kill(struct inode *inode)
        dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
        dput(dentry);           /* d_find_alias above */
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 }
 
 static int __init init_devpts_fs(void)
index 602e8441bc0fb6b094ec96dfb3273ff73b8b1506..d6a9012d42ad570f231a1f48c6706c6af8d5a598 100644 (file)
@@ -472,8 +472,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
                dio->io_error = -EIO;
 
        if (dio->is_async && dio->rw == READ && dio->should_dirty) {
-               bio_check_pages_dirty(bio);     /* transfers ownership */
                err = bio->bi_error;
+               bio_check_pages_dirty(bio);     /* transfers ownership */
        } else {
                bio_for_each_segment_all(bvec, bio, i) {
                        struct page *page = bvec->bv_page;
@@ -1157,12 +1157,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
                                        iocb->ki_filp->f_mapping;
 
                        /* will be released by direct_io_worker */
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
 
                        retval = filemap_write_and_wait_range(mapping, offset,
                                                              end - 1);
                        if (retval) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                kmem_cache_free(dio_cache, dio);
                                goto out;
                        }
@@ -1173,7 +1173,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
        dio->i_size = i_size_read(inode);
        if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
                if (dio->flags & DIO_LOCKING)
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                kmem_cache_free(dio_cache, dio);
                retval = 0;
                goto out;
@@ -1295,7 +1295,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         * of protecting us from looking up uninitialized blocks.
         */
        if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
-               mutex_unlock(&dio->inode->i_mutex);
+               inode_unlock(dio->inode);
 
        /*
         * The only time we want to leave bios in flight is when a successful
index 1925d6d222b87f635b9925e05003a3c7c41d7580..58c2f4a21b7f4233b34b70efb0602b297d7f9012 100644 (file)
@@ -516,7 +516,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
                return -EINVAL;
 
        kbuf = memdup_user_nul(buf, count);
-       if (!IS_ERR(kbuf))
+       if (IS_ERR(kbuf))
                return PTR_ERR(kbuf);
 
        if (check_version(kbuf)) {
index 040aa879d634fe15d38f0ce9bf103c7b514122b4..4e685ac1024dc313e56ed8c8245fa5add9bb7c33 100644 (file)
@@ -41,13 +41,13 @@ static struct dentry *lock_parent(struct dentry *dentry)
        struct dentry *dir;
 
        dir = dget_parent(dentry);
-       mutex_lock_nested(&(d_inode(dir)->i_mutex), I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
        return dir;
 }
 
 static void unlock_dir(struct dentry *dir)
 {
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(dir);
 }
 
@@ -397,11 +397,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        int rc = 0;
 
        lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
-       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dir_dentry));
        lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
                                      lower_dir_dentry,
                                      ecryptfs_dentry->d_name.len);
-       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dir_dentry));
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -426,11 +426,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                       "filename; rc = [%d]\n", __func__, rc);
                goto out;
        }
-       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dir_dentry));
        lower_dentry = lookup_one_len(encrypted_and_encoded_name,
                                      lower_dir_dentry,
                                      encrypted_and_encoded_name_size);
-       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dir_dentry));
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -869,9 +869,9 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
        if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
                struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 
-               mutex_lock(&d_inode(lower_dentry)->i_mutex);
+               inode_lock(d_inode(lower_dentry));
                rc = notify_change(lower_dentry, &lower_ia, NULL);
-               mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+               inode_unlock(d_inode(lower_dentry));
        }
        return rc;
 }
@@ -970,9 +970,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
        if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
                lower_ia.ia_valid &= ~ATTR_MODE;
 
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = notify_change(lower_dentry, &lower_ia, NULL);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        fsstack_copy_attr_all(inode, lower_inode);
        return rc;
@@ -1048,10 +1048,10 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value,
                                                   size);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
@@ -1075,9 +1075,9 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->listxattr(lower_dentry, list, size);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
@@ -1092,9 +1092,9 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->removexattr(lower_dentry, name);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
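
All of the eCryptfs hunks above are one mechanical substitution: direct manipulation of inode->i_mutex is replaced by the new wrapper helpers merged this cycle. For orientation, a sketch of those helpers as they were added to include/linux/fs.h (shown for reference only, not part of this patch):

static inline void inode_lock(struct inode *inode)
{
        mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
        mutex_unlock(&inode->i_mutex);
}

static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
{
        mutex_lock_nested(&inode->i_mutex, subclass);
}

static inline int inode_is_locked(struct inode *inode)
{
        return mutex_is_locked(&inode->i_mutex);
}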
index caba848ac76335ef0f897cb7cb27c1bbf520c1de..c6ced4cbf0cff7d3b3f754ddbf0727d44bca8809 100644 (file)
@@ -436,7 +436,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
                rc = -ENOMEM;
                goto out;
        }
-       mutex_lock(&lower_inode->i_mutex);
+       inode_lock(lower_inode);
        size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
                                           xattr_virt, PAGE_CACHE_SIZE);
        if (size < 0)
@@ -444,7 +444,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
        put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
        rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
                                         xattr_virt, size, 0);
-       mutex_unlock(&lower_inode->i_mutex);
+       inode_unlock(lower_inode);
        if (rc)
                printk(KERN_ERR "Error whilst attempting to write inode size "
                       "to lower file xattr; rc = [%d]\n", rc);
index 90001da9abfdfe98b01a0eacdb5381425234b126..d48e0d261d78da6e1ce2830e65000e37ed4bd292 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/efi.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/mount.h>
 
 #include "internal.h"
 
@@ -50,9 +51,9 @@ static ssize_t efivarfs_file_write(struct file *file,
                d_delete(file->f_path.dentry);
                dput(file->f_path.dentry);
        } else {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, datasize + sizeof(attributes));
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        bytes = count;
@@ -103,9 +104,78 @@ out_free:
        return size;
 }
 
+static int
+efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+{
+       struct inode *inode = file->f_mapping->host;
+       unsigned int i_flags;
+       unsigned int flags = 0;
+
+       i_flags = inode->i_flags;
+       if (i_flags & S_IMMUTABLE)
+               flags |= FS_IMMUTABLE_FL;
+
+       if (copy_to_user(arg, &flags, sizeof(flags)))
+               return -EFAULT;
+       return 0;
+}
+
+static int
+efivarfs_ioc_setxflags(struct file *file, void __user *arg)
+{
+       struct inode *inode = file->f_mapping->host;
+       unsigned int flags;
+       unsigned int i_flags = 0;
+       int error;
+
+       if (!inode_owner_or_capable(inode))
+               return -EACCES;
+
+       if (copy_from_user(&flags, arg, sizeof(flags)))
+               return -EFAULT;
+
+       if (flags & ~FS_IMMUTABLE_FL)
+               return -EOPNOTSUPP;
+
+       if (!capable(CAP_LINUX_IMMUTABLE))
+               return -EPERM;
+
+       if (flags & FS_IMMUTABLE_FL)
+               i_flags |= S_IMMUTABLE;
+
+
+       error = mnt_want_write_file(file);
+       if (error)
+               return error;
+
+       inode_lock(inode);
+       inode_set_flags(inode, i_flags, S_IMMUTABLE);
+       inode_unlock(inode);
+
+       mnt_drop_write_file(file);
+
+       return 0;
+}
+
+long
+efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p)
+{
+       void __user *arg = (void __user *)p;
+
+       switch (cmd) {
+       case FS_IOC_GETFLAGS:
+               return efivarfs_ioc_getxflags(file, arg);
+       case FS_IOC_SETFLAGS:
+               return efivarfs_ioc_setxflags(file, arg);
+       }
+
+       return -ENOTTY;
+}
+
 const struct file_operations efivarfs_file_operations = {
        .open   = simple_open,
        .read   = efivarfs_file_read,
        .write  = efivarfs_file_write,
        .llseek = no_llseek,
+       .unlocked_ioctl = efivarfs_file_ioctl,
 };
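
With the flags ioctls wired up, efivarfs entries answer to the same FS_IOC_GETFLAGS/FS_IOC_SETFLAGS interface that chattr(1) uses on other filesystems. A hypothetical userspace sketch (the variable path is a made-up placeholder, not from this patch):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
        /* Placeholder path; substitute a real efivarfs entry. */
        const char *path =
                "/sys/firmware/efi/efivars/Example-01234567-89ab-cdef-0123-456789abcdef";
        unsigned int flags;
        int fd = open(path, O_RDONLY);

        if (fd < 0)
                return 1;
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
                perror("FS_IOC_GETFLAGS");
                return 1;
        }
        flags &= ~FS_IMMUTABLE_FL;      /* allow the variable to be deleted */
        if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0) {
                perror("FS_IOC_SETFLAGS");
                return 1;
        }
        close(fd);
        return 0;
}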
index 3381b9da9ee6080881720f0500b71c74ecdf6058..e2ab6d0497f2bb86dee165421487d27e4b64ba0d 100644 (file)
@@ -15,7 +15,8 @@
 #include "internal.h"
 
 struct inode *efivarfs_get_inode(struct super_block *sb,
-                               const struct inode *dir, int mode, dev_t dev)
+                               const struct inode *dir, int mode,
+                               dev_t dev, bool is_removable)
 {
        struct inode *inode = new_inode(sb);
 
@@ -23,6 +24,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb,
                inode->i_ino = get_next_ino();
                inode->i_mode = mode;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+               inode->i_flags = is_removable ? 0 : S_IMMUTABLE;
                switch (mode & S_IFMT) {
                case S_IFREG:
                        inode->i_fop = &efivarfs_file_operations;
@@ -102,22 +104,17 @@ static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid)
 static int efivarfs_create(struct inode *dir, struct dentry *dentry,
                          umode_t mode, bool excl)
 {
-       struct inode *inode;
+       struct inode *inode = NULL;
        struct efivar_entry *var;
        int namelen, i = 0, err = 0;
+       bool is_removable = false;
 
        if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len))
                return -EINVAL;
 
-       inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0);
-       if (!inode)
-               return -ENOMEM;
-
        var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL);
-       if (!var) {
-               err = -ENOMEM;
-               goto out;
-       }
+       if (!var)
+               return -ENOMEM;
 
        /* length of the variable name itself: remove GUID and separator */
        namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1;
@@ -125,6 +122,16 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
        efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1,
                        &var->var.VendorGuid);
 
+       if (efivar_variable_is_removable(var->var.VendorGuid,
+                                        dentry->d_name.name, namelen))
+               is_removable = true;
+
+       inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0, is_removable);
+       if (!inode) {
+               err = -ENOMEM;
+               goto out;
+       }
+
        for (i = 0; i < namelen; i++)
                var->var.VariableName[i] = dentry->d_name.name[i];
 
@@ -138,7 +145,8 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
 out:
        if (err) {
                kfree(var);
-               iput(inode);
+               if (inode)
+                       iput(inode);
        }
        return err;
 }
index b5ff16addb7ce0984d14d2efe0324be884d20911..b4505188e799b9f5f50b2f792090775fd4244127 100644 (file)
@@ -15,7 +15,8 @@ extern const struct file_operations efivarfs_file_operations;
 extern const struct inode_operations efivarfs_dir_inode_operations;
 extern bool efivarfs_valid_name(const char *str, int len);
 extern struct inode *efivarfs_get_inode(struct super_block *sb,
-                       const struct inode *dir, int mode, dev_t dev);
+                       const struct inode *dir, int mode, dev_t dev,
+                       bool is_removable);
 
 extern struct list_head efivarfs_list;
 
index 86a2121828c312e53d64aedee9506319bbadf6c1..dd029d13ea6140f7df1ed99fb2634a9c858f801f 100644 (file)
@@ -118,8 +118,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
        struct dentry *dentry, *root = sb->s_root;
        unsigned long size = 0;
        char *name;
-       int len, i;
+       int len;
        int err = -ENOMEM;
+       bool is_removable = false;
 
        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
@@ -128,15 +129,17 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
        memcpy(entry->var.VariableName, name16, name_size);
        memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t));
 
-       len = ucs2_strlen(entry->var.VariableName);
+       len = ucs2_utf8size(entry->var.VariableName);
 
        /* name, plus '-', plus GUID, plus NUL */
        name = kmalloc(len + 1 + EFI_VARIABLE_GUID_LEN + 1, GFP_KERNEL);
        if (!name)
                goto fail;
 
-       for (i = 0; i < len; i++)
-               name[i] = entry->var.VariableName[i] & 0xFF;
+       ucs2_as_utf8(name, entry->var.VariableName, len);
+
+       if (efivar_variable_is_removable(entry->var.VendorGuid, name, len))
+               is_removable = true;
 
        name[len] = '-';
 
@@ -144,7 +147,8 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
 
        name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
 
-       inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0);
+       inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
+                                  is_removable);
        if (!inode)
                goto fail_name;
 
@@ -160,10 +164,10 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
        efivar_entry_size(entry, &size);
        efivar_entry_add(entry, &efivarfs_list);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        inode->i_private = entry;
        i_size_write(inode, size + sizeof(entry->var.Attributes));
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        d_add(dentry, inode);
 
        return 0;
@@ -200,7 +204,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_d_op              = &efivarfs_d_ops;
        sb->s_time_gran         = 1;
 
-       inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0);
+       inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true);
        if (!inode)
                return -ENOMEM;
        inode->i_op = &efivarfs_dir_inode_operations;
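
The ucs2_utf8size()/ucs2_as_utf8() switch above exists because the old loop truncated each 16-bit character of the variable name to its low byte, corrupting non-ASCII names and allowing distinct names to collide. The underlying size arithmetic, restated as a minimal sketch (standard UTF-8 encoding rules, not the kernel helper itself):

/* Bytes needed to encode one UCS-2 (BMP) code point in UTF-8; the byte
 * length of a converted name can therefore exceed its character count. */
static unsigned long utf8_bytes_for(unsigned short c)
{
        if (c < 0x80)
                return 1;       /* U+0000..U+007F: one byte (ASCII) */
        if (c < 0x800)
                return 2;       /* U+0080..U+07FF: two bytes */
        return 3;               /* U+0800..U+FFFF: three bytes */
}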
index ae1dbcf47e979d48b67ee83e1ec413e4d119a48b..cde60741cad2c4cc429f04e80f29e788b0e0d673 100644 (file)
 /* Epoll private bits inside the event mask */
 #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
 
+#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
+
+#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
+                               EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
+
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
 
@@ -1068,7 +1073,22 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
         * wait list.
         */
        if (waitqueue_active(&ep->wq)) {
-               ewake = 1;
+               if ((epi->event.events & EPOLLEXCLUSIVE) &&
+                                       !((unsigned long)key & POLLFREE)) {
+                       switch ((unsigned long)key & EPOLLINOUT_BITS) {
+                       case POLLIN:
+                               if (epi->event.events & POLLIN)
+                                       ewake = 1;
+                               break;
+                       case POLLOUT:
+                               if (epi->event.events & POLLOUT)
+                                       ewake = 1;
+                               break;
+                       case 0:
+                               ewake = 1;
+                               break;
+                       }
+               }
                wake_up_locked(&ep->wq);
        }
        if (waitqueue_active(&ep->poll_wait))
@@ -1875,9 +1895,13 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
         * so EPOLLEXCLUSIVE is not allowed for an EPOLL_CTL_MOD operation.
         * Also, we do not currently support nested exclusive wakeups.
         */
-       if ((epds.events & EPOLLEXCLUSIVE) && (op == EPOLL_CTL_MOD ||
-               (op == EPOLL_CTL_ADD && is_file_epoll(tf.file))))
-               goto error_tgt_fput;
+       if (epds.events & EPOLLEXCLUSIVE) {
+               if (op == EPOLL_CTL_MOD)
+                       goto error_tgt_fput;
+               if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
+                               (epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+                       goto error_tgt_fput;
+       }
 
        /*
         * At this point it is safe to assume that the "private_data" contains
@@ -1950,8 +1974,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                break;
        case EPOLL_CTL_MOD:
                if (epi) {
-                       epds.events |= POLLERR | POLLHUP;
-                       error = ep_modify(ep, epi, &epds);
+                       if (!(epi->event.events & EPOLLEXCLUSIVE)) {
+                               epds.events |= POLLERR | POLLHUP;
+                               error = ep_modify(ep, epi, &epds);
+                       }
                } else
                        error = -ENOENT;
                break;
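
EPOLLEXCLUSIVE itself is new in this cycle; the hunks above tighten which event bits may accompany it and how an exclusive entry may be changed afterwards. A userspace sketch of the intended use, assuming a libc that already exposes the new flag (on older headers it may need to be defined by hand):

#include <sys/epoll.h>

/* Register a listening socket shared between several epoll instances so
 * that only one waiter is woken per event, avoiding a thundering herd.
 * The flag must be chosen at EPOLL_CTL_ADD time: per the hunks above,
 * EPOLLEXCLUSIVE cannot be requested via EPOLL_CTL_MOD, and a MOD on an
 * existing exclusive entry is not applied. */
static int add_exclusive(int epfd, int listen_fd)
{
        struct epoll_event ev;

        ev.events = EPOLLIN | EPOLLEXCLUSIVE;
        ev.data.fd = listen_fd;
        return epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
}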
index 828ec5f07de00b7a21d016c022afc68ee0962331..dcd4ac7d3f1e77b45fde8a84585150a4862b84d9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1307,13 +1307,13 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
                return;
 
        /* Be careful if suid/sgid is set */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* Atomically reload mode/uid/gid now that the lock is held */
        mode = inode->i_mode;
        uid = inode->i_uid;
        gid = inode->i_gid;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /* We ignore suid/sgid if there are no mappings for them in the ns */
        if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
index 906de66e8e7e067e6179aa3408c6e6aa8ea528b7..28645f0640f735b7f168b14258cc22c110bc0f1d 100644 (file)
@@ -52,9 +52,9 @@ static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = sync_inode_metadata(filp->f_mapping->host, 1);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 714cd37a6ba30fd970b8384a2c2b26fc5209351f..c46f1a190b8d9ecedd8157530ecfc3ec9ccf16c4 100644 (file)
@@ -124,10 +124,10 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
        int err;
 
        parent = ERR_PTR(-EACCES);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        if (mnt->mnt_sb->s_export_op->get_parent)
                parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
 
        if (IS_ERR(parent)) {
                dprintk("%s: get_parent of %ld failed, err %d\n",
@@ -143,9 +143,9 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
        if (err)
                goto out_err;
        dprintk("%s: found name: %s\n", __func__, nbuf);
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (IS_ERR(tmp)) {
                dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
                goto out_err;
@@ -503,10 +503,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
                 */
                err = exportfs_get_name(mnt, target_dir, nbuf, result);
                if (!err) {
-                       mutex_lock(&target_dir->d_inode->i_mutex);
+                       inode_lock(target_dir->d_inode);
                        nresult = lookup_one_len(nbuf, target_dir,
                                                 strlen(nbuf));
-                       mutex_unlock(&target_dir->d_inode->i_mutex);
+                       inode_unlock(target_dir->d_inode);
                        if (!IS_ERR(nresult)) {
                                if (nresult->d_inode) {
                                        dput(result);
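
Both exportfs call sites follow the same contract: lookup_one_len() must be called with the parent directory's inode locked. The converted pattern, as a generic sketch:

/* Look up a single child name under a locked parent; the caller checks
 * the result with IS_ERR(). */
static struct dentry *locked_lookup(struct dentry *parent, const char *name)
{
        struct dentry *child;

        inode_lock(d_inode(parent));
        child = lookup_one_len(name, parent, strlen(name));
        inode_unlock(d_inode(parent));
        return child;
}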
index 11a42c5a09aee553bd85db0154be89dbd1b374cc..c1400b109805b8778d8ee947ecad3060539f5ebe 100644 (file)
@@ -80,30 +80,13 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
        return ret;
 }
 
-static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       struct inode *inode = file_inode(vma->vm_file);
-       struct ext2_inode_info *ei = EXT2_I(inode);
-       int ret;
-
-       sb_start_pagefault(inode->i_sb);
-       file_update_time(vma->vm_file);
-       down_read(&ei->dax_sem);
-
-       ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL);
-
-       up_read(&ei->dax_sem);
-       sb_end_pagefault(inode->i_sb);
-       return ret;
-}
-
 static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
                struct vm_fault *vmf)
 {
        struct inode *inode = file_inode(vma->vm_file);
        struct ext2_inode_info *ei = EXT2_I(inode);
-       int ret = VM_FAULT_NOPAGE;
        loff_t size;
+       int ret;
 
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
@@ -113,6 +96,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
+       else
+               ret = dax_pfn_mkwrite(vma, vmf);
 
        up_read(&ei->dax_sem);
        sb_end_pagefault(inode->i_sb);
@@ -122,7 +107,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 static const struct vm_operations_struct ext2_dax_vm_ops = {
        .fault          = ext2_dax_fault,
        .pmd_fault      = ext2_dax_pmd_fault,
-       .page_mkwrite   = ext2_dax_mkwrite,
+       .page_mkwrite   = ext2_dax_fault,
        .pfn_mkwrite    = ext2_dax_pfn_mkwrite,
 };
 
index 338eefda70c6fc4e2055c0165b590febf1295acb..6bd58e6ff038657f75b93857bad455ebdfedfa0f 100644 (file)
@@ -737,8 +737,10 @@ static int ext2_get_blocks(struct inode *inode,
                 * so that it's not found by another thread before it's
                 * initialised
                 */
-               err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
-                                               1 << inode->i_blkbits);
+               err = dax_clear_sectors(inode->i_sb->s_bdev,
+                               le32_to_cpu(chain[depth-1].key) <<
+                               (inode->i_blkbits - 9),
+                               1 << inode->i_blkbits);
                if (err) {
                        mutex_unlock(&ei->truncate_mutex);
                        goto cleanup;
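
dax_clear_sectors() takes a 512-byte sector number, so the block number from the indirect chain is shifted by (i_blkbits - 9). A worked sketch of that conversion:

/* Sectors are 512 bytes (2^9). With 4 KiB blocks (i_blkbits == 12),
 * block 100 starts at sector 100 << 3 == 800 and spans 8 sectors. */
static unsigned long long block_to_sector(unsigned int blkbits,
                                          unsigned long long block)
{
        return block << (blkbits - 9);
}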
@@ -874,6 +876,14 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
+#ifdef CONFIG_FS_DAX
+       if (dax_mapping(mapping)) {
+               return dax_writeback_mapping_range(mapping,
+                                                  mapping->host->i_sb->s_bdev,
+                                                  wbc);
+       }
+#endif
+
        return mpage_writepages(mapping, wbc, ext2_get_block);
 }
 
@@ -1296,7 +1306,7 @@ void ext2_set_inode_flags(struct inode *inode)
                inode->i_flags |= S_NOATIME;
        if (flags & EXT2_DIRSYNC_FL)
                inode->i_flags |= S_DIRSYNC;
-       if (test_opt(inode->i_sb, DAX))
+       if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
                inode->i_flags |= S_DAX;
 }
 
index 5d46c09863f09408258696171dfbec2b72eab994..b386af2e45f41fda4f965eafa5751ab10dc48bc3 100644 (file)
@@ -51,10 +51,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                flags = ext2_mask_flags(inode->i_mode, flags);
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                /* Is it quota file? Do not allow user to mess with it */
                if (IS_NOQUOTA(inode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        ret = -EPERM;
                        goto setflags_out;
                }
@@ -68,7 +68,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                 */
                if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                ret = -EPERM;
                                goto setflags_out;
                        }
@@ -80,7 +80,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                ext2_set_inode_flags(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                mark_inode_dirty(inode);
 setflags_out:
@@ -102,10 +102,10 @@ setflags_out:
                        goto setversion_out;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
                inode->i_generation = generation;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                mark_inode_dirty(inode);
 setversion_out:
index ec0668a60678d215dadc9baa97623ddbd3dbc160..fe1f50fe764ff9238354e2e30491c6e1e6d149b9 100644 (file)
@@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
        /* If the checksum is bad, mark all blocks used to prevent allocation,
         * essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-               ext4_error(sb, "Checksum bad for group %u", block_group);
                grp = ext4_get_group_info(sb, block_group);
                if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
                        percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
        }
        ext4_lock_group(sb, block_group);
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-
                err = ext4_init_block_bitmap(sb, bh, block_group, desc);
                set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
-               if (err)
+               if (err) {
+                       ext4_error(sb, "Failed to init block bitmap for group "
+                                  "%u: %d", block_group, err);
                        goto out;
+               }
                goto verify;
        }
        ext4_unlock_group(sb, block_group);
index 1a0835073663ff3f1dad1f376328de5e2b1332ce..38f7562489bbb0ccdea3085bbfcbcf4098210203 100644 (file)
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
                                EXT4_DECRYPT, page->index, page, page);
 }
 
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                          ext4_fsblk_t pblk, ext4_lblk_t len)
 {
        struct ext4_crypto_ctx  *ctx;
        struct page             *ciphertext_page = NULL;
        struct bio              *bio;
-       ext4_lblk_t             lblk = le32_to_cpu(ex->ee_block);
-       ext4_fsblk_t            pblk = ext4_ext_pblock(ex);
-       unsigned int            len = ext4_ext_get_actual_len(ex);
        int                     ret, err = 0;
 
 #if 0
@@ -469,3 +467,59 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
                return size;
        return 0;
 }
+
+/*
+ * Validate dentries for encrypted directories to make sure we aren't
+ * potentially caching stale data after a key has been added or
+ * removed.
+ */
+static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+       struct inode *dir = d_inode(dentry->d_parent);
+       struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info;
+       int dir_has_key, cached_with_key;
+
+       if (!ext4_encrypted_inode(dir))
+               return 0;
+
+       if (ci && ci->ci_keyring_key &&
+           (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                                         (1 << KEY_FLAG_REVOKED) |
+                                         (1 << KEY_FLAG_DEAD))))
+               ci = NULL;
+
+       /* this should eventually be a flag in d_flags */
+       cached_with_key = dentry->d_fsdata != NULL;
+       dir_has_key = (ci != NULL);
+
+       /*
+        * If the dentry was cached without the key, and it is a
+        * negative dentry, it might be a valid name.  We can't check
+        * whether the key has since been made available, for locking
+        * reasons, so we fail the validation and let ext4_lookup() do
+        * this check.
+        *
+        * We also fail the validation if the dentry was created with
+        * the key present, but we no longer have the key, or vice versa.
+        */
+       if ((!cached_with_key && d_is_negative(dentry)) ||
+           (!cached_with_key && dir_has_key) ||
+           (cached_with_key && !dir_has_key)) {
+#if 0                          /* Revalidation debug */
+               char buf[80];
+               char *cp = simple_dname(dentry, buf, sizeof(buf));
+
+               if (IS_ERR(cp))
+                       cp = (char *) "???";
+               pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata,
+                      cached_with_key, d_is_negative(dentry),
+                      dir_has_key);
+#endif
+               return 0;
+       }
+       return 1;
+}
+
+const struct dentry_operations ext4_encrypted_d_ops = {
+       .d_revalidate = ext4_d_revalidate,
+};
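
Returning 0 from d_revalidate forces the VFS to discard the cached dentry and call ->lookup again, which can then consult the current key state. The hook-up of these operations is not part of this hunk; a hedged sketch of the expected attachment point (the real one lives in ext4_lookup() in fs/ext4/namei.c):

/* Illustrative only: give dentries under an encrypted directory the
 * revalidate hook so cached (especially negative) entries are re-checked
 * once a key appears or disappears. */
static void ext4_set_crypto_d_ops(struct inode *dir, struct dentry *dentry)
{
        if (ext4_encrypted_inode(dir))
                d_set_d_op(dentry, &ext4_encrypted_d_ops);
}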
index c5882b36e5582d0005582d0fe3ac86cab70d9c9c..9a16d1e75a493f9e4663bb4ea05b9da9980ba65d 100644 (file)
@@ -213,9 +213,11 @@ retry:
                res = -ENOKEY;
                goto out;
        }
+       down_read(&keyring_key->sem);
        ukp = user_key_payload(keyring_key);
        if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
                res = -EINVAL;
+               up_read(&keyring_key->sem);
                goto out;
        }
        master_key = (struct ext4_encryption_key *)ukp->data;
@@ -226,10 +228,12 @@ retry:
                            "ext4: key size incorrect: %d\n",
                            master_key->size);
                res = -ENOKEY;
+               up_read(&keyring_key->sem);
                goto out;
        }
        res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
                                  raw_key);
+       up_read(&keyring_key->sem);
        if (res)
                goto out;
 got_key:
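
The rule being enforced here is general to the keyring API: a key's payload can be swapped by update or revocation, so it may only be dereferenced while key->sem is held for read, and anything needed afterwards must be copied out first. A generic sketch (names illustrative):

static int copy_key_payload(struct key *key, void *dst, size_t dst_len)
{
        const struct user_key_payload *ukp;
        int ret = 0;

        down_read(&key->sem);
        ukp = user_key_payload(key);
        if (!ukp || ukp->datalen > dst_len)
                ret = -EINVAL;          /* revoked or unexpectedly sized */
        else
                memcpy(dst, ukp->data, ukp->datalen);
        up_read(&key->sem);
        return ret;     /* the payload pointer must not be used past up_read() */
}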
index 1d1bca74f84437172d96c26e648e6ed45e129725..33f5e2a50cf883c43842e1c89a868fcf70c4c2a9 100644 (file)
@@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
        int dir_has_error = 0;
        struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
 
+       if (ext4_encrypted_inode(inode)) {
+               err = ext4_get_encryption_info(inode);
+               if (err && err != -ENOKEY)
+                       return err;
+       }
+
        if (is_dx_dir(inode)) {
                err = ext4_dx_readdir(file, ctx);
                if (err != ERR_BAD_DX_DIR) {
@@ -157,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
                                        index, 1);
                        file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
                        bh = ext4_bread(NULL, inode, map.m_lblk, 0);
-                       if (IS_ERR(bh))
-                               return PTR_ERR(bh);
+                       if (IS_ERR(bh)) {
+                               err = PTR_ERR(bh);
+                               bh = NULL;
+                               goto errout;
+                       }
                }
 
                if (!bh) {
index cc7ca4e87144a540332213ce03b63e80e601f40e..157b458a69d4b7c334f28b80c37fd5d11c9f0c25 100644 (file)
@@ -378,14 +378,22 @@ struct flex_groups {
#define EXT4_PROJINHERIT_FL            0x20000000 /* Create with parent's projid */
 #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE           0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE                0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE           0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE                0x204380FF /* User modifiable flags */
+
+#define EXT4_FL_XFLAG_VISIBLE          (EXT4_SYNC_FL | \
+                                        EXT4_IMMUTABLE_FL | \
+                                        EXT4_APPEND_FL | \
+                                        EXT4_NODUMP_FL | \
+                                        EXT4_NOATIME_FL | \
+                                        EXT4_PROJINHERIT_FL)
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
                           EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
-                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+                          EXT4_PROJINHERIT_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -555,10 +563,12 @@ enum {
 #define EXT4_GET_BLOCKS_NO_NORMALIZE           0x0040
        /* Request will not result in inode size update (used for fallocate) */
 #define EXT4_GET_BLOCKS_KEEP_SIZE              0x0080
-       /* Do not take i_data_sem locking in ext4_map_blocks */
-#define EXT4_GET_BLOCKS_NO_LOCK                        0x0100
        /* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0200
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0100
+       /* Write zeros to newly created written extents */
+#define EXT4_GET_BLOCKS_ZERO                   0x0200
+#define EXT4_GET_BLOCKS_CREATE_ZERO            (EXT4_GET_BLOCKS_CREATE |\
+                                       EXT4_GET_BLOCKS_ZERO)
 
 /*
  * The bit position of these flags must not overlap with any of the
@@ -616,6 +626,46 @@ enum {
 #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
 #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
 
+#ifndef FS_IOC_FSGETXATTR
+/* Until the uapi changes get merged for project quota... */
+
+#define FS_IOC_FSGETXATTR              _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR              _IOW('X', 32, struct fsxattr)
+
+/*
+ * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       __u32           fsx_projid;     /* project identifier (get/set) */
+       unsigned char   fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME      0x00000001      /* data in realtime volume */
+#define FS_XFLAG_PREALLOC      0x00000002      /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE     0x00000008      /* file cannot be modified */
+#define FS_XFLAG_APPEND                0x00000010      /* all writes append */
+#define FS_XFLAG_SYNC          0x00000020      /* all writes synchronous */
+#define FS_XFLAG_NOATIME       0x00000040      /* do not update access time */
+#define FS_XFLAG_NODUMP                0x00000080      /* do not include in backups */
+#define FS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT   0x00000200      /* create with parent's projid */
+#define FS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define FS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
+#define FS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this */
+#endif /* !defined(FS_IOC_FSGETXATTR) */
+
+#define EXT4_IOC_FSGETXATTR            FS_IOC_FSGETXATTR
+#define EXT4_IOC_FSSETXATTR            FS_IOC_FSSETXATTR
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
@@ -910,6 +960,15 @@ struct ext4_inode_info {
         * by other means, so we have i_data_sem.
         */
        struct rw_semaphore i_data_sem;
+       /*
+        * i_mmap_sem is for serializing page faults with truncate / punch hole
+        * operations. We have to make sure that a new page cannot be faulted
+        * into a section of the inode that is being punched. We cannot easily use
+        * i_data_sem for this since we need protection for the whole punch
+        * operation and i_data_sem ranks below transaction start so we have
+        * to occasionally drop it.
+        */
+       struct rw_semaphore i_mmap_sem;
        struct inode vfs_inode;
        struct jbd2_inode *jinode;
 
@@ -993,6 +1052,7 @@ struct ext4_inode_info {
        /* Encryption params */
        struct ext4_crypt_info *i_crypt_info;
 #endif
+       kprojid_t i_projid;
 };
 
 /*
@@ -1248,7 +1308,7 @@ struct ext4_super_block {
 #endif
 
 /* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
 
 /*
  * fourth extended-fs super-block data in memory
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt,              ENCRYPT)
                                         EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
                                         EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
                                         EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
-                                        EXT4_FEATURE_RO_COMPAT_QUOTA)
+                                        EXT4_FEATURE_RO_COMPAT_QUOTA |\
+                                        EXT4_FEATURE_RO_COMPAT_PROJECT)
 
 #define EXTN_FEATURE_FUNCS(ver) \
 static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
 #define        EXT4_DEF_RESUID         0
 #define        EXT4_DEF_RESGID         0
 
+/*
+ * Default project ID
+ */
+#define        EXT4_DEF_PROJID         0
+
 #define EXT4_DEF_INODE_READAHEAD_BLKS  32
 
 /*
@@ -2234,7 +2300,9 @@ void ext4_restore_control_page(struct page *data_page);
 struct page *ext4_encrypt(struct inode *inode,
                          struct page *plaintext_page);
 int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                          ext4_fsblk_t pblk, ext4_lblk_t len);
+extern const struct dentry_operations ext4_encrypted_d_ops;
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 int ext4_init_crypto(void);
@@ -2440,8 +2508,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_write(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create);
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                        struct buffer_head *bh_result, int create);
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
                                struct buffer_head *bh_result, int create);
 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
@@ -2484,9 +2552,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
                             loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
 extern void ext4_da_update_reserve_space(struct inode *inode,
                                        int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                             ext4_fsblk_t pblk, ext4_lblk_t len);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2825,7 +2897,7 @@ do {                                                              \
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
 {
        WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        down_write(&EXT4_I(inode)->i_data_sem);
        if (newsize > EXT4_I(inode)->i_disksize)
                EXT4_I(inode)->i_disksize = newsize;
@@ -2848,6 +2920,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
        return changed;
 }
 
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len);
+
 struct ext4_group_info {
        unsigned long   bb_state;
        struct rb_root  bb_free_root;
@@ -2986,8 +3061,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
                                         struct page *page);
 extern int ext4_try_add_inline_entry(handle_t *handle,
                                     struct ext4_filename *fname,
-                                    struct dentry *dentry,
-                                    struct inode *inode);
+                                    struct inode *dir, struct inode *inode);
 extern int ext4_try_create_inline_dir(handle_t *handle,
                                      struct inode *parent,
                                      struct inode *inode);
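
The fsxattr fallback above mirrors the interface XFS already exposes, so ext4 project quota can be driven from userspace before the definitions land in the shared uapi headers. A hypothetical userspace sketch, restating the ioctl number and struct layout from this hunk:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/types.h>

/* Local copies of the fallback definitions above; generic headers did not
 * yet provide them at the time. */
struct fsxattr {
        __u32           fsx_xflags;
        __u32           fsx_extsize;
        __u32           fsx_nextents;
        __u32           fsx_projid;
        unsigned char   fsx_pad[12];
};
#define FS_IOC_FSGETXATTR       _IOR('X', 31, struct fsxattr)

static int print_projid(int fd)
{
        struct fsxattr fa;

        if (ioctl(fd, FS_IOC_FSGETXATTR, &fa) < 0)
                return -1;
        printf("xflags=%#x projid=%u\n", fa.fsx_xflags,
               (unsigned int)fa.fsx_projid);
        return 0;
}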
index 551353b1b17ab930ead8eff62ee3a699b4cfe10f..3753ceb0b0dd7b610c8e3ddefe4b992c07416fff 100644 (file)
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
        ext4_fsblk_t ee_pblock;
        unsigned int ee_len;
-       int ret;
 
        ee_len    = ext4_ext_get_actual_len(ex);
        ee_pblock = ext4_ext_pblock(ex);
-
-       if (ext4_encrypted_inode(inode))
-               return ext4_encrypted_zeroout(inode, ex);
-
-       ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
-       if (ret > 0)
-               ret = 0;
-
-       return ret;
+       return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+                                 ee_len);
 }
 
 /*
@@ -3936,7 +3928,7 @@ static int
 convert_initialized_extent(handle_t *handle, struct inode *inode,
                           struct ext4_map_blocks *map,
                           struct ext4_ext_path **ppath, int flags,
-                          unsigned int allocated, ext4_fsblk_t newblock)
+                          unsigned int allocated)
 {
        struct ext4_ext_path *path = *ppath;
        struct ext4_extent *ex;
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
        }
        /* IO end_io complete, convert the filled extent to written */
        if (flags & EXT4_GET_BLOCKS_CONVERT) {
+               if (flags & EXT4_GET_BLOCKS_ZERO) {
+                       if (allocated > map->m_len)
+                               allocated = map->m_len;
+                       err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
+                                                allocated);
+                       if (err < 0)
+                               goto out2;
+               }
                ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
                                                           ppath);
                if (ret >= 0) {
@@ -4347,7 +4347,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                            (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
                                allocated = convert_initialized_extent(
                                                handle, inode, map, &path,
-                                               flags, allocated, newblock);
+                                               flags, allocated);
                                goto out2;
                        } else if (!ext4_ext_is_unwritten(ex))
                                goto out;
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        if (len <= EXT_UNWRITTEN_MAX_LEN)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
-
        /*
         * credits to insert 1 extent into extent tree
         */
@@ -4752,8 +4748,6 @@ retry:
                goto retry;
        }
 
-       ext4_inode_resume_unlocked_dio(inode);
-
        return ret > 0 ? ret2 : ret;
 }
 
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        int partial_begin, partial_end;
        loff_t start, end;
        ext4_lblk_t lblk;
-       struct address_space *mapping = inode->i_mapping;
        unsigned int blkbits = inode->i_blkbits;
 
        trace_ext4_zero_range(inode, offset, len, mode);
@@ -4785,17 +4778,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                        return ret;
        }
 
-       /*
-        * Write out all dirty pages to avoid race conditions
-        * Then release them.
-        */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-               ret = filemap_write_and_wait_range(mapping, offset,
-                                                  offset + len - 1);
-               if (ret)
-                       return ret;
-       }
-
        /*
         * Round up offset. This is not fallocate; we need to zero out
         * blocks, so convert the interior block-aligned part of the range to
@@ -4817,7 +4799,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        else
                max_blocks -= lblk;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Indirect files do not support unwritten extents
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        /* Preallocate the range including the unaligned edges */
        if (partial_begin || partial_end) {
                ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                                 round_down(offset, 1 << blkbits)) >> blkbits,
                                new_size, flags, mode);
                if (ret)
-                       goto out_mutex;
+                       goto out_dio;
 
        }
 
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
                          EXT4_EX_NOCACHE);
 
-               /* Now release the pages and zero block aligned part of pages*/
+               /*
+                * Prevent page faults from reinstantiating pages we have
+                * released from page cache.
+                */
+               down_write(&EXT4_I(inode)->i_mmap_sem);
+               ret = ext4_update_disksize_before_punch(inode, offset, len);
+               if (ret) {
+                       up_write(&EXT4_I(inode)->i_mmap_sem);
+                       goto out_dio;
+               }
+               /* Now release the pages and zero block aligned part of pages */
                truncate_pagecache_range(inode, start, end - 1);
                inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 
-               /* Wait all existing dio workers, newcomers will block on i_mutex */
-               ext4_inode_block_unlocked_dio(inode);
-               inode_dio_wait(inode);
-
                ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                             flags, mode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
                if (ret)
                        goto out_dio;
        }
@@ -4909,7 +4902,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 out_dio:
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -4980,7 +4973,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * We only support preallocation for extent-based files only
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                        goto out;
        }
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                     flags, mode);
+       ext4_inode_resume_unlocked_dio(inode);
        if (ret)
                goto out;
 
@@ -5008,7 +5006,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                                                EXT4_I(inode)->i_sync_tid);
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
        return ret;
 }
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                        return ret;
        }
 
-       /*
-        * Need to round down offset to be aligned with page size boundary
-        * for page size > block size.
-        */
-       ioffset = round_down(offset, PAGE_SIZE);
-
-       /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                                          LLONG_MAX);
-       if (ret)
-               return ret;
-
-       /* Take mutex lock */
-       mutex_lock(&inode->i_mutex);
-
+       inode_lock(inode);
        /*
         * The collapse range must not overlap EOF; collapsing up to the end
         * would effectively be a truncate operation
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache(inode, ioffset);
-
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
+       /*
+        * Need to round down offset to be aligned with page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+       /*
+        * Write out the tail of the last page before the removed range, since
+        * it will get removed from the page cache below.
+        */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+       if (ret)
+               goto out_mmap;
+       /*
+        * Write out data that will be shifted, to preserve it when discarding
+        * the page cache below. We are also protected from pages becoming dirty
+        * by i_mmap_sem.
+        */
+       ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+                                          LLONG_MAX);
+       if (ret)
+               goto out_mmap;
+       truncate_pagecache(inode, ioffset);
+
        credits = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_dio;
+               goto out_mmap;
        }
 
        down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,10 +5583,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
        ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                        return ret;
        }
 
-       /*
-        * Need to round down to align start offset to page size boundary
-        * for page size > block size.
-        */
-       ioffset = round_down(offset, PAGE_SIZE);
-
-       /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                       LLONG_MAX);
-       if (ret)
-               return ret;
-
-       /* Take mutex lock */
-       mutex_lock(&inode->i_mutex);
-
+       inode_lock(inode);
        /* Currently just for extent based files */
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ret = -EOPNOTSUPP;
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache(inode, ioffset);
-
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
+       /*
+        * Need to round down to align start offset to page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+       /* Write out all dirty pages */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                       LLONG_MAX);
+       if (ret)
+               goto out_mmap;
+       truncate_pagecache(inode, ioffset);
+
        credits = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_dio;
+               goto out_mmap;
        }
 
        /* Expand file to avoid data loss if there is an error while shifting */
@@ -5741,10 +5753,11 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
        ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -5779,8 +5792,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
 
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
-       BUG_ON(!mutex_is_locked(&inode1->i_mutex));
-       BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+       BUG_ON(!inode_is_locked(inode1));
+       BUG_ON(!inode_is_locked(inode2));
 
        *erp = ext4_es_remove_extent(inode1, lblk1, count);
        if (unlikely(*erp))
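
Seen together, the zero-range, collapse-range and insert-range hunks converge on a single lock ordering for ranged operations. A condensed sketch of that order (a paraphrase of the code above, not an excerpt):

static void ext4_ranged_op_skeleton(struct inode *inode)
{
        inode_lock(inode);                      /* 1: i_mutex */
        ext4_inode_block_unlocked_dio(inode);   /* 2: stop new unlocked DIO */
        inode_dio_wait(inode);                  /*    and drain what is left */
        down_write(&EXT4_I(inode)->i_mmap_sem); /* 3: fence page faults */

        /* flush the affected ranges, truncate_pagecache(), then the
         * journalled extent manipulation */

        up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
        inode_unlock(inode);
}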
index 113837e7ba98d5cf866ee365a40a89d7fc781a71..4cd318f31cbeff21979d552cab7390c6df76a5c9 100644 (file)
@@ -113,7 +113,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                ext4_unwritten_wait(inode);
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret <= 0)
                goto out;
@@ -169,7 +169,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        }
 
        ret = __generic_file_write_iter(iocb, from);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (ret > 0) {
                ssize_t err;
@@ -186,50 +186,42 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        return ret;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (aio_mutex)
                mutex_unlock(aio_mutex);
        return ret;
 }
 
 #ifdef CONFIG_FS_DAX
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
-       struct inode *inode = bh->b_assoc_map->host;
-       /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
-       loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
-       int err;
-       if (!uptodate)
-               return;
-       WARN_ON(!buffer_unwritten(bh));
-       err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        int result;
        handle_t *handle = NULL;
-       struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
        bool write = vmf->flags & FAULT_FLAG_WRITE;
 
        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                                EXT4_DATA_TRANS_BLOCKS(sb));
-       }
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
 
        if (IS_ERR(handle))
                result = VM_FAULT_SIGBUS;
        else
-               result = __dax_fault(vma, vmf, ext4_get_block_dax,
-                                               ext4_end_io_unwritten);
+               result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
 
        if (write) {
                if (!IS_ERR(handle))
                        ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
-       }
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return result;
 }
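
This is the read side of the same i_mmap_sem scheme: faults may run concurrently with one another but are excluded against truncate and punch-hole, which take the semaphore for write. Condensed from the handler above (a paraphrase, not an excerpt):

static int ext4_dax_fault_sketch(struct vm_area_struct *vma,
                                 struct vm_fault *vmf)
{
        struct inode *inode = file_inode(vma->vm_file);
        int result;

        down_read(&EXT4_I(inode)->i_mmap_sem);
        result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
        up_read(&EXT4_I(inode)->i_mmap_sem);
        return result;
}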
@@ -246,44 +238,73 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                ext4_chunk_trans_blocks(inode,
                                                        PMD_SIZE / PAGE_SIZE));
-       }
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
 
        if (IS_ERR(handle))
                result = VM_FAULT_SIGBUS;
        else
                result = __dax_pmd_fault(vma, addr, pmd, flags,
-                               ext4_get_block_dax, ext4_end_io_unwritten);
+                               ext4_dax_mmap_get_block, NULL);
 
        if (write) {
                if (!IS_ERR(handle))
                        ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
-       }
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return result;
 }
 
-static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to the
+ * ext4_dax_fault() handler, we check for races against truncate. Note that
+ * since we cycle through i_mmap_sem, we are sure that any hole punching that
+ * began before we were called has finished by now, so if it included part of
+ * the file we are working on, our pte will get unmapped and the check for
+ * pte_same() in wp_pfn_shared() fails. Thus the fault gets retried and
+ * things work out as desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+                               struct vm_fault *vmf)
 {
-       return dax_mkwrite(vma, vmf, ext4_get_block_dax,
-                               ext4_end_io_unwritten);
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
+       loff_t size;
+       int ret;
+
+       sb_start_pagefault(sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       if (vmf->pgoff >= size)
+               ret = VM_FAULT_SIGBUS;
+       else
+               ret = dax_pfn_mkwrite(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(sb);
+
+       return ret;
 }
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
        .fault          = ext4_dax_fault,
        .pmd_fault      = ext4_dax_pmd_fault,
-       .page_mkwrite   = ext4_dax_mkwrite,
-       .pfn_mkwrite    = dax_pfn_mkwrite,
+       .page_mkwrite   = ext4_dax_fault,
+       .pfn_mkwrite    = ext4_dax_pfn_mkwrite,
 };
 #else
 #define ext4_dax_vm_ops        ext4_file_vm_ops
 #endif
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = ext4_filemap_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = ext4_page_mkwrite,
 };
@@ -314,6 +335,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
        struct super_block *sb = inode->i_sb;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct vfsmount *mnt = filp->f_path.mnt;
+       struct inode *dir = filp->f_path.dentry->d_parent->d_inode;
        struct path path;
        char buf[64], *cp;
        int ret;
@@ -357,6 +379,14 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                if (ext4_encryption_info(inode) == NULL)
                        return -ENOKEY;
        }
+       if (ext4_encrypted_inode(dir) &&
+           !ext4_is_child_context_consistent_with_parent(dir, inode)) {
+               ext4_warning(inode->i_sb,
+                            "Inconsistent encryption contexts: %lu/%lu\n",
+                            (unsigned long) dir->i_ino,
+                            (unsigned long) inode->i_ino);
+               return -EPERM;
+       }
        /*
         * Set up the jbd2_inode if we are opening the inode for
         * writing and the journal is present
@@ -527,11 +557,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
        int blkbits;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return -ENXIO;
        }
 
@@ -579,7 +609,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
                dataoff = (loff_t)last << blkbits;
        } while (last <= end);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (dataoff > isize)
                return -ENXIO;
@@ -600,11 +630,11 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
        int blkbits;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return -ENXIO;
        }
 
@@ -655,7 +685,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
                break;
        } while (last <= end);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (holeoff > isize)
                holeoff = isize;
index 1b8024d26f654c5458c7e84db5a2c439adc6500e..acc0ad56bf2f43c5c3a95502342e83a28f8b4148 100644 (file)
@@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct super_block *sb,
        /* If the checksum is bad, mark all blocks and inodes in use to prevent
         * allocation, essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-               ext4_error(sb, "Checksum bad for group %u", block_group);
                grp = ext4_get_group_info(sb, block_group);
                if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
                        percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                set_buffer_verified(bh);
                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
-               if (err)
+               if (err) {
+                       ext4_error(sb, "Failed to init inode bitmap for group "
+                                  "%u: %d", block_group, err);
                        goto out;
+               }
                return bh;
        }
        ext4_unlock_group(sb, block_group);
@@ -799,6 +801,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
                inode->i_gid = dir->i_gid;
        } else
                inode_init_owner(inode, dir, mode);
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+               ei->i_projid = EXT4_I(dir)->i_projid;
+       else
+               ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
        err = dquot_initialize(inode);
        if (err)
                goto out;
index d884989cc83dd99238a710f8131ab38b1139c7ca..dfe3b9bafc0d2b9cb4957c189290f8da68fb4dc4 100644 (file)
@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
  */
 static int ext4_add_dirent_to_inline(handle_t *handle,
                                     struct ext4_filename *fname,
-                                    struct dentry *dentry,
+                                    struct inode *dir,
                                     struct inode *inode,
                                     struct ext4_iloc *iloc,
                                     void *inline_start, int inline_size)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
        int             err;
        struct ext4_dir_entry_2 *de;
 
@@ -1245,12 +1244,11 @@ out:
  * the new created block.
  */
 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
-                             struct dentry *dentry, struct inode *inode)
+                             struct inode *dir, struct inode *inode)
 {
        int ret, inline_size;
        void *inline_start;
        struct ext4_iloc iloc;
-       struct inode *dir = d_inode(dentry->d_parent);
 
        ret = ext4_get_inode_loc(dir, &iloc);
        if (ret)
@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
                                                 EXT4_INLINE_DOTDOT_SIZE;
        inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
 
-       ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+       ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
                                        inline_start, inline_size);
        if (ret != -ENOSPC)
                goto out;
@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
        if (inline_size) {
                inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
 
-               ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+               ret = ext4_add_dirent_to_inline(handle, fname, dir,
                                                inode, &iloc, inline_start,
                                                inline_size);
 
index b3bd912df6bfaf475f9304eba7fb1b9ce6368e87..aee960b1af347e3407dd05f5a7095a7af4d8a96e 100644 (file)
@@ -383,6 +383,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
        return 0;
 }
 
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+                      ext4_lblk_t len)
+{
+       int ret;
+
+       if (ext4_encrypted_inode(inode))
+               return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+
+       ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+       if (ret > 0)
+               ret = 0;
+
+       return ret;
+}
+
 #define check_block_validity(inode, map)       \
        __check_block_validity((inode), __func__, __LINE__, (map))
 
@@ -403,8 +418,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
         * out taking i_data_sem.  So at the time the unwritten extent
         * could be converted.
         */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -412,8 +426,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
                retval = ext4_ind_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
        }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
 
        /*
         * We don't check m_len because the extent will be collapsed in status
@@ -509,8 +522,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -541,8 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                if (ret < 0)
                        retval = ret;
        }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
 
 found:
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -625,6 +636,22 @@ found:
                        WARN_ON(1);
                }
 
+               /*
+                * We have to zeroout blocks before inserting them into extent
+                * status tree. Otherwise someone could look them up there and
+                * use them before they are really zeroed.
+                */
+               if (flags & EXT4_GET_BLOCKS_ZERO &&
+                   map->m_flags & EXT4_MAP_MAPPED &&
+                   map->m_flags & EXT4_MAP_NEW) {
+                       ret = ext4_issue_zeroout(inode, map->m_lblk,
+                                                map->m_pblk, map->m_len);
+                       if (ret) {
+                               retval = ret;
+                               goto out_sem;
+                       }
+               }
+
                /*
                 * If the extent has been zeroed out, we don't need to update
                 * extent status tree.
@@ -632,7 +659,7 @@ found:
                if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
                    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
                        if (ext4_es_is_written(&es))
-                               goto has_zeroout;
+                               goto out_sem;
                }
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -643,11 +670,13 @@ found:
                        status |= EXTENT_STATUS_DELAYED;
                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                            map->m_pblk, status);
-               if (ret < 0)
+               if (ret < 0) {
                        retval = ret;
+                       goto out_sem;
+               }
        }
 
-has_zeroout:
+out_sem:
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                ret = check_block_validity(inode, map);
@@ -657,6 +686,34 @@ has_zeroout:
        return retval;
 }
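
The zeroout ordering added to ext4_map_blocks() above (zero the blocks, only then insert them into the extent status tree) is the classic publish-after-initialize rule; in the kernel code it is enforced while holding i_data_sem for writing. Below is a stand-alone user-space analogue of the same rule, with every name invented for the sketch:

        #include <stdatomic.h>
        #include <stdlib.h>
        #include <string.h>

        struct extent { char data[4096]; };

        static _Atomic(struct extent *) lookup_table;   /* stand-in for the status tree */

        static void map_new_extent(void)
        {
                struct extent *e = malloc(sizeof(*e));  /* error handling elided */

                memset(e->data, 0, sizeof(e->data));    /* zero first... */
                atomic_store_explicit(&lookup_table, e, /* ...then make it visible */
                                      memory_order_release);
        }

        static struct extent *lookup_extent(void)
        {
                /* acquire pairs with the release above: a reader that sees the
                 * pointer also sees the zeroed contents, never stale data */
                return atomic_load_explicit(&lookup_table, memory_order_acquire);
        }

        int main(void)
        {
                map_new_extent();
                return lookup_extent()->data[0];        /* 0 */
        }
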
 
+/*
+ * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages
+ * we have to be careful as someone else may be manipulating b_state as well.
+ */
+static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
+{
+       unsigned long old_state;
+       unsigned long new_state;
+
+       flags &= EXT4_MAP_FLAGS;
+
+       /* Dummy buffer_head? Set non-atomically. */
+       if (!bh->b_page) {
+               bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags;
+               return;
+       }
+       /*
+        * Someone else may be modifying b_state. Be careful! This is ugly but
+        * once we get rid of using bh as a container for mapping information
+        * to pass to / from get_block functions, this can go away.
+        */
+       do {
+               old_state = READ_ONCE(bh->b_state);
+               new_state = (old_state & ~EXT4_MAP_FLAGS) | flags;
+       } while (unlikely(
+                cmpxchg(&bh->b_state, old_state, new_state) != old_state));
+}
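
ext4_update_bh_state() above is a textbook compare-and-swap read-modify-write loop: compute the new value from a snapshot and retry if another thread changed b_state in the meantime. A self-contained rendering with C11 atomics follows; the mask value and all names are illustrative, not kernel API:

        #include <stdatomic.h>
        #include <stdio.h>

        #define MAP_FLAGS_MASK 0xffUL   /* stand-in for EXT4_MAP_FLAGS */

        static void update_flags(_Atomic unsigned long *state, unsigned long flags)
        {
                unsigned long old_state = atomic_load(state);
                unsigned long new_state;

                flags &= MAP_FLAGS_MASK;
                do {
                        new_state = (old_state & ~MAP_FLAGS_MASK) | flags;
                        /* on failure the CAS refreshes old_state, so the next
                         * iteration works against the value just stored */
                } while (!atomic_compare_exchange_weak(state, &old_state, new_state));
        }

        int main(void)
        {
                _Atomic unsigned long b_state = 0x1234;

                update_flags(&b_state, 0x05);
                printf("%#lx\n", atomic_load(&b_state));        /* 0x1205 */
                return 0;
        }
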
+
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
@@ -674,7 +731,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
 
-       if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
+       if (flags && !handle) {
                /* Direct IO write... */
                if (map.m_len > DIO_MAX_BLOCKS)
                        map.m_len = DIO_MAX_BLOCKS;
@@ -693,17 +750,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
                ext4_io_end_t *io_end = ext4_inode_aio(inode);
 
                map_bh(bh, inode->i_sb, map.m_pblk);
-               bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
-               if (IS_DAX(inode) && buffer_unwritten(bh)) {
-                       /*
-                        * dgc: I suspect unwritten conversion on ext4+DAX is
-                        * fundamentally broken here when there are concurrent
-                        * read/write in progress on this inode.
-                        */
-                       WARN_ON_ONCE(io_end);
-                       bh->b_assoc_map = inode->i_mapping;
-                       bh->b_private = (void *)(unsigned long)iblock;
-               }
+               ext4_update_bh_state(bh, map.m_flags);
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -879,9 +926,6 @@ int do_journal_get_write_access(handle_t *handle,
        return ret;
 }
 
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create);
-
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
                                  get_block_t *get_block)
@@ -1669,7 +1713,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                return ret;
 
        map_bh(bh, inode->i_sb, map.m_pblk);
-       bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+       ext4_update_bh_state(bh, map.m_flags);
 
        if (buffer_unwritten(bh)) {
                /* A delayed write to unwritten bh should be marked
@@ -2434,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping,
 
        trace_ext4_writepages(inode, wbc);
 
+       if (dax_mapping(mapping))
+               return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
+                                                  wbc);
+
        /*
         * No pages to write? This is mainly a kludge to avoid starting
         * a transaction for special inodes like journal inode on last iput()
@@ -3054,25 +3102,96 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock,
                               EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create)
 {
-       ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+       int ret;
+
+       ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result,
-                              EXT4_GET_BLOCKS_NO_LOCK);
+       ret = _ext4_get_block(inode, iblock, bh_result, 0);
+       /*
+        * Blocks should have been preallocated! ext4_file_write_iter() checks
+        * that.
+        */
+       WARN_ON_ONCE(!buffer_mapped(bh_result));
+
+       return ret;
 }
 
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+#ifdef CONFIG_FS_DAX
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create)
 {
-       int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+       int ret, err;
+       int credits;
+       struct ext4_map_blocks map;
+       handle_t *handle = NULL;
+       int flags = 0;
+
+       ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result, flags);
+       map.m_lblk = iblock;
+       map.m_len = bh_result->b_size >> inode->i_blkbits;
+       credits = ext4_chunk_trans_blocks(inode, map.m_len);
+       if (create) {
+               flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       return ret;
+               }
+       }
+
+       ret = ext4_map_blocks(handle, inode, &map, flags);
+       if (create) {
+               err = ext4_journal_stop(handle);
+               if (ret >= 0 && err < 0)
+                       ret = err;
+       }
+       if (ret <= 0)
+               goto out;
+       if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+               int err2;
+
+               /*
+                * We are protected by i_mmap_sem so we know the block cannot
+                * go away from under us even though we dropped i_data_sem.
+                * Convert extent to written and write zeros there.
+                *
+                * Note: We may get here even when create == 0.
+                */
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       goto out;
+               }
+
+               err = ext4_map_blocks(handle, inode, &map,
+                     EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
+               if (err < 0)
+                       ret = err;
+               err2 = ext4_journal_stop(handle);
+               if (err2 < 0 && ret > 0)
+                       ret = err2;
+       }
+out:
+       WARN_ON_ONCE(ret == 0 && create);
+       if (ret > 0) {
+               map_bh(bh_result, inode->i_sb, map.m_pblk);
+               bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+                                       map.m_flags;
+               /*
+                * At least for now we have to clear BH_New so that DAX code
+                * doesn't attempt to zero blocks again in a racy way.
+                */
+               bh_result->b_state &= ~(1 << BH_New);
+               bh_result->b_size = map.m_len << inode->i_blkbits;
+               ret = 0;
+       }
+       return ret;
 }
+#endif
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                            ssize_t size, void *private)
@@ -3143,10 +3262,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        /* If we do an overwrite dio, i_mutex locking can be released */
        overwrite = *((int *)iocb->private);
 
-       if (overwrite) {
-               down_read(&EXT4_I(inode)->i_data_sem);
-               mutex_unlock(&inode->i_mutex);
-       }
+       if (overwrite)
+               inode_unlock(inode);
 
        /*
         * We could direct write to holes and fallocate.
@@ -3168,29 +3285,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
         * case, we allocate an io_end structure to hook to the iocb.
         */
        iocb->private = NULL;
-       ext4_inode_aio_set(inode, NULL);
-       if (!is_sync_kiocb(iocb)) {
-               io_end = ext4_init_io_end(inode, GFP_NOFS);
-               if (!io_end) {
-                       ret = -ENOMEM;
-                       goto retake_lock;
-               }
-               /*
-                * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
-                */
-               iocb->private = ext4_get_io_end(io_end);
-               /*
-                * we save the io structure for current async direct
-                * IO, so that later ext4_map_blocks() could flag the
-                * io structure whether there is a unwritten extents
-                * needs to be converted when IO is completed.
-                */
-               ext4_inode_aio_set(inode, io_end);
-       }
-
        if (overwrite) {
-               get_block_func = ext4_get_block_write_nolock;
+               get_block_func = ext4_get_block_overwrite;
        } else {
+               ext4_inode_aio_set(inode, NULL);
+               if (!is_sync_kiocb(iocb)) {
+                       io_end = ext4_init_io_end(inode, GFP_NOFS);
+                       if (!io_end) {
+                               ret = -ENOMEM;
+                               goto retake_lock;
+                       }
+                       /*
+                        * Grab reference for DIO. Will be dropped in
+                        * ext4_end_io_dio()
+                        */
+                       iocb->private = ext4_get_io_end(io_end);
+                       /*
+                        * We save the io structure for the current async
+                        * direct IO, so that later ext4_map_blocks() can flag
+                        * the io structure if there are unwritten extents
+                        * that need to be converted when the IO completes.
+                        */
+                       ext4_inode_aio_set(inode, io_end);
+               }
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
        }
@@ -3245,10 +3362,8 @@ retake_lock:
        if (iov_iter_rw(iter) == WRITE)
                inode_dio_end(inode);
        /* take i_mutex locking again if we do an overwrite dio */
-       if (overwrite) {
-               up_read(&EXT4_I(inode)->i_data_sem);
-               mutex_lock(&inode->i_mutex);
-       }
+       if (overwrite)
+               inode_lock(inode);
 
        return ret;
 }
@@ -3558,6 +3673,35 @@ int ext4_can_truncate(struct inode *inode)
        return 0;
 }
 
+/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * the page cache due to hole punching or zero range. Otherwise the i_disksize
+ * update can get lost, as it may have been postponed until writeback
+ * submission, which will never happen once we truncate the page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len)
+{
+       handle_t *handle;
+       loff_t size = i_size_read(inode);
+
+       WARN_ON(!inode_is_locked(inode));
+       if (offset > size || offset + len < size)
+               return 0;
+
+       if (EXT4_I(inode)->i_disksize >= size)
+               return 0;
+
+       handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+       ext4_update_i_disksize(inode, size);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+
+       return 0;
+}
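
The early return in ext4_update_disksize_before_punch() encodes the question "does the punched range straddle i_size?", since only then can a postponed i_disksize update be lost. A hypothetical helper with a couple of worked cases, using nothing beyond the arithmetic of the check above:

        #include <assert.h>
        #include <stdbool.h>

        typedef long long loff_t;

        /* mirrors: !(offset > size || offset + len < size) */
        static bool punch_straddles_eof(loff_t offset, loff_t len, loff_t size)
        {
                return offset <= size && offset + len >= size;
        }

        int main(void)
        {
                /* file size 1000: punching [900, 1100) covers EOF... */
                assert(punch_straddles_eof(900, 200, 1000));
                /* ...while punching [0, 500) does not, so nothing to do */
                assert(!punch_straddles_eof(0, 500, 1000));
                return 0;
        }
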
+
 /*
 * ext4_punch_hole: punches a hole in a file by releasing the blocks
  * associated with the given offset and length
@@ -3595,7 +3739,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                        return ret;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* No need to punch hole beyond i_size */
        if (offset >= inode->i_size)
@@ -3623,17 +3767,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
        }
 
+       /* Wait for all existing dio workers; newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
        first_block_offset = round_up(offset, sb->s_blocksize);
        last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
        /* Now release the pages and zero block aligned part of pages*/
-       if (last_block_offset > first_block_offset)
+       if (last_block_offset > first_block_offset) {
+               ret = ext4_update_disksize_before_punch(inode, offset, length);
+               if (ret)
+                       goto out_dio;
                truncate_pagecache_range(inode, first_block_offset,
                                         last_block_offset);
-
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
+       }
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                credits = ext4_writepage_trans_blocks(inode);
@@ -3680,19 +3833,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-       /* Now release the pages again to reduce race window */
-       if (last_block_offset > first_block_offset)
-               truncate_pagecache_range(inode, first_block_offset,
-                                        last_block_offset);
-
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 out_stop:
        ext4_journal_stop(handle);
 out_dio:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -3762,7 +3911,7 @@ void ext4_truncate(struct inode *inode)
         * have i_mutex locked because it's not necessary.
         */
        if (!(inode->i_state & (I_NEW|I_FREEING)))
-               WARN_ON(!mutex_is_locked(&inode->i_mutex));
+               WARN_ON(!inode_is_locked(inode));
        trace_ext4_truncate_enter(inode);
 
        if (!ext4_can_truncate(inode))
@@ -4010,7 +4159,7 @@ void ext4_set_inode_flags(struct inode *inode)
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
-       if (test_opt(inode->i_sb, DAX))
+       if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
                new_fl |= S_DAX;
        inode_set_flags(inode, new_fl,
                        S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@ -4076,6 +4225,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
                EXT4_I(inode)->i_inline_off = 0;
 }
 
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+               return -EOPNOTSUPP;
+       *projid = EXT4_I(inode)->i_projid;
+       return 0;
+}
+
 struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 {
        struct ext4_iloc iloc;
@@ -4087,6 +4244,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        int block;
        uid_t i_uid;
        gid_t i_gid;
+       projid_t i_projid;
 
        inode = iget_locked(sb, ino);
        if (!inode)
@@ -4136,12 +4294,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
        i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+       else
+               i_projid = EXT4_DEF_PROJID;
+
        if (!(test_opt(inode->i_sb, NO_UID32))) {
                i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
        i_uid_write(inode, i_uid);
        i_gid_write(inode, i_gid);
+       ei->i_projid = make_kprojid(&init_user_ns, i_projid);
        set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 
        ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
@@ -4440,6 +4606,7 @@ static int ext4_do_update_inode(handle_t *handle,
        int need_datasync = 0, set_large_file = 0;
        uid_t i_uid;
        gid_t i_gid;
+       projid_t i_projid;
 
        spin_lock(&ei->i_raw_lock);
 
@@ -4452,6 +4619,7 @@ static int ext4_do_update_inode(handle_t *handle,
        raw_inode->i_mode = cpu_to_le16(inode->i_mode);
        i_uid = i_uid_read(inode);
        i_gid = i_gid_read(inode);
+       i_projid = from_kprojid(&init_user_ns, ei->i_projid);
        if (!(test_opt(inode->i_sb, NO_UID32))) {
                raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
                raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4529,6 +4697,15 @@ static int ext4_do_update_inode(handle_t *handle,
                                cpu_to_le16(ei->i_extra_isize);
                }
        }
+
+       BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+              i_projid != EXT4_DEF_PROJID);
+
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               raw_inode->i_projid = cpu_to_le32(i_projid);
+
        ext4_inode_csum_set(inode, raw_inode, ei);
        spin_unlock(&ei->i_raw_lock);
        if (inode->i_sb->s_flags & MS_LAZYTIME)
@@ -4824,6 +5001,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        } else
                                ext4_wait_for_tail_page_commit(inode);
                }
+               down_write(&EXT4_I(inode)->i_mmap_sem);
                /*
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
@@ -4831,6 +5009,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                truncate_pagecache(inode, inode->i_size);
                if (shrink)
                        ext4_truncate(inode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
        }
 
        if (!rc) {
@@ -5279,6 +5458,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
        /* Delalloc case is easy... */
        if (test_opt(inode->i_sb, DELALLOC) &&
            !ext4_should_journal_data(inode) &&
@@ -5348,6 +5529,19 @@ retry_alloc:
 out_ret:
        ret = block_page_mkwrite_return(ret);
 out:
+       up_read(&EXT4_I(inode)->i_mmap_sem);
        sb_end_pagefault(inode->i_sb);
        return ret;
 }
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       int err;
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = filemap_fault(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+
+       return err;
+}
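
ext4_filemap_fault() is the read side of the fault-vs-truncate serialization this series introduces: page faults take i_mmap_sem shared, while truncate and hole punch (see ext4_setattr() and ext4_punch_hole() above) take it exclusive so that released pages cannot be refaulted mid-operation. A pthread sketch of the same shape, with all names invented:

        #include <pthread.h>

        static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

        static int fault_path(int (*fault)(void *), void *arg)
        {
                int err;

                pthread_rwlock_rdlock(&mmap_sem);       /* faults may run in parallel */
                err = fault(arg);
                pthread_rwlock_unlock(&mmap_sem);
                return err;
        }

        static void truncate_path(void (*drop_pages)(void))
        {
                pthread_rwlock_wrlock(&mmap_sem);       /* excludes every fault */
                drop_pages();           /* nothing can be refaulted in here */
                pthread_rwlock_unlock(&mmap_sem);
        }

        static int noop_fault(void *arg) { (void)arg; return 0; }
        static void noop_drop(void) { }

        int main(void)
        {
                truncate_path(noop_drop);
                return fault_path(noop_fault, 0);
        }
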
index 5e872fd40e5e38655435fbcf91802244c9ce8959..eae5917c534e51b2fddf7bcebfa1293ed76a2841 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mount.h>
 #include <linux/file.h>
 #include <linux/random.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
@@ -202,6 +203,238 @@ static int uuid_is_zero(__u8 u[16])
        return 1;
 }
 
+static int ext4_ioctl_setflags(struct inode *inode,
+                              unsigned int flags)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       handle_t *handle = NULL;
+       int err = -EPERM, migrate = 0;
+       struct ext4_iloc iloc;
+       unsigned int oldflags, mask, i;
+       unsigned int jflag;
+
+       /* Is it a quota file? Do not allow the user to mess with it */
+       if (IS_NOQUOTA(inode))
+               goto flags_out;
+
+       oldflags = ei->i_flags;
+
+       /* The JOURNAL_DATA flag is modifiable only by root */
+       jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+       /*
+        * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+        * the relevant capability.
+        *
+        * This test looks nicer. Thanks to Pauline Middelink
+        */
+       if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+               if (!capable(CAP_LINUX_IMMUTABLE))
+                       goto flags_out;
+       }
+
+       /*
+        * The JOURNAL_DATA flag can only be changed by
+        * the relevant capability.
+        */
+       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+               if (!capable(CAP_SYS_RESOURCE))
+                       goto flags_out;
+       }
+       if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+               migrate = 1;
+
+       if (flags & EXT4_EOFBLOCKS_FL) {
+               /* we don't support adding EOFBLOCKS flag */
+               if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+                       err = -EOPNOTSUPP;
+                       goto flags_out;
+               }
+       } else if (oldflags & EXT4_EOFBLOCKS_FL)
+               ext4_truncate(inode);
+
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto flags_out;
+       }
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto flags_err;
+
+       for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+               if (!(mask & EXT4_FL_USER_MODIFIABLE))
+                       continue;
+               if (mask & flags)
+                       ext4_set_inode_flag(inode, i);
+               else
+                       ext4_clear_inode_flag(inode, i);
+       }
+
+       ext4_set_inode_flags(inode);
+       inode->i_ctime = ext4_current_time(inode);
+
+       err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+       ext4_journal_stop(handle);
+       if (err)
+               goto flags_out;
+
+       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+               err = ext4_change_inode_journal_flag(inode, jflag);
+       if (err)
+               goto flags_out;
+       if (migrate) {
+               if (flags & EXT4_EXTENTS_FL)
+                       err = ext4_ext_migrate(inode);
+               else
+                       err = ext4_ind_migrate(inode);
+       }
+
+flags_out:
+       return err;
+}
+
+#ifdef CONFIG_QUOTA
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+       struct inode *inode = file_inode(filp);
+       struct super_block *sb = inode->i_sb;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       int err, rc;
+       handle_t *handle;
+       kprojid_t kprojid;
+       struct ext4_iloc iloc;
+       struct ext4_inode *raw_inode;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+               if (projid != EXT4_DEF_PROJID)
+                       return -EOPNOTSUPP;
+               else
+                       return 0;
+       }
+
+       if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
+               return -EOPNOTSUPP;
+
+       kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
+
+       if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
+               return 0;
+
+       err = mnt_want_write_file(filp);
+       if (err)
+               return err;
+
+       err = -EPERM;
+       inode_lock(inode);
+       /* Is it a quota file? Do not allow the user to mess with it */
+       if (IS_NOQUOTA(inode))
+               goto out_unlock;
+
+       err = ext4_get_inode_loc(inode, &iloc);
+       if (err)
+               goto out_unlock;
+
+       raw_inode = ext4_raw_inode(&iloc);
+       if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
+               err = -EOVERFLOW;
+               brelse(iloc.bh);
+               goto out_unlock;
+       }
+       brelse(iloc.bh);
+
+       dquot_initialize(inode);
+
+       handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+               EXT4_QUOTA_INIT_BLOCKS(sb) +
+               EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto out_unlock;
+       }
+
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto out_stop;
+
+       if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
+               struct dquot *transfer_to[MAXQUOTAS] = { };
+
+               transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+               if (transfer_to[PRJQUOTA]) {
+                       err = __dquot_transfer(inode, transfer_to);
+                       dqput(transfer_to[PRJQUOTA]);
+                       if (err)
+                               goto out_dirty;
+               }
+       }
+       EXT4_I(inode)->i_projid = kprojid;
+       inode->i_ctime = ext4_current_time(inode);
+out_dirty:
+       rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
+       if (!err)
+               err = rc;
+out_stop:
+       ext4_journal_stop(handle);
+out_unlock:
+       inode_unlock(inode);
+       mnt_drop_write_file(filp);
+       return err;
+}
+#else
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+       if (projid != EXT4_DEF_PROJID)
+               return -EOPNOTSUPP;
+       return 0;
+}
+#endif
+
+/* Transfer internal flags to xflags */
+static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
+{
+       __u32 xflags = 0;
+
+       if (iflags & EXT4_SYNC_FL)
+               xflags |= FS_XFLAG_SYNC;
+       if (iflags & EXT4_IMMUTABLE_FL)
+               xflags |= FS_XFLAG_IMMUTABLE;
+       if (iflags & EXT4_APPEND_FL)
+               xflags |= FS_XFLAG_APPEND;
+       if (iflags & EXT4_NODUMP_FL)
+               xflags |= FS_XFLAG_NODUMP;
+       if (iflags & EXT4_NOATIME_FL)
+               xflags |= FS_XFLAG_NOATIME;
+       if (iflags & EXT4_PROJINHERIT_FL)
+               xflags |= FS_XFLAG_PROJINHERIT;
+       return xflags;
+}
+
+/* Transfer xflags flags to internal */
+static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
+{
+       unsigned long iflags = 0;
+
+       if (xflags & FS_XFLAG_SYNC)
+               iflags |= EXT4_SYNC_FL;
+       if (xflags & FS_XFLAG_IMMUTABLE)
+               iflags |= EXT4_IMMUTABLE_FL;
+       if (xflags & FS_XFLAG_APPEND)
+               iflags |= EXT4_APPEND_FL;
+       if (xflags & FS_XFLAG_NODUMP)
+               iflags |= EXT4_NODUMP_FL;
+       if (xflags & FS_XFLAG_NOATIME)
+               iflags |= EXT4_NOATIME_FL;
+       if (xflags & FS_XFLAG_PROJINHERIT)
+               iflags |= EXT4_PROJINHERIT_FL;
+
+       return iflags;
+}
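
The two translators above must remain exact inverses over the six supported bits. A table-driven formulation makes that symmetry mechanical; the constants below are invented for the sketch and are not the real EXT4_*_FL / FS_XFLAG_* values:

        #include <stdio.h>

        enum { I_SYNC = 1 << 3, I_IMMUTABLE = 1 << 4, I_APPEND = 1 << 5 };
        enum { X_SYNC = 1 << 5, X_IMMUTABLE = 1 << 3, X_APPEND = 1 << 4 };

        static const struct { unsigned long iflag; unsigned int xflag; } map[] = {
                { I_SYNC,      X_SYNC      },
                { I_IMMUTABLE, X_IMMUTABLE },
                { I_APPEND,    X_APPEND    },
        };

        static unsigned int iflags_to_xflags(unsigned long iflags)
        {
                unsigned int xflags = 0;
                size_t i;

                for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
                        if (iflags & map[i].iflag)
                                xflags |= map[i].xflag;
                return xflags;
        }

        static unsigned long xflags_to_iflags(unsigned int xflags)
        {
                unsigned long iflags = 0;
                size_t i;

                for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
                        if (xflags & map[i].xflag)
                                iflags |= map[i].iflag;
                return iflags;
        }

        int main(void)
        {
                unsigned long in = I_SYNC | I_APPEND;

                /* the round trip is the identity on supported bits */
                printf("%d\n", xflags_to_iflags(iflags_to_xflags(in)) == in);
                return 0;
        }
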
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
@@ -217,11 +450,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
                return put_user(flags, (int __user *) arg);
        case EXT4_IOC_SETFLAGS: {
-               handle_t *handle = NULL;
-               int err, migrate = 0;
-               struct ext4_iloc iloc;
-               unsigned int oldflags, mask, i;
-               unsigned int jflag;
+               int err;
 
                if (!inode_owner_or_capable(inode))
                        return -EACCES;
@@ -235,90 +464,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                flags = ext4_mask_flags(inode->i_mode, flags);
 
-               err = -EPERM;
-               mutex_lock(&inode->i_mutex);
-               /* Is it quota file? Do not allow user to mess with it */
-               if (IS_NOQUOTA(inode))
-                       goto flags_out;
-
-               oldflags = ei->i_flags;
-
-               /* The JOURNAL_DATA flag is modifiable only by root */
-               jflag = flags & EXT4_JOURNAL_DATA_FL;
-
-               /*
-                * The IMMUTABLE and APPEND_ONLY flags can only be changed by
-                * the relevant capability.
-                *
-                * This test looks nicer. Thanks to Pauline Middelink
-                */
-               if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
-                       if (!capable(CAP_LINUX_IMMUTABLE))
-                               goto flags_out;
-               }
-
-               /*
-                * The JOURNAL_DATA flag can only be changed by
-                * the relevant capability.
-                */
-               if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
-                       if (!capable(CAP_SYS_RESOURCE))
-                               goto flags_out;
-               }
-               if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
-                       migrate = 1;
-
-               if (flags & EXT4_EOFBLOCKS_FL) {
-                       /* we don't support adding EOFBLOCKS flag */
-                       if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
-                               err = -EOPNOTSUPP;
-                               goto flags_out;
-                       }
-               } else if (oldflags & EXT4_EOFBLOCKS_FL)
-                       ext4_truncate(inode);
-
-               handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-               if (IS_ERR(handle)) {
-                       err = PTR_ERR(handle);
-                       goto flags_out;
-               }
-               if (IS_SYNC(inode))
-                       ext4_handle_sync(handle);
-               err = ext4_reserve_inode_write(handle, inode, &iloc);
-               if (err)
-                       goto flags_err;
-
-               for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
-                       if (!(mask & EXT4_FL_USER_MODIFIABLE))
-                               continue;
-                       if (mask & flags)
-                               ext4_set_inode_flag(inode, i);
-                       else
-                               ext4_clear_inode_flag(inode, i);
-               }
-
-               ext4_set_inode_flags(inode);
-               inode->i_ctime = ext4_current_time(inode);
-
-               err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
-               ext4_journal_stop(handle);
-               if (err)
-                       goto flags_out;
-
-               if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
-                       err = ext4_change_inode_journal_flag(inode, jflag);
-               if (err)
-                       goto flags_out;
-               if (migrate) {
-                       if (flags & EXT4_EXTENTS_FL)
-                               err = ext4_ext_migrate(inode);
-                       else
-                               err = ext4_ind_migrate(inode);
-               }
-
-flags_out:
-               mutex_unlock(&inode->i_mutex);
+               inode_lock(inode);
+               err = ext4_ioctl_setflags(inode, flags);
+               inode_unlock(inode);
                mnt_drop_write_file(filp);
                return err;
        }
@@ -349,7 +497,7 @@ flags_out:
                        goto setversion_out;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
                if (IS_ERR(handle)) {
                        err = PTR_ERR(handle);
@@ -364,7 +512,7 @@ flags_out:
                ext4_journal_stop(handle);
 
 unlock_out:
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 setversion_out:
                mnt_drop_write_file(filp);
                return err;
@@ -435,6 +583,11 @@ group_extend_out:
                                 "Online defrag not supported with bigalloc");
                        err = -EOPNOTSUPP;
                        goto mext_out;
+               } else if (IS_DAX(inode)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Online defrag not supported with DAX");
+                       err = -EOPNOTSUPP;
+                       goto mext_out;
                }
 
                err = mnt_want_write_file(filp);
@@ -510,9 +663,9 @@ group_add_out:
                 * ext4_ext_swap_inode_data before we switch the
                 * inode format to prevent read.
                 */
-               mutex_lock(&(inode->i_mutex));
+               inode_lock(inode);
                err = ext4_ext_migrate(inode);
-               mutex_unlock(&(inode->i_mutex));
+               inode_unlock(inode);
                mnt_drop_write_file(filp);
                return err;
        }
@@ -689,6 +842,60 @@ encryption_policy_out:
                return -EOPNOTSUPP;
 #endif
        }
+       case EXT4_IOC_FSGETXATTR:
+       {
+               struct fsxattr fa;
+
+               memset(&fa, 0, sizeof(struct fsxattr));
+               ext4_get_inode_flags(ei);
+               fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
+
+               if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                               EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+                       fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
+                               EXT4_I(inode)->i_projid);
+               }
+
+               if (copy_to_user((struct fsxattr __user *)arg,
+                                &fa, sizeof(fa)))
+                       return -EFAULT;
+               return 0;
+       }
+       case EXT4_IOC_FSSETXATTR:
+       {
+               struct fsxattr fa;
+               int err;
+
+               if (copy_from_user(&fa, (struct fsxattr __user *)arg,
+                                  sizeof(fa)))
+                       return -EFAULT;
+
+               /* Make sure caller has proper permission */
+               if (!inode_owner_or_capable(inode))
+                       return -EACCES;
+
+               err = mnt_want_write_file(filp);
+               if (err)
+                       return err;
+
+               flags = ext4_xflags_to_iflags(fa.fsx_xflags);
+               flags = ext4_mask_flags(inode->i_mode, flags);
+
+               inode_lock(inode);
+               flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
+                        (flags & EXT4_FL_XFLAG_VISIBLE);
+               err = ext4_ioctl_setflags(inode, flags);
+               inode_unlock(inode);
+               mnt_drop_write_file(filp);
+               if (err)
+                       return err;
+
+               err = ext4_ioctl_setproject(filp, fa.fsx_projid);
+               if (err)
+                       return err;
+
+               return 0;
+       }
        default:
                return -ENOTTY;
        }
index 61eaf74dca3794b122a9e782e87c568ac99a8a65..4424b7bf8ac64e431a11570e247a1d828634d29e 100644 (file)
@@ -2285,7 +2285,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
        if (group == 0)
                seq_puts(seq, "#group: free  frags first ["
                              " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
-                             " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]");
+                             " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
 
        i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
                sizeof(struct ext4_group_info);
index fb6f11709ae62b4d0452352422bb6f3f50660ba3..e032a0423e351cabfd6fe6156cdfc9c53ebff216 100644 (file)
@@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
        ext4_lblk_t orig_blk_offset, donor_blk_offset;
        unsigned long blocksize = orig_inode->i_sb->s_blocksize;
        unsigned int tmp_data_size, data_size, replaced_size;
-       int err2, jblocks, retries = 0;
+       int i, err2, jblocks, retries = 0;
        int replaced_count = 0;
        int from = data_offset_in_page << orig_inode->i_blkbits;
        int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
        struct super_block *sb = orig_inode->i_sb;
+       struct buffer_head *bh = NULL;
 
        /*
         * It needs twice the amount of ordinary journal buffers because
@@ -380,8 +381,16 @@ data_copy:
        }
        /* Perform all necessary steps similar to write_begin()/write_end(),
         * but keeping in mind that i_size will not change */
-       *err = __block_write_begin(pagep[0], from, replaced_size,
-                                  ext4_get_block);
+       if (!page_has_buffers(pagep[0]))
+               create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
+       bh = page_buffers(pagep[0]);
+       for (i = 0; i < data_offset_in_page; i++)
+               bh = bh->b_this_page;
+       for (i = 0; i < block_len_in_page; i++) {
+               *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
+               if (*err < 0)
+                       break;
+       }
        if (!*err)
                *err = block_commit_write(pagep[0], from, from + replaced_size);
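
The replacement above maps each block in the page by hand, walking the page's circular list of buffer heads and calling ext4_get_block() per block, instead of going through __block_write_begin(). The walk relies on b_this_page forming a ring; a minimal model with invented types:

        #include <assert.h>

        struct bh { struct bh *b_this_page; int blocknr; };

        static struct bh *nth_bh(struct bh *head, int n)
        {
                struct bh *bh = head;

                while (n--)
                        bh = bh->b_this_page;   /* the list wraps back to head */
                return bh;
        }

        int main(void)
        {
                struct bh a = { 0 }, b = { 0 }, c = { 0 };

                a.blocknr = 0; b.blocknr = 1; c.blocknr = 2;
                a.b_this_page = &b; b.b_this_page = &c; c.b_this_page = &a;

                assert(nth_bh(&a, 2)->blocknr == 2);
                assert(nth_bh(&a, 3)->blocknr == 0);    /* wrapped */
                return 0;
        }
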
 
index f27e0c2598c59edb5685c27310c2766d0860838d..48e4b8907826eca52a1e94e14bdfdb0fd2240c56 100644 (file)
@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
                struct ext4_filename *fname,
                struct ext4_dir_entry_2 **res_dir);
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode);
+                            struct inode *dir, struct inode *inode);
 
 /* checksumming functions */
 void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1558,6 +1558,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
        struct ext4_dir_entry_2 *de;
        struct buffer_head *bh;
 
+       if (ext4_encrypted_inode(dir)) {
+               int res = ext4_get_encryption_info(dir);
+
+               /*
+                * This should be a properly defined flag for
+                * dentry->d_flags when we uplift this to the VFS.
+                * d_fsdata is set to (void *) 1 if the dentry is
+                * created while the directory was encrypted and we
+                * don't have access to the key.
+                */
+               dentry->d_fsdata = NULL;
+               if (ext4_encryption_info(dir))
+                       dentry->d_fsdata = (void *) 1;
+               d_set_d_op(dentry, &ext4_encrypted_d_ops);
+               if (res && res != -ENOKEY)
+                       return ERR_PTR(res);
+       }
+
        if (dentry->d_name.len > EXT4_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
 
@@ -1585,11 +1603,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
                        return ERR_PTR(-EFSCORRUPTED);
                }
                if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
-                   (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-                    S_ISLNK(inode->i_mode)) &&
+                   (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
                    !ext4_is_child_context_consistent_with_parent(dir,
                                                                  inode)) {
+                       int nokey = ext4_encrypted_inode(inode) &&
+                               !ext4_encryption_info(inode);
+
                        iput(inode);
+                       if (nokey)
+                               return ERR_PTR(-ENOKEY);
                        ext4_warning(inode->i_sb,
                                     "Inconsistent encryption contexts: %lu/%lu\n",
                                     (unsigned long) dir->i_ino,
@@ -1928,10 +1950,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
  * directory, and adds the dentry to the indexed directory.
  */
 static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
-                           struct dentry *dentry,
+                           struct inode *dir,
                            struct inode *inode, struct buffer_head *bh)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
        struct buffer_head *bh2;
        struct dx_root  *root;
        struct dx_frame frames[2], *frame;
@@ -2086,8 +2107,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                return retval;
 
        if (ext4_has_inline_data(dir)) {
-               retval = ext4_try_add_inline_entry(handle, &fname,
-                                                  dentry, inode);
+               retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
                if (retval < 0)
                        goto out;
                if (retval == 1) {
@@ -2097,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        }
 
        if (is_dx(dir)) {
-               retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+               retval = ext4_dx_add_entry(handle, &fname, dir, inode);
                if (!retval || (retval != ERR_BAD_DX_DIR))
                        goto out;
                ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2119,7 +2139,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 
                if (blocks == 1 && !dx_fallback &&
                    ext4_has_feature_dir_index(sb)) {
-                       retval = make_indexed_dir(handle, &fname, dentry,
+                       retval = make_indexed_dir(handle, &fname, dir,
                                                  inode, bh);
                        bh = NULL; /* make_indexed_dir releases bh */
                        goto out;
@@ -2154,12 +2174,11 @@ out:
  * Returns 0 for success, or a negative error value
  */
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode)
+                            struct inode *dir, struct inode *inode)
 {
        struct dx_frame frames[2], *frame;
        struct dx_entry *entries, *at;
        struct buffer_head *bh;
-       struct inode *dir = d_inode(dentry->d_parent);
        struct super_block *sb = dir->i_sb;
        struct ext4_dir_entry_2 *de;
        int err;
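The three hunks above (make_indexed_dir(), the ext4_try_add_inline_entry() call, and ext4_dx_add_entry()) are one mechanical refactor: ext4_add_entry() already holds the parent inode, so it is passed down explicitly and each helper's d_inode(dentry->d_parent) re-derivation is dropped; the dentry itself is no longer needed once the name has been captured in the ext4_filename. A toy sketch of the resulting calling convention, with invented types:

struct toy_inode { unsigned long ino; };
struct toy_dentry {
        struct toy_dentry *d_parent;
        struct toy_inode *d_inode;
};

/* After the refactor: the helper receives the parent inode directly. */
static int toy_add_entry(struct toy_inode *dir, struct toy_inode *inode)
{
        return dir->ino != inode->ino ? 0 : -1; /* placeholder work */
}

static int toy_caller(struct toy_dentry *dentry, struct toy_inode *inode)
{
        struct toy_inode *dir = dentry->d_parent->d_inode; /* derived once */

        return toy_add_entry(dir, inode);
}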
@@ -2756,7 +2775,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
                return 0;
 
        WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        /*
         * Exit early if inode already is on orphan list. This is a big speedup
         * since we don't have to contend on the global s_orphan_lock.
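The mutex_is_locked() to inode_is_locked() change above, like the many mutex_lock(&inode->i_mutex) to inode_lock(inode) conversions in the filesystems below, is a mechanical substitution to the new wrapper API. At this point in the tree the wrappers still map directly onto i_mutex; they exist so the lock's implementation can change later without touching every filesystem. Their definitions in include/linux/fs.h are essentially:

static inline void inode_lock(struct inode *inode)
{
        mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
        mutex_unlock(&inode->i_mutex);
}

static inline int inode_trylock(struct inode *inode)
{
        return mutex_trylock(&inode->i_mutex);
}

static inline int inode_is_locked(struct inode *inode)
{
        return mutex_is_locked(&inode->i_mutex);
}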
@@ -2838,7 +2857,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
                return 0;
 
        WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        /* Do this quick check before taking global s_orphan_lock. */
        if (list_empty(&ei->i_orphan))
                return 0;
@@ -3212,6 +3231,12 @@ static int ext4_link(struct dentry *old_dentry,
        if (ext4_encrypted_inode(dir) &&
            !ext4_is_child_context_consistent_with_parent(dir, inode))
                return -EPERM;
+
+       if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+          (!projid_eq(EXT4_I(dir)->i_projid,
+                      EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        err = dquot_initialize(dir);
        if (err)
                return err;
@@ -3492,6 +3517,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
        int credits;
        u8 old_file_type;
 
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+           (!projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        retval = dquot_initialize(old.dir);
        if (retval)
                return retval;
@@ -3701,6 +3731,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                                                           new.inode)))
                return -EPERM;
 
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)) ||
+           (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(old_dir)->i_projid,
+                       EXT4_I(new_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        retval = dquot_initialize(old.dir);
        if (retval)
                return retval;
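The three -EXDEV hunks above (ext4_link(), ext4_rename(), ext4_cross_rename()) enforce one rule: an inode may not be linked or renamed under a directory that carries EXT4_INODE_PROJINHERIT unless both agree on project ID. Returning -EXDEV pushes tools such as mv into their cross-device copy-and-delete fallback, so the data is rewritten and charged to the destination's project. Condensed into a single predicate (the wrapper function is invented here; the helpers it calls are the real ext4 ones from the hunks):

static bool ext4_project_move_ok(struct inode *dir, struct inode *inode)
{
        if (!ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
                return true;
        return projid_eq(EXT4_I(dir)->i_projid, EXT4_I(inode)->i_projid);
}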
index ad62d7acc31578df85c3b97ac7839295a4454008..34038e3598d59fa2b4bcaf2304d31602e803d5e0 100644 (file)
@@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
        if (flex_gd == NULL)
                goto out3;
 
-       if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data))
+       if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
                goto out2;
        flex_gd->count = flexbg_size;
 
index f1b56ff0120894e3e4cc79886928ea7852ac5e25..3ed01ec011d75067579a4b894b4b0623af265a2d 100644 (file)
@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
 
+/*
+ * Lock ordering
+ *
+ * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
+ * i_mmap_rwsem (inode->i_mmap_rwsem)!
+ *
+ * page fault path:
+ * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+ *   page lock -> i_data_sem (rw)
+ *
+ * buffered write path:
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> page lock ->
+ *   i_data_sem (rw)
+ *
+ * truncate:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ *   i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ *   transaction start -> i_data_sem (rw)
+ *
+ * direct IO:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
+ *   transaction start -> i_data_sem (rw)
+ *
+ * writepages:
+ * transaction start -> page lock(s) -> i_data_sem (rw)
+ */
+
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
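The lock-ordering comment added to fs/ext4/super.c above documents the new per-inode EXT4_I(inode)->i_mmap_sem (initialized in init_once() below), which serializes page faults against truncate and hole punching; the similarly named inode->i_mapping->i_mmap_rwsem protects the tree of mappings instead. On the fault side the new semaphore is taken for read around the generic handler; a simplified sketch of that shape, close to what fs/ext4/inode.c does in this release:

static int ext4_filemap_fault_sketch(struct vm_area_struct *vma,
                                     struct vm_fault *vmf)
{
        struct inode *inode = file_inode(vma->vm_file);
        int err;

        down_read(&EXT4_I(inode)->i_mmap_sem);  /* keep truncate out */
        err = filemap_fault(vma, vmf);
        up_read(&EXT4_I(inode)->i_mmap_sem);

        return err;
}

The ext4_truncate_failed_write() hunk further down is the write-side counterpart: it takes the same semaphore for write before dropping pages.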
@@ -958,6 +988,7 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&ei->i_orphan);
        init_rwsem(&ei->xattr_sem);
        init_rwsem(&ei->i_data_sem);
+       init_rwsem(&ei->i_mmap_sem);
        inode_init_once(&ei->vfs_inode);
 }
 
@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 }
 
 #ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
 
 static int ext4_write_dquot(struct dquot *dquot);
 static int ext4_acquire_dquot(struct dquot *dquot);
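Two notes on the quota hunk above. QTYPE2MOPT pasted tokens onto a parenthesized argument, (on)##USRJQUOTA, which cannot expand to a valid token, so the macro appears to have been dead as written and is simply removed. QTYPE2NAME now indexes the quota core's name table, which already carries an entry for the project type this series wires up; from include/uapi/linux/quota.h, roughly:

#define INITQFNAMES { \
        "user",    /* USRQUOTA */ \
        "group",   /* GRPQUOTA */ \
        "project", /* PRJQUOTA */ \
}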
@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = {
        .write_info     = ext4_write_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
+       .get_projid     = ext4_get_projid,
 };
 
 static const struct quotactl_ops ext4_qctl_operations = {
@@ -2254,10 +2286,10 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                                        __func__, inode->i_ino, inode->i_size);
                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
                                  inode->i_ino, inode->i_size);
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        truncate_inode_pages(inode->i_mapping, inode->i_size);
                        ext4_truncate(inode);
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        nr_truncates++;
                } else {
                        if (test_opt(sb, DEBUG))
@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
                         "without CONFIG_QUOTA");
                return 0;
        }
+       if (ext4_has_feature_project(sb) && !readonly) {
+               ext4_msg(sb, KERN_ERR,
+                        "Filesystem with project quota feature cannot be mounted RDWR "
+                        "without CONFIG_QUOTA");
+               return 0;
+       }
 #endif  /* CONFIG_QUOTA */
        return 1;
 }
@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                sb->s_qcop = &dquot_quotactl_sysfile_ops;
        else
                sb->s_qcop = &ext4_qctl_operations;
-       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
@@ -4790,6 +4828,48 @@ restore_opts:
        return err;
 }
 
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+                              kprojid_t projid, struct kstatfs *buf)
+{
+       struct kqid qid;
+       struct dquot *dquot;
+       u64 limit;
+       u64 curblock;
+
+       qid = make_kqid_projid(projid);
+       dquot = dqget(sb, qid);
+       if (IS_ERR(dquot))
+               return PTR_ERR(dquot);
+       spin_lock(&dq_data_lock);
+
+       limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+                dquot->dq_dqb.dqb_bsoftlimit :
+                dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+       if (limit && buf->f_blocks > limit) {
+               curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+               buf->f_blocks = limit;
+               buf->f_bfree = buf->f_bavail =
+                       (buf->f_blocks > curblock) ?
+                        (buf->f_blocks - curblock) : 0;
+       }
+
+       limit = dquot->dq_dqb.dqb_isoftlimit ?
+               dquot->dq_dqb.dqb_isoftlimit :
+               dquot->dq_dqb.dqb_ihardlimit;
+       if (limit && buf->f_files > limit) {
+               buf->f_files = limit;
+               buf->f_ffree =
+                       (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+                        (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+       }
+
+       spin_unlock(&dq_data_lock);
+       dqput(dquot);
+       return 0;
+}
+#endif
+
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
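ext4_statfs_project() above makes df inside a PROJINHERIT subtree report the project's quota rather than the whole device: the block limit (soft if set, otherwise hard) replaces f_blocks, f_bfree and f_bavail become the limit minus the project's current usage (floored at zero), and the inode counters get the same treatment. A standalone toy of the block arithmetic, with invented numbers:

#include <stdio.h>

int main(void)
{
        unsigned long long limit = 1000;        /* project block limit */
        unsigned long long curblock = 400;      /* blocks charged so far */
        unsigned long long f_blocks = 250000;   /* device-wide total */
        unsigned long long f_bfree = 180000;    /* device-wide free */

        if (limit && f_blocks > limit) {
                f_blocks = limit;
                f_bfree = f_blocks > curblock ? f_blocks - curblock : 0;
        }
        printf("f_blocks=%llu f_bfree=%llu\n", f_blocks, f_bfree);
        return 0;       /* prints f_blocks=1000 f_bfree=600 */
}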
@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 
+#ifdef CONFIG_QUOTA
+       if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+           sb_has_quota_limits_enabled(sb, PRJQUOTA))
+               ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
        return 0;
 }
 
@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
        struct inode *qf_inode;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
        };
 
        BUG_ON(!ext4_has_feature_quota(sb));
@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb)
        int type, err = 0;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
        };
 
        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
index 011ba6670d990285f6f61b56ea3fb8b421b70e59..c70d06a383e28819cc556f1027ea118272e5f738 100644 (file)
  */
 static inline void ext4_truncate_failed_write(struct inode *inode)
 {
+       down_write(&EXT4_I(inode)->i_mmap_sem);
        truncate_inode_pages(inode->i_mapping, inode->i_size);
        ext4_truncate(inode);
+       up_write(&EXT4_I(inode)->i_mmap_sem);
 }
 
 /*
index ac9e7c6aac74df601ddf3043c7ce364ad0cf2294..5c06db17e41fa267f5b270061d2959b2a36803e4 100644 (file)
@@ -794,7 +794,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        return ret;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (start >= isize)
@@ -860,7 +860,7 @@ out:
        if (ret == 1)
                ret = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 18ddb1e5182a16e016224d449c6eb281c582b00e..ea272be62677004d29c8018691ea442c2c992bb1 100644 (file)
@@ -333,7 +333,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
        loff_t isize;
        int err = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize)
@@ -388,10 +388,10 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 found:
        if (whence == SEEK_HOLE && data_ofs > isize)
                data_ofs = isize;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return vfs_setpos(file, data_ofs, maxbytes);
 fail:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return -ENXIO;
 }
 
@@ -1219,7 +1219,7 @@ static long f2fs_fallocate(struct file *file, int mode,
                        FALLOC_FL_INSERT_RANGE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                if (offset >= inode->i_size)
@@ -1243,7 +1243,7 @@ static long f2fs_fallocate(struct file *file, int mode,
        }
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        trace_f2fs_fallocate(inode, mode, offset, len, ret);
        return ret;
@@ -1307,13 +1307,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
        flags = f2fs_mask_flags(inode->i_mode, flags);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        oldflags = fi->i_flags;
 
        if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
                if (!capable(CAP_LINUX_IMMUTABLE)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        ret = -EPERM;
                        goto out;
                }
@@ -1322,7 +1322,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
        flags = flags & FS_FL_USER_MODIFIABLE;
        flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
        fi->i_flags = flags;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        f2fs_set_inode_flags(inode);
        inode->i_ctime = CURRENT_TIME;
@@ -1667,7 +1667,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 
        f2fs_balance_fs(sbi, true);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* writeback all dirty pages in the range */
        err = filemap_write_and_wait_range(inode->i_mapping, range->start,
@@ -1778,7 +1778,7 @@ do_map:
 clear_out:
        clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!err)
                range->len = (u64)total << PAGE_CACHE_SHIFT;
        return err;
index 7def96caec5f7fc4abd82e51746bce4dce1f30ee..d0b95c95079bbdc085a9746226427ceb3805bc45 100644 (file)
@@ -769,7 +769,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
 
        buf.dirent = dirent;
        buf.result = 0;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        buf.ctx.pos = file->f_pos;
        ret = -ENOENT;
        if (!IS_DEADDIR(inode)) {
@@ -777,7 +777,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
                                    short_only, both ? &buf : NULL);
                file->f_pos = buf.ctx.pos;
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret >= 0)
                ret = buf.result;
        return ret;
index 43d3475da83a79c8857e0861bf298df38a9d7ba7..f7018566883241bc65a981cf0d6e533e4d331241 100644 (file)
@@ -24,9 +24,9 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
 {
        u32 attr;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        attr = fat_make_attrs(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return put_user(attr, user_attr);
 }
@@ -47,7 +47,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        err = mnt_want_write_file(file);
        if (err)
                goto out;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -109,7 +109,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        fat_save_attrs(inode, attr);
        mark_inode_dirty(inode);
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(file);
 out:
        return err;
@@ -246,7 +246,7 @@ static long fat_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (mode & FALLOC_FL_KEEP_SIZE) {
                ondisksize = inode->i_blocks << 9;
                if ((offset + len) <= ondisksize)
@@ -272,7 +272,7 @@ static long fat_fallocate(struct file *file, int mode,
        }
 
 error:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 5797d45a78cb29fddcff617a8fc03139ed609369..c5618db110be454781d6c7b8a70f8284fa00a477 100644 (file)
@@ -46,9 +46,9 @@ void put_filesystem(struct file_system_type *fs)
 static struct file_system_type **find_filesystem(const char *name, unsigned len)
 {
        struct file_system_type **p;
-       for (p=&file_systems; *p; p=&(*p)->next)
-               if (strlen((*p)->name) == len &&
-                   strncmp((*p)->name, name, len) == 0)
+       for (p = &file_systems; *p; p = &(*p)->next)
+               if (strncmp((*p)->name, name, len) == 0 &&
+                   !(*p)->name[len])
                        break;
        return p;
 }
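The find_filesystem() rewrite above folds the strlen() into the comparison: strncmp() bounds the scan to the caller-supplied length, and (*p)->name[len] == '\0' rejects entries for which the query is only a prefix, so looking up "ext" (len 3) cannot match a registered "ext4". The comparison in isolation:

#include <stdbool.h>
#include <string.h>

/* Match a NUL-terminated registry entry against a (name, len) pair
 * without calling strlen() on the entry. */
static bool fs_name_matches(const char *entry, const char *name, size_t len)
{
        return strncmp(entry, name, len) == 0 && entry[len] == '\0';
}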
index 6915c950e6e8aeefb7c6162e7e7410de98f4e309..1f76d8950a57fc7bbdb3f8d98ec056fe89970b5c 100644 (file)
@@ -317,6 +317,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
        struct inode_switch_wbs_context *isw =
                container_of(work, struct inode_switch_wbs_context, work);
        struct inode *inode = isw->inode;
+       struct super_block *sb = inode->i_sb;
        struct address_space *mapping = inode->i_mapping;
        struct bdi_writeback *old_wb = inode->i_wb;
        struct bdi_writeback *new_wb = isw->new_wb;
@@ -423,6 +424,7 @@ skip_switch:
        wb_put(new_wb);
 
        iput(inode);
+       deactivate_super(sb);
        kfree(isw);
 }
 
@@ -469,11 +471,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 
        /* while holding I_WB_SWITCH, no one else can update the association */
        spin_lock(&inode->i_lock);
+
        if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
-           inode_to_wb(inode) == isw->new_wb) {
-               spin_unlock(&inode->i_lock);
-               goto out_free;
-       }
+           inode_to_wb(inode) == isw->new_wb)
+               goto out_unlock;
+
+       if (!atomic_inc_not_zero(&inode->i_sb->s_active))
+               goto out_unlock;
+
        inode->i_state |= I_WB_SWITCH;
        spin_unlock(&inode->i_lock);
 
@@ -489,6 +494,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
        call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
        return;
 
+out_unlock:
+       spin_unlock(&inode->i_lock);
 out_free:
        if (isw->new_wb)
                wb_put(isw->new_wb);
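The fs-writeback hunks above close a lifetime hole in cgroup writeback switching: inode_switch_wbs() queues work that only runs after an RCU grace period, so the superblock must be pinned while the inode is still known live, via atomic_inc_not_zero(&inode->i_sb->s_active), and inode_switch_wbs_work_fn() drops that pin with deactivate_super() when it finishes. atomic_inc_not_zero() takes a reference only if the count has not already reached zero; the same idiom in portable C11, as a toy:

#include <stdatomic.h>
#include <stdbool.h>

/* Pin an object only while someone else still holds it alive. */
static bool get_live_ref(atomic_int *refs)
{
        int old = atomic_load(refs);

        while (old != 0)
                if (atomic_compare_exchange_weak(refs, &old, old + 1))
                        return true;    /* pinned; pair with a put */
        return false;                   /* already on its way to zero */
}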
index 712601f299b8a1436d69f0a31362ab57032f24b7..4b855b65d4577a2fd15389e22ddb40b2993f2e7a 100644 (file)
@@ -944,7 +944,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        if (!parent)
                return -ENOENT;
 
-       mutex_lock(&parent->i_mutex);
+       inode_lock(parent);
        if (!S_ISDIR(parent->i_mode))
                goto unlock;
 
@@ -962,7 +962,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        fuse_invalidate_entry(entry);
 
        if (child_nodeid != 0 && d_really_is_positive(entry)) {
-               mutex_lock(&d_inode(entry)->i_mutex);
+               inode_lock(d_inode(entry));
                if (get_node_id(d_inode(entry)) != child_nodeid) {
                        err = -ENOENT;
                        goto badentry;
@@ -983,7 +983,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
                clear_nlink(d_inode(entry));
                err = 0;
  badentry:
-               mutex_unlock(&d_inode(entry)->i_mutex);
+               inode_unlock(d_inode(entry));
                if (!err)
                        d_delete(entry);
        } else {
@@ -992,7 +992,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        dput(entry);
 
  unlock:
-       mutex_unlock(&parent->i_mutex);
+       inode_unlock(parent);
        iput(parent);
        return err;
 }
@@ -1504,7 +1504,7 @@ void fuse_set_nowrite(struct inode *inode)
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
 
-       BUG_ON(!mutex_is_locked(&inode->i_mutex));
+       BUG_ON(!inode_is_locked(inode));
 
        spin_lock(&fc->lock);
        BUG_ON(fi->writectr < 0);
index aa03aab6a24f085b210e39b9d093c84c76235c28..b03d253ece159c4340765e46993127a53354b21b 100644 (file)
@@ -207,7 +207,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
                return err;
 
        if (lock_inode)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        err = fuse_do_open(fc, get_node_id(inode), file, isdir);
 
@@ -215,7 +215,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
                fuse_finish_open(inode, file);
 
        if (lock_inode)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        return err;
 }
@@ -413,9 +413,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        fuse_sync_writes(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        req = fuse_get_req_nofail_nopages(fc, file);
        memset(&inarg, 0, sizeof(inarg));
@@ -450,7 +450,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
        if (is_bad_inode(inode))
                return -EIO;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Start writeback against all dirty pages of the inode, then
@@ -486,7 +486,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
                err = 0;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
@@ -1160,7 +1160,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return generic_file_write_iter(iocb, from);
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
@@ -1210,7 +1210,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        }
 out:
        current->backing_dev_info = NULL;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return written ? written : err;
 }
@@ -1322,10 +1322,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 
        if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
                if (!write)
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                fuse_sync_writes(inode);
                if (!write)
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
        }
 
        while (count) {
@@ -1413,14 +1413,14 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return -EIO;
 
        /* Don't allow parallel writes to the same file */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        res = generic_write_checks(iocb, from);
        if (res > 0)
                res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
        fuse_invalidate_attr(inode);
        if (res > 0)
                fuse_write_update_size(inode, iocb->ki_pos);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return res;
 }
@@ -2287,17 +2287,17 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
                retval = generic_file_llseek(file, offset, whence);
                break;
        case SEEK_END:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = fuse_update_attributes(inode, NULL, file, NULL);
                if (!retval)
                        retval = generic_file_llseek(file, offset, whence);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                break;
        case SEEK_HOLE:
        case SEEK_DATA:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = fuse_lseek(file, offset, whence);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                break;
        default:
                retval = -EINVAL;
@@ -2944,7 +2944,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
                return -EOPNOTSUPP;
 
        if (lock_inode) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (mode & FALLOC_FL_PUNCH_HOLE) {
                        loff_t endbyte = offset + length - 1;
                        err = filemap_write_and_wait_range(inode->i_mapping,
@@ -2990,7 +2990,7 @@ out:
                clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
        if (lock_inode)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        return err;
 }
index 7412863cda1e52736552ef0deea6aa84ff8c844f..c9384f932975efb8075d2fd2013116e8be9f055a 100644 (file)
@@ -914,7 +914,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
        if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
@@ -946,7 +946,7 @@ out_unlock:
        gfs2_glock_dq(&gh);
 out_uninit:
        gfs2_holder_uninit(&gh);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 3e94400d587c4e1efdea6afee130b7b35c0a16f5..352f958769e15b150575c54b09d82b51d34f258e 100644 (file)
@@ -2067,7 +2067,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
        if (ret)
@@ -2094,7 +2094,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
        gfs2_glock_dq_uninit(&gh);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index be6d9c450b22d212cfcdfa8d42e07f76705347e2..a398913442591892f6b7e24e14d2b1a8e30b7394 100644 (file)
@@ -888,7 +888,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                return -ENOMEM;
 
        sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
-       mutex_lock(&ip->i_inode.i_mutex);
+       inode_lock(&ip->i_inode);
        for (qx = 0; qx < num_qd; qx++) {
                error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
                                           GL_NOCACHE, &ghs[qx]);
@@ -953,7 +953,7 @@ out_alloc:
 out:
        while (qx--)
                gfs2_glock_dq_uninit(&ghs[qx]);
-       mutex_unlock(&ip->i_inode.i_mutex);
+       inode_unlock(&ip->i_inode);
        kfree(ghs);
        gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
        return error;
@@ -1674,7 +1674,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
        if (error)
                goto out_put;
 
-       mutex_lock(&ip->i_inode.i_mutex);
+       inode_lock(&ip->i_inode);
        error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
        if (error)
                goto out_unlockput;
@@ -1739,7 +1739,7 @@ out_i:
 out_q:
        gfs2_glock_dq_uninit(&q_gh);
 out_unlockput:
-       mutex_unlock(&ip->i_inode.i_mutex);
+       inode_unlock(&ip->i_inode);
 out_put:
        qd_put(qd);
        return error;
index 70788e03820ae3e1c34689c95493798c20781928..e9f2b855f83160d3798e4ef62f6d86a7f0e69547 100644 (file)
@@ -173,9 +173,9 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
 {
        struct hfs_readdir_data *rd = file->private_data;
        if (rd) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                list_del(&rd->list);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                kfree(rd);
        }
        return 0;
index b99ebddb10cb5b5e2d422c0f2151fc926455d26b..6686bf39a5b5a0b4bb83de60527cd873b612fc35 100644 (file)
@@ -570,13 +570,13 @@ static int hfs_file_release(struct inode *inode, struct file *file)
        if (HFS_IS_RSRC(inode))
                inode = HFS_I(inode)->rsrc_inode;
        if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                hfs_file_truncate(inode);
                //if (inode->i_flags & S_DEAD) {
                //      hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
                //      hfs_delete_inode(inode);
                //}
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
@@ -656,7 +656,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret)
                return ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* sync the inode to buffers */
        ret = write_inode_now(inode, 0);
@@ -668,7 +668,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        err = sync_blockdev(sb->s_bdev);
        if (!ret)
                ret = err;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index d0f39dcbb58e86057c91c2e32a9d538178da39c5..a4e867e089478ddd3edaf24ce9be7d485d66ad34 100644 (file)
@@ -284,9 +284,9 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file)
 {
        struct hfsplus_readdir_data *rd = file->private_data;
        if (rd) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                list_del(&rd->list);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                kfree(rd);
        }
        return 0;
index 19b33f8151f1ab2cb0957b7e5772ff1206659aad..1a6394cdb54ef59520dbcd7f7577aee5d93d2561 100644 (file)
@@ -229,14 +229,14 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
        if (HFSPLUS_IS_RSRC(inode))
                inode = HFSPLUS_I(inode)->rsrc_inode;
        if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                hfsplus_file_truncate(inode);
                if (inode->i_flags & S_DEAD) {
                        hfsplus_delete_cat(inode->i_ino,
                                           HFSPLUS_SB(sb)->hidden_dir, NULL);
                        hfsplus_delete_inode(inode);
                }
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
@@ -286,7 +286,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
        error = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (error)
                return error;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Sync inode metadata into the catalog and extent trees.
@@ -327,7 +327,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
        if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return error;
 }
index 0624ce4e07022e3f047683a70cbbf1cdad6ff174..32a49e292b6a6b5d94421eaa8eb59541d15b12b5 100644 (file)
@@ -93,7 +93,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
                goto out_drop_write;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
            inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
@@ -126,7 +126,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
        mark_inode_dirty(inode);
 
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out_drop_write:
        mnt_drop_write_file(file);
 out:
index cfaa18c7a33796e0ee330d7b67e4b98bb6a95072..d1abbee281d19d8f51f1417bdb56f2adebc178f0 100644 (file)
@@ -378,9 +378,9 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index dc540bfcee1d8e7842b6b0f0ca402214f9bf5609..e57a53c13d864a74431ee4a98653df083009a317 100644 (file)
@@ -33,7 +33,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
        if (whence == SEEK_DATA || whence == SEEK_HOLE)
                return -EINVAL;
 
-       mutex_lock(&i->i_mutex);
+       inode_lock(i);
        hpfs_lock(s);
 
        /*pr_info("dir lseek\n");*/
@@ -48,12 +48,12 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
 ok:
        filp->f_pos = new_off;
        hpfs_unlock(s);
-       mutex_unlock(&i->i_mutex);
+       inode_unlock(i);
        return new_off;
 fail:
        /*pr_warn("illegal lseek: %016llx\n", new_off);*/
        hpfs_unlock(s);
-       mutex_unlock(&i->i_mutex);
+       inode_unlock(i);
        return -ESPIPE;
 }
 
index 506765afa1a3135d1ff9bccc48f06634439fa69e..bb8d67e2740ac70d7c9c87c078429475f9e0b79d 100644 (file)
@@ -376,12 +376,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
        struct inode *inode = d_inode(dentry);
        dnode_secno dno;
        int r;
-       int rep = 0;
        int err;
 
        hpfs_lock(dir->i_sb);
        hpfs_adjust_length(name, &len);
-again:
+
        err = -ENOENT;
        de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
        if (!de)
@@ -401,33 +400,9 @@ again:
                hpfs_error(dir->i_sb, "there was error when removing dirent");
                err = -EFSERROR;
                break;
-       case 2:         /* no space for deleting, try to truncate file */
-
+       case 2:         /* no space for deleting */
                err = -ENOSPC;
-               if (rep++)
-                       break;
-
-               dentry_unhash(dentry);
-               if (!d_unhashed(dentry)) {
-                       hpfs_unlock(dir->i_sb);
-                       return -ENOSPC;
-               }
-               if (generic_permission(inode, MAY_WRITE) ||
-                   !S_ISREG(inode->i_mode) ||
-                   get_write_access(inode)) {
-                       d_rehash(dentry);
-               } else {
-                       struct iattr newattrs;
-                       /*pr_info("truncating file before delete.\n");*/
-                       newattrs.ia_size = 0;
-                       newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-                       err = notify_change(dentry, &newattrs, NULL);
-                       put_write_access(inode);
-                       if (!err)
-                               goto again;
-               }
-               hpfs_unlock(dir->i_sb);
-               return -ENOSPC;
+               break;
        default:
                drop_nlink(inode);
                err = 0;
index 8bbf7f3e2a27e0669e7f7fd5624a2cb5b50ce0fe..e1f465a389d5be1b27f8fd98451312bb94b454b8 100644 (file)
@@ -141,7 +141,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
        vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        file_accessed(file);
 
        ret = -ENOMEM;
@@ -157,7 +157,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        if (vma->vm_flags & VM_WRITE && inode->i_size < len)
                inode->i_size = len;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
@@ -530,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        if (hole_end > hole_start) {
                struct address_space *mapping = inode->i_mapping;
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_mmap_lock_write(mapping);
                if (!RB_EMPTY_ROOT(&mapping->i_mmap))
                        hugetlb_vmdelete_list(&mapping->i_mmap,
@@ -538,7 +538,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                                                hole_end  >> PAGE_SHIFT);
                i_mmap_unlock_write(mapping);
                remove_inode_hugepages(inode, hole_start, hole_end);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        return 0;
@@ -572,7 +572,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
        start = offset >> hpage_shift;
        end = (offset + len + hpage_size - 1) >> hpage_shift;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
        error = inode_newsize_ok(inode, offset + len);
@@ -659,7 +659,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
                i_size_write(inode, offset + len);
        inode->i_ctime = CURRENT_TIME;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 
index e491e54d243025999e42fdec674b082a765188fd..69b8b526c1946c455c8b5ad0c8939542d9c2e82d 100644 (file)
@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        inode->i_rdev = 0;
        inode->dirtied_when = 0;
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+       inode->i_wb_frn_winner = 0;
+       inode->i_wb_frn_avg_time = 0;
+       inode->i_wb_frn_history = 0;
+#endif
+
        if (security_inode_alloc(inode))
                goto out;
        spin_lock_init(&inode->i_lock);
@@ -495,7 +501,7 @@ void clear_inode(struct inode *inode)
         */
        spin_lock_irq(&inode->i_data.tree_lock);
        BUG_ON(inode->i_data.nrpages);
-       BUG_ON(inode->i_data.nrshadows);
+       BUG_ON(inode->i_data.nrexceptional);
        spin_unlock_irq(&inode->i_data.tree_lock);
        BUG_ON(!list_empty(&inode->i_data.private_list));
        BUG_ON(!(inode->i_state & I_FREEING));
@@ -966,9 +972,9 @@ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
                swap(inode1, inode2);
 
        if (inode1 && !S_ISDIR(inode1->i_mode))
-               mutex_lock(&inode1->i_mutex);
+               inode_lock(inode1);
        if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
-               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
+               inode_lock_nested(inode2, I_MUTEX_NONDIR2);
 }
 EXPORT_SYMBOL(lock_two_nondirectories);
 
@@ -980,9 +986,9 @@ EXPORT_SYMBOL(lock_two_nondirectories);
 void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
 {
        if (inode1 && !S_ISDIR(inode1->i_mode))
-               mutex_unlock(&inode1->i_mutex);
+               inode_unlock(inode1);
        if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
-               mutex_unlock(&inode2->i_mutex);
+               inode_unlock(inode2);
 }
 EXPORT_SYMBOL(unlock_two_nondirectories);
 
index 29466c380958ac411f4dcd46706837f2c55b03cd..116a333e9c773c10b76a8030c268284869c2533e 100644 (file)
@@ -434,9 +434,9 @@ int generic_block_fiemap(struct inode *inode,
                         u64 len, get_block_t *get_block)
 {
        int ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 EXPORT_SYMBOL(generic_block_fiemap);
index a3750f902adcbae8f8cabbad066f9d629b805155..0ae91ad6df2dc916f077495876ec7fe1e47902ed 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mm.h> /* kvfree() */
 #include "nodelist.h"
 
 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
        return 0;
 
  out_free:
-#ifndef __ECOS
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-#endif
-               kfree(c->blocks);
+       kvfree(c->blocks);
 
        return ret;
 }
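This jffs2 hunk, and the two matching ones below, replace the open-coded choice between vfree() and kfree() (plus its __ECOS conditional) with kvfree(), which checks is_vmalloc_addr() internally and frees through whichever allocator produced the pointer. The usual pairing on the allocation side looks like this sketch (jffs2's real sizing decision lives in jffs2_blocks_use_vmalloc()):

static void *alloc_block_array(size_t size)
{
        void *p = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

        if (!p)
                p = vmalloc(size);      /* large arrays fall back */
        return p;                       /* release with kvfree(p) */
}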
index f509f62e12f6ef85e95b5c159353cba757ef4af1..c5ac5944bc1bd8ef1362e5911656f3bfb68ea70b 100644 (file)
@@ -39,10 +39,10 @@ int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Trigger GC to flush any pending writes for this inode */
        jffs2_flush_wbuf_gc(c, inode->i_ino);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 2caf1682036dc1914aaa887565b0a54edbe0ed35..bead25ae8fe4a563d257995102da2cbe7336eeb3 100644 (file)
@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 out_root:
        jffs2_free_ino_caches(c);
        jffs2_free_raw_node_refs(c);
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-               kfree(c->blocks);
+       kvfree(c->blocks);
  out_inohash:
        jffs2_clear_xattr_subsystem(c);
        kfree(c->inocache_list);
index bb080c272149312ea382ee578ffdaa903620a43a..0a9a114bb9d1138b3060365986304cdb2df1428f 100644 (file)
@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
 
        jffs2_free_ino_caches(c);
        jffs2_free_raw_node_refs(c);
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-               kfree(c->blocks);
+       kvfree(c->blocks);
        jffs2_flash_cleanup(c);
        kfree(c->inocache_list);
        jffs2_clear_xattr_subsystem(c);
index 0e026a7bdcd4d9512b8074016b9f56a31de3b8f9..4ce7735dd042267bd086fd420066592e3b6855d3 100644 (file)
@@ -38,17 +38,17 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (rc)
                return rc;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (!(inode->i_state & I_DIRTY_ALL) ||
            (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
                /* Make sure committed changes hit the disk */
                jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return rc;
        }
 
        rc |= jfs_commit_inode(inode, 1);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return rc ? -EIO : 0;
 }
index 8db8b7d61e4048cf97ac289f263bc78c8685294d..8653cac7e12eed46e0bc360741004dfd3f1ce54d 100644 (file)
@@ -96,7 +96,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                }
 
                /* Lock against other parallel changes of flags */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
                jfs_get_inode_flags(jfs_inode);
                oldflags = jfs_inode->mode2;
@@ -109,7 +109,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        ((flags ^ oldflags) &
                        (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                err = -EPERM;
                                goto setflags_out;
                        }
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                jfs_inode->mode2 = flags;
 
                jfs_set_inode_flags(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
 setflags_out:
index 900925b5eb8c225b151b58cf8133ee6a6e11e1b1..4f5d85ba8e237e91d3f314b330e53e26ad2cc22b 100644 (file)
@@ -792,7 +792,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
        struct buffer_head tmp_bh;
        struct buffer_head *bh;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        while (towrite > 0) {
                tocopy = sb->s_blocksize - offset < towrite ?
                                sb->s_blocksize - offset : towrite;
@@ -824,7 +824,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
        }
 out:
        if (len == towrite) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return err;
        }
        if (inode->i_size < off+len-towrite)
@@ -832,7 +832,7 @@ out:
        inode->i_version++;
        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        mark_inode_dirty(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return len - towrite;
 }
 
index 821973853340a106874da673618bf57bfbcc1178..996b7742c90b2eaf1e8b154ae3cc580293d6f94b 100644 (file)
@@ -1511,9 +1511,9 @@ static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
        struct inode *inode = file_inode(file);
        loff_t ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_file_llseek(file, offset, whence);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index 01491299f348c965adc27cbcd70340ab2d980946..0ca80b2af42015c309718b3328315471808cfa4c 100644 (file)
@@ -89,7 +89,7 @@ EXPORT_SYMBOL(dcache_dir_close);
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
        struct dentry *dentry = file->f_path.dentry;
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        switch (whence) {
                case 1:
                        offset += file->f_pos;
@@ -97,7 +97,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        if (offset >= 0)
                                break;
                default:
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -124,7 +124,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        spin_unlock(&dentry->d_lock);
                }
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return offset;
 }
 EXPORT_SYMBOL(dcache_dir_lseek);
@@ -941,7 +941,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = sync_mapping_buffers(inode->i_mapping);
        if (!(inode->i_state & I_DIRTY_ALL))
                goto out;
@@ -953,7 +953,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
                ret = err;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 EXPORT_SYMBOL(__generic_file_fsync);
index af1ed74a657fbc93f64386bcad54d9826bc34684..7c5f91be9b65c4ddabe441d00bbda616ca0c2f26 100644 (file)
@@ -1650,12 +1650,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
         * bother, maybe that's a sign this just isn't a good file to
         * hand out a delegation on.
         */
-       if (is_deleg && !mutex_trylock(&inode->i_mutex))
+       if (is_deleg && !inode_trylock(inode))
                return -EAGAIN;
 
        if (is_deleg && arg == F_WRLCK) {
                /* Write delegations are not currently supported: */
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                WARN_ON_ONCE(1);
                return -EINVAL;
        }
@@ -1732,7 +1732,7 @@ out:
        spin_unlock(&ctx->flc_lock);
        locks_dispose_list(&dispose);
        if (is_deleg)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        if (!error && !my_fl)
                *flp = NULL;
        return error;
index 1a6f0167b16a7a4b657138ed7c366e8fe3151dfd..61eaeb1b6cac10e664539b2132607180d8fe24d0 100644 (file)
@@ -204,12 +204,12 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                if (err)
                        return err;
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                oldflags = li->li_flags;
                flags &= LOGFS_FL_USER_MODIFIABLE;
                flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE;
                li->li_flags = flags;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                inode->i_ctime = CURRENT_TIME;
                mark_inode_dirty_sync(inode);
@@ -230,11 +230,11 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        logfs_get_wblocks(sb, NULL, WF_LOCK);
        logfs_write_anchor(sb);
        logfs_put_wblocks(sb, NULL, WF_LOCK);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index bceefd5588a2bf908626a0a6282a5f810ecfc5c1..9c590e0f66e9c2189f360bf1b742f54f969713aa 100644 (file)
@@ -1629,9 +1629,9 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
        parent = nd->path.dentry;
        BUG_ON(nd->inode != parent->d_inode);
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        dentry = __lookup_hash(&nd->last, parent, nd->flags);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
        path->mnt = nd->path.mnt;
@@ -1712,6 +1712,11 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link,
                return 0;
        if (!follow)
                return 0;
+       /* make sure that d_is_symlink above matches inode */
+       if (nd->flags & LOOKUP_RCU) {
+               if (read_seqcount_retry(&link->dentry->d_seq, seq))
+                       return -ECHILD;
+       }
        return pick_link(nd, link, inode, seq);
 }
 
@@ -1743,11 +1748,11 @@ static int walk_component(struct nameidata *nd, int flags)
                if (err < 0)
                        return err;
 
-               inode = d_backing_inode(path.dentry);
                seq = 0;        /* we are already out of RCU mode */
                err = -ENOENT;
                if (d_is_negative(path.dentry))
                        goto out_path_put;
+               inode = d_backing_inode(path.dentry);
        }
 
        if (flags & WALK_PUT)
@@ -2229,10 +2234,10 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
                putname(filename);
                return ERR_PTR(-EINVAL);
        }
-       mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
        d = __lookup_hash(&last, path->dentry, 0);
        if (IS_ERR(d)) {
-               mutex_unlock(&path->dentry->d_inode->i_mutex);
+               inode_unlock(path->dentry->d_inode);
                path_put(path);
        }
        putname(filename);
@@ -2282,7 +2287,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
        unsigned int c;
        int err;
 
-       WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(base->d_inode));
 
        this.name = name;
        this.len = len;
@@ -2380,9 +2385,9 @@ struct dentry *lookup_one_len_unlocked(const char *name,
        if (ret)
                return ret;
 
-       mutex_lock(&base->d_inode->i_mutex);
+       inode_lock(base->d_inode);
        ret =  __lookup_hash(&this, base, 0);
-       mutex_unlock(&base->d_inode->i_mutex);
+       inode_unlock(base->d_inode);
        return ret;
 }
 EXPORT_SYMBOL(lookup_one_len_unlocked);
@@ -2463,7 +2468,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
                goto done;
        }
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        dentry = d_lookup(dir, &nd->last);
        if (!dentry) {
                /*
@@ -2473,16 +2478,16 @@ mountpoint_last(struct nameidata *nd, struct path *path)
                 */
                dentry = d_alloc(dir, &nd->last);
                if (!dentry) {
-                       mutex_unlock(&dir->d_inode->i_mutex);
+                       inode_unlock(dir->d_inode);
                        return -ENOMEM;
                }
                dentry = lookup_real(dir->d_inode, dentry, nd->flags);
                if (IS_ERR(dentry)) {
-                       mutex_unlock(&dir->d_inode->i_mutex);
+                       inode_unlock(dir->d_inode);
                        return PTR_ERR(dentry);
                }
        }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
 done:
        if (d_is_negative(dentry)) {
@@ -2672,7 +2677,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
        struct dentry *p;
 
        if (p1 == p2) {
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+               inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
                return NULL;
        }
 
@@ -2680,29 +2685,29 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 
        p = d_ancestor(p2, p1);
        if (p) {
-               mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+               inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
                return p;
        }
 
        p = d_ancestor(p1, p2);
        if (p) {
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+               inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
                return p;
        }
 
-       mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
-       mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2);
+       inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+       inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
        return NULL;
 }
 EXPORT_SYMBOL(lock_rename);
 
 void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
-       mutex_unlock(&p1->d_inode->i_mutex);
+       inode_unlock(p1->d_inode);
        if (p1 != p2) {
-               mutex_unlock(&p2->d_inode->i_mutex);
+               inode_unlock(p2->d_inode);
                mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
        }
 }
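lock_rename() above now expresses its ordering through the inode_lock_nested() wrappers without changing it: with s_vfs_rename_mutex held, an ancestor's lock is always taken before its descendant's, and two unrelated parents are locked in a fixed PARENT/PARENT2 order. The outer rename mutex is what makes the unrelated-parents case safe, since no two tasks can be acquiring such a pair concurrently; the same discipline in miniature, using pthreads:

#include <pthread.h>

static pthread_mutex_t rename_guard = PTHREAD_MUTEX_INITIALIZER;

/* Take two unordered locks safely by serializing the pair
 * acquisition under one outer guard. */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
        pthread_mutex_lock(&rename_guard);
        pthread_mutex_lock(a);
        pthread_mutex_lock(b);
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
        pthread_mutex_unlock(b);
        pthread_mutex_unlock(a);
        pthread_mutex_unlock(&rename_guard);
}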
@@ -3141,9 +3146,9 @@ retry_lookup:
                 * dropping this one anyway.
                 */
        }
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        error = lookup_open(nd, &path, file, op, got_write, opened);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        if (error <= 0) {
                if (error)
@@ -3192,12 +3197,12 @@ retry_lookup:
                return error;
 
        BUG_ON(nd->flags & LOOKUP_RCU);
-       inode = d_backing_inode(path.dentry);
        seq = 0;        /* out of RCU mode, so the value doesn't matter */
        if (unlikely(d_is_negative(path.dentry))) {
                path_to_nameidata(&path, nd);
                return -ENOENT;
        }
+       inode = d_backing_inode(path.dentry);
 finish_lookup:
        if (nd->depth)
                put_link(nd);
@@ -3206,11 +3211,6 @@ finish_lookup:
        if (unlikely(error))
                return error;
 
-       if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
-               path_to_nameidata(&path, nd);
-               return -ELOOP;
-       }
-
        if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
                path_to_nameidata(&path, nd);
        } else {
@@ -3229,6 +3229,10 @@ finish_open:
                return error;
        }
        audit_inode(nd->name, nd->path.dentry, 0);
+       if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
+               error = -ELOOP;
+               goto out;
+       }
        error = -EISDIR;
        if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
                goto out;
@@ -3273,6 +3277,10 @@ opened:
                        goto exit_fput;
        }
 out:
+       if (unlikely(error > 0)) {
+               WARN_ON(1);
+               error = -EINVAL;
+       }
        if (got_write)
                mnt_drop_write(nd->path.mnt);
        path_put(&save_parent);
@@ -3489,7 +3497,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
         * Do the final lookup.
         */
        lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
-       mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path->dentry, lookup_flags);
        if (IS_ERR(dentry))
                goto unlock;
@@ -3518,7 +3526,7 @@ fail:
        dput(dentry);
        dentry = ERR_PTR(error);
 unlock:
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        if (!err2)
                mnt_drop_write(path->mnt);
 out:
@@ -3538,7 +3546,7 @@ EXPORT_SYMBOL(kern_path_create);
 void done_path_create(struct path *path, struct dentry *dentry)
 {
        dput(dentry);
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        mnt_drop_write(path->mnt);
        path_put(path);
 }
@@ -3735,7 +3743,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
                return -EPERM;
 
        dget(dentry);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
 
        error = -EBUSY;
        if (is_local_mountpoint(dentry))
@@ -3755,7 +3763,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
        detach_mounts(dentry);
 
 out:
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
        dput(dentry);
        if (!error)
                d_delete(dentry);
@@ -3794,7 +3802,7 @@ retry:
        if (error)
                goto exit1;
 
-       mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -3810,7 +3818,7 @@ retry:
 exit3:
        dput(dentry);
 exit2:
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
+       inode_unlock(path.dentry->d_inode);
        mnt_drop_write(path.mnt);
 exit1:
        path_put(&path);
@@ -3856,7 +3864,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
        if (!dir->i_op->unlink)
                return -EPERM;
 
-       mutex_lock(&target->i_mutex);
+       inode_lock(target);
        if (is_local_mountpoint(dentry))
                error = -EBUSY;
        else {
@@ -3873,7 +3881,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
                }
        }
 out:
-       mutex_unlock(&target->i_mutex);
+       inode_unlock(target);
 
        /* We don't d_delete() NFS sillyrenamed files--they still exist. */
        if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
@@ -3916,7 +3924,7 @@ retry:
        if (error)
                goto exit1;
 retry_deleg:
-       mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
@@ -3934,7 +3942,7 @@ retry_deleg:
 exit2:
                dput(dentry);
        }
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
+       inode_unlock(path.dentry->d_inode);
        if (inode)
                iput(inode);    /* truncate the inode here */
        inode = NULL;
@@ -4086,7 +4094,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Make sure we don't allow creating hardlink to an unlinked file */
        if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
                error =  -ENOENT;
@@ -4103,7 +4111,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                inode->i_state &= ~I_LINKABLE;
                spin_unlock(&inode->i_lock);
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!error)
                fsnotify_link(dir, inode, new_dentry);
        return error;
@@ -4303,7 +4311,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (!is_dir || (flags & RENAME_EXCHANGE))
                lock_two_nondirectories(source, target);
        else if (target)
-               mutex_lock(&target->i_mutex);
+               inode_lock(target);
 
        error = -EBUSY;
        if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
@@ -4356,7 +4364,7 @@ out:
        if (!is_dir || (flags & RENAME_EXCHANGE))
                unlock_two_nondirectories(source, target);
        else if (target)
-               mutex_unlock(&target->i_mutex);
+               inode_unlock(target);
        dput(new_dentry);
        if (!error) {
                fsnotify_move(old_dir, new_dir, old_name, is_dir,
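The hunks above, and most of the per-filesystem hunks that follow, are a mechanical conversion from open-coded i_mutex operations to the inode_lock() family of wrappers. A minimal sketch of those wrappers, assuming they are thin inlines over the old mutex calls:

    static inline void inode_lock(struct inode *inode)
    {
            mutex_lock(&inode->i_mutex);
    }

    static inline void inode_unlock(struct inode *inode)
    {
            mutex_unlock(&inode->i_mutex);
    }

    static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
    {
            mutex_lock_nested(&inode->i_mutex, subclass);
    }

Hiding the lock behind accessors like these lets its implementation change later without revisiting every caller.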
index a830e1463704b590b72947d313cd50b454555668..4fb1691b435552c044134b56aa2032da35684b5b 100644 (file)
@@ -1961,9 +1961,9 @@ static struct mountpoint *lock_mount(struct path *path)
        struct vfsmount *mnt;
        struct dentry *dentry = path->dentry;
 retry:
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        if (unlikely(cant_mount(dentry))) {
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               inode_unlock(dentry->d_inode);
                return ERR_PTR(-ENOENT);
        }
        namespace_lock();
@@ -1974,13 +1974,13 @@ retry:
                        mp = new_mountpoint(dentry);
                if (IS_ERR(mp)) {
                        namespace_unlock();
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       inode_unlock(dentry->d_inode);
                        return mp;
                }
                return mp;
        }
        namespace_unlock();
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        path_put(path);
        path->mnt = mnt;
        dentry = path->dentry = dget(mnt->mnt_root);
@@ -1992,7 +1992,7 @@ static void unlock_mount(struct mountpoint *where)
        struct dentry *dentry = where->m_dentry;
        put_mountpoint(where);
        namespace_unlock();
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
 }
 
 static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
index f0e3e9e747dd72aaa756611cb0c881d19b902d5a..26c2de2de13fd0ce7bb55287c58e268cf4bf37e6 100644 (file)
@@ -369,7 +369,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
        if (!res) {
                struct inode *inode = d_inode(dentry);
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
                        ncp_new_dentry(dentry);
                        val=1;
@@ -377,7 +377,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
                        ncp_dbg(2, "found, but dirEntNum changed\n");
 
                ncp_update_inode2(inode, &finfo);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
 finished:
@@ -639,9 +639,9 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
        } else {
                struct inode *inode = d_inode(newdent);
 
-               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(inode, I_MUTEX_CHILD);
                ncp_update_inode2(inode, entry);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        if (ctl.idx >= NCP_DIRCACHE_SIZE) {
index 011324ce9df21181d6983f1fe81c659e1120a814..dd38ca1f2ecb9a181668496cc056653e1e470be2 100644 (file)
@@ -224,10 +224,10 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        iocb->ki_pos = pos;
 
        if (pos > i_size_read(inode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (pos > i_size_read(inode))
                        i_size_write(inode, pos);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        ncp_dbg(1, "exit %pD2\n", file);
 outrel:
index c59a59c37f3dabae1211db9efcb963442c013b36..35ab51c04814d67baecf83d3b3627d66fdd79359 100644 (file)
@@ -476,6 +476,7 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
 
                for (i = 0; i < nr_pages; i++)
                        put_page(arg->layoutupdate_pages[i]);
+               vfree(arg->start_p);
                kfree(arg->layoutupdate_pages);
        } else {
                put_page(arg->layoutupdate_page);
@@ -559,10 +560,15 @@ retry:
 
        if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
                void *p = start_p, *end = p + arg->layoutupdate_len;
+               struct page *page = NULL;
                int i = 0;
 
-               for ( ; p < end; p += PAGE_SIZE)
-                       arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
+               arg->start_p = start_p;
+               for ( ; p < end; p += PAGE_SIZE) {
+                       page = vmalloc_to_page(p);
+                       arg->layoutupdate_pages[i++] = page;
+                       get_page(page);
+               }
        }
 
        dprintk("%s found %zu ranges\n", __func__, count);
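The hunk above fixes an unbalanced page reference: ext_tree_free_commitdata() does put_page() on every entry of layoutupdate_pages, so the encode path must take a reference when it translates the vmalloc'd buffer into pages, and the buffer itself, saved in arg->start_p, is released with vfree(). A minimal kernel-style sketch of the pinning step (illustrative only; pin_vmalloc_pages is a made-up helper name, and the array is assumed to be sized for the buffer):

    /* Pin each page backing a vmalloc'd buffer so that a later put_page()
     * on every array entry is balanced, independent of the vfree() that
     * tears down the mapping itself.
     */
    static void pin_vmalloc_pages(void *buf, size_t len, struct page **pages)
    {
            void *p;
            int i = 0;

            for (p = buf; p < buf + len; p += PAGE_SIZE) {
                    pages[i] = vmalloc_to_page(p);  /* mapping -> struct page */
                    get_page(pages[i++]);           /* balance final put_page() */
            }
    }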
index c82a21228a342a8f4e6c42ea0d0366b225cb0c74..9cce67043f92a589193625ca3005d0b1e0f4e36b 100644 (file)
@@ -940,7 +940,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
        dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
                        filp, offset, whence);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
                case 1:
                        offset += filp->f_pos;
@@ -957,7 +957,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
                dir_ctx->duped = 0;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -972,9 +972,9 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
 
        dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return 0;
 }
 
index 7ab7ec9f4eed8a0212d2079d7c2c79cb331d7a20..7a0cfd3266e561620577bef3cf449171f10d9f92 100644 (file)
@@ -580,7 +580,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
        if (!count)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        result = nfs_sync_mapping(mapping);
        if (result)
                goto out_unlock;
@@ -608,7 +608,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
        NFS_I(inode)->read_io += count;
        result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (!result) {
                result = nfs_direct_wait(dreq);
@@ -622,7 +622,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 out_release:
        nfs_direct_req_release(dreq);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return result;
 }
@@ -1005,7 +1005,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
        pos = iocb->ki_pos;
        end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        result = nfs_sync_mapping(mapping);
        if (result)
@@ -1045,7 +1045,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
                                              pos >> PAGE_CACHE_SHIFT, end);
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (!result) {
                result = nfs_direct_wait(dreq);
@@ -1066,7 +1066,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 out_release:
        nfs_direct_req_release(dreq);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return result;
 }
 
index 4ef8f5addcadf8a7d8c67dbb4bd2a9583d26a487..748bb813b8ecd63095f2f50b8e733fe960e188ec 100644 (file)
@@ -278,9 +278,9 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                if (ret != 0)
                        break;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_file_fsync_commit(file, start, end, datasync);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * If nfs_file_fsync_commit detected a server reboot, then
                 * resend all dirty pages that might have been covered by
index bb1f4e7a3270e0ff641ebea34c51f3080e01354c..3384dc8e66836c1fe4bc4e0a507a7e9250c5e719 100644 (file)
@@ -971,7 +971,7 @@ filelayout_mark_request_commit(struct nfs_page *req,
        u32 i, j;
 
        if (fl->commit_through_mds) {
-               nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+               nfs_request_add_commit_list(req, cinfo);
        } else {
                /* Note that we are calling nfs4_fl_calc_j_index on each page
                 * that ends up being committed to a data server.  An attractive
index 6594e9f903a0315dde75ac3fbccecf64521d22e9..0cb1abd535e38469201478ee597e74045e593bc5 100644 (file)
@@ -1215,7 +1215,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
                                        hdr->pgio_mirror_idx + 1,
                                        &hdr->pgio_mirror_idx))
                        goto out_eagain;
-               set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+               set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                        &hdr->lseg->pls_layout->plh_flags);
                pnfs_read_resend_pnfs(hdr);
                return task->tk_status;
@@ -1948,11 +1948,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
        start = xdr_reserve_space(xdr, 4);
        BUG_ON(!start);
 
-       if (ff_layout_encode_ioerr(flo, xdr, args))
-               goto out;
-
+       ff_layout_encode_ioerr(flo, xdr, args);
        ff_layout_encode_iostats(flo, xdr, args);
-out:
+
        *start = cpu_to_be32((xdr->p - start - 1) * 4);
        dprintk("%s: Return\n", __func__);
 }
index bd0327541366c2066b7720933373d0c6758ad4e0..eb370460ce203c11c8461e88115cc0fab9c417fa 100644 (file)
@@ -218,63 +218,55 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
        err->length = end - err->offset;
 }
 
-static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err,  u64 offset,
-                              u64 length, int status, enum nfs_opnum4 opnum,
-                              nfs4_stateid *stateid,
-                              struct nfs4_deviceid *deviceid)
+static int
+ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
+               const struct nfs4_ff_layout_ds_err *e2)
 {
-       return err->status == status && err->opnum == opnum &&
-              nfs4_stateid_match(&err->stateid, stateid) &&
-              !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) &&
-              end_offset(err->offset, err->length) >= offset &&
-              err->offset <= end_offset(offset, length);
-}
-
-static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old,
-                          struct nfs4_ff_layout_ds_err *new)
-{
-       if (!ds_error_can_merge(old, new->offset, new->length, new->status,
-                               new->opnum, &new->stateid, &new->deviceid))
-               return false;
-
-       extend_ds_error(old, new->offset, new->length);
-       return true;
+       int ret;
+
+       if (e1->opnum != e2->opnum)
+               return e1->opnum < e2->opnum ? -1 : 1;
+       if (e1->status != e2->status)
+               return e1->status < e2->status ? -1 : 1;
+       ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+       if (ret != 0)
+               return ret;
+       ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
+       if (ret != 0)
+               return ret;
+       if (end_offset(e1->offset, e1->length) < e2->offset)
+               return -1;
+       if (e1->offset > end_offset(e2->offset, e2->length))
+               return 1;
+       /* If ranges overlap or are contiguous, they are the same */
+       return 0;
 }
 
-static bool
+static void
 ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
                              struct nfs4_ff_layout_ds_err *dserr)
 {
-       struct nfs4_ff_layout_ds_err *err;
-
-       list_for_each_entry(err, &flo->error_list, list) {
-               if (merge_ds_error(err, dserr)) {
-                       return true;
-               }
-       }
-
-       list_add(&dserr->list, &flo->error_list);
-       return false;
-}
-
-static bool
-ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset,
-                         u64 length, int status, enum nfs_opnum4 opnum,
-                         nfs4_stateid *stateid, struct nfs4_deviceid *deviceid)
-{
-       bool found = false;
-       struct nfs4_ff_layout_ds_err *err;
-
-       list_for_each_entry(err, &flo->error_list, list) {
-               if (ds_error_can_merge(err, offset, length, status, opnum,
-                                      stateid, deviceid)) {
-                       found = true;
-                       extend_ds_error(err, offset, length);
+       struct nfs4_ff_layout_ds_err *err, *tmp;
+       struct list_head *head = &flo->error_list;
+       int match;
+
+       /* Do insertion sort w/ merges */
+       list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
+               match = ff_ds_error_match(err, dserr);
+               if (match < 0)
+                       continue;
+               if (match > 0) {
+                       /* Add entry "dserr" _before_ entry "err" */
+                       head = &err->list;
                        break;
                }
+               /* Entries match, so merge "err" into "dserr" */
+               extend_ds_error(dserr, err->offset, err->length);
+               list_del(&err->list);
+               kfree(err);
        }
 
-       return found;
+       list_add_tail(&dserr->list, head);
 }
 
 int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
@@ -283,7 +275,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
                             gfp_t gfp_flags)
 {
        struct nfs4_ff_layout_ds_err *dserr;
-       bool needfree;
 
        if (status == 0)
                return 0;
@@ -291,14 +282,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
        if (mirror->mirror_ds == NULL)
                return -EINVAL;
 
-       spin_lock(&flo->generic_hdr.plh_inode->i_lock);
-       if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
-                                     &mirror->stateid,
-                                     &mirror->mirror_ds->id_node.deviceid)) {
-               spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-               return 0;
-       }
-       spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
        dserr = kmalloc(sizeof(*dserr), gfp_flags);
        if (!dserr)
                return -ENOMEM;
@@ -313,10 +296,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
               NFS4_DEVICEID4_SIZE);
 
        spin_lock(&flo->generic_hdr.plh_inode->i_lock);
-       needfree = ff_layout_add_ds_error_locked(flo, dserr);
+       ff_layout_add_ds_error_locked(flo, dserr);
        spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-       if (needfree)
-               kfree(dserr);
 
        return 0;
 }
@@ -431,7 +412,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                                         OP_ILLEGAL, GFP_NOIO);
                if (!fail_return) {
                        if (ff_layout_has_available_ds(lseg))
-                               set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                               set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                        &lseg->pls_layout->plh_flags);
                        else
                                pnfs_error_mark_layout_for_return(ino, lseg);
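ff_ds_error_match() above defines a total order on data-server errors (opnum, then status, stateid, deviceid, and finally byte range), with overlapping or contiguous ranges of an otherwise identical error comparing equal so that they coalesce. The insertion loop therefore keeps error_list sorted and merged in one pass, replacing the separate merge and update scans it deletes. A stand-alone, user-space illustration of the same insert-with-merge scheme on a sorted list of [start,end) intervals:

    #include <stdio.h>
    #include <stdlib.h>

    struct ival { long start, end; struct ival *next; };

    /* Insert [start,end), absorbing every entry it overlaps or touches --
     * the counterpart of extend_ds_error() plus list_del()/kfree() above.
     */
    static struct ival *insert_merged(struct ival *head, long start, long end)
    {
            struct ival **pp = &head, *cur;

            while ((cur = *pp) && cur->end < start)    /* strictly before: skip */
                    pp = &cur->next;
            while ((cur = *pp) && cur->start <= end) { /* overlaps: merge */
                    if (cur->start < start)
                            start = cur->start;
                    if (cur->end > end)
                            end = cur->end;
                    *pp = cur->next;
                    free(cur);
            }
            cur = malloc(sizeof(*cur));
            cur->start = start;
            cur->end = end;
            cur->next = *pp;
            *pp = cur;
            return head;
    }

    int main(void)
    {
            struct ival *l = NULL, *i;

            l = insert_merged(l, 0, 10);
            l = insert_merged(l, 20, 30);
            l = insert_merged(l, 10, 20);      /* bridges both entries */
            for (i = l; i; i = i->next)
                    printf("[%ld,%ld)\n", i->start, i->end);  /* [0,30) */
            return 0;
    }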
index 8e24d886d2c5991c044965c3fa3093f3142632b8..86faecf8f328f2639bab1c5f7391df6d659610f2 100644 (file)
@@ -661,9 +661,9 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        trace_nfs_getattr_enter(inode);
        /* Flush out writes to the server in order to update c/mtime.  */
        if (S_ISREG(inode->i_mode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                err = nfs_sync_inode(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                if (err)
                        goto out;
        }
@@ -1178,9 +1178,9 @@ static int __nfs_revalidate_mapping(struct inode *inode,
        spin_unlock(&inode->i_lock);
        trace_nfs_invalidate_mapping_enter(inode);
        if (may_lock) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_invalidate_mapping(inode, mapping);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        } else
                ret = nfs_invalidate_mapping(inode, mapping);
        trace_nfs_invalidate_mapping_exit(inode, ret);
index 4e8cc942336c83dcbf9b5b615eb21dfa14be758d..9a547aa3ec8e873160da5a36906eb3813650034d 100644 (file)
@@ -484,7 +484,7 @@ void nfs_retry_commit(struct list_head *page_list,
                      struct nfs_commit_info *cinfo,
                      u32 ds_commit_idx);
 void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+void nfs_request_add_commit_list(struct nfs_page *req,
                                 struct nfs_commit_info *cinfo);
 void nfs_request_add_commit_list_locked(struct nfs_page *req,
                struct list_head *dst,
index 6e8174930a48eb705979c93fca0d674fba72c0e7..dff83460e5a63ff0b6578f419a793d70720c3997 100644 (file)
 
 #define NFSDBG_FACILITY NFSDBG_PROC
 
-static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
-                               fmode_t fmode)
-{
-       struct nfs_open_context *open;
-       struct nfs_lock_context *lock;
-       int ret;
-
-       open = get_nfs_open_context(nfs_file_open_context(file));
-       lock = nfs_get_lock_context(open);
-       if (IS_ERR(lock)) {
-               put_nfs_open_context(open);
-               return PTR_ERR(lock);
-       }
-
-       ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
-
-       nfs_put_lock_context(lock);
-       put_nfs_open_context(open);
-       return ret;
-}
-
 static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
-                                loff_t offset, loff_t len)
+               struct nfs_lock_context *lock, loff_t offset, loff_t len)
 {
        struct inode *inode = file_inode(filep);
        struct nfs_server *server = NFS_SERVER(inode);
@@ -56,7 +35,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
        msg->rpc_argp = &args;
        msg->rpc_resp = &res;
 
-       status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE);
+       status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
+                       lock, FMODE_WRITE);
        if (status)
                return status;
 
@@ -78,15 +58,26 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 {
        struct nfs_server *server = NFS_SERVER(file_inode(filep));
        struct nfs4_exception exception = { };
+       struct nfs_lock_context *lock;
        int err;
 
+       lock = nfs_get_lock_context(nfs_file_open_context(filep));
+       if (IS_ERR(lock))
+               return PTR_ERR(lock);
+
+       exception.inode = file_inode(filep);
+       exception.state = lock->open_context->state;
+
        do {
-               err = _nfs42_proc_fallocate(msg, filep, offset, len);
-               if (err == -ENOTSUPP)
-                       return -EOPNOTSUPP;
+               err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
+               if (err == -ENOTSUPP) {
+                       err = -EOPNOTSUPP;
+                       break;
+               }
                err = nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
 
+       nfs_put_lock_context(lock);
        return err;
 }
 
@@ -101,13 +92,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
        if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        err = nfs42_proc_fallocate(&msg, filep, offset, len);
        if (err == -EOPNOTSUPP)
                NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
@@ -123,7 +114,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
                return -EOPNOTSUPP;
 
        nfs_wb_all(inode);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        err = nfs42_proc_fallocate(&msg, filep, offset, len);
        if (err == 0)
@@ -131,11 +122,12 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
        if (err == -EOPNOTSUPP)
                NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
-static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+static loff_t _nfs42_proc_llseek(struct file *filep,
+               struct nfs_lock_context *lock, loff_t offset, int whence)
 {
        struct inode *inode = file_inode(filep);
        struct nfs42_seek_args args = {
@@ -156,7 +148,8 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
        if (!nfs_server_capable(inode, NFS_CAP_SEEK))
                return -ENOTSUPP;
 
-       status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
+       status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
+                       lock, FMODE_READ);
        if (status)
                return status;
 
@@ -175,17 +168,28 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
 {
        struct nfs_server *server = NFS_SERVER(file_inode(filep));
        struct nfs4_exception exception = { };
+       struct nfs_lock_context *lock;
        loff_t err;
 
+       lock = nfs_get_lock_context(nfs_file_open_context(filep));
+       if (IS_ERR(lock))
+               return PTR_ERR(lock);
+
+       exception.inode = file_inode(filep);
+       exception.state = lock->open_context->state;
+
        do {
-               err = _nfs42_proc_llseek(filep, offset, whence);
+               err = _nfs42_proc_llseek(filep, lock, offset, whence);
                if (err >= 0)
                        break;
-               if (err == -ENOTSUPP)
-                       return -EOPNOTSUPP;
+               if (err == -ENOTSUPP) {
+                       err = -EOPNOTSUPP;
+                       break;
+               }
                err = nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
 
+       nfs_put_lock_context(lock);
        return err;
 }
 
@@ -298,8 +302,9 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
 }
 
 static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
-                            struct file *dst_f, loff_t src_offset,
-                            loff_t dst_offset, loff_t count)
+               struct file *dst_f, struct nfs_lock_context *src_lock,
+               struct nfs_lock_context *dst_lock, loff_t src_offset,
+               loff_t dst_offset, loff_t count)
 {
        struct inode *src_inode = file_inode(src_f);
        struct inode *dst_inode = file_inode(dst_f);
@@ -320,11 +325,13 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
        msg->rpc_argp = &args;
        msg->rpc_resp = &res;
 
-       status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ);
+       status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+                       src_lock, FMODE_READ);
        if (status)
                return status;
 
-       status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE);
+       status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+                       dst_lock, FMODE_WRITE);
        if (status)
                return status;
 
@@ -349,22 +356,48 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
        };
        struct inode *inode = file_inode(src_f);
        struct nfs_server *server = NFS_SERVER(file_inode(src_f));
-       struct nfs4_exception exception = { };
-       int err;
+       struct nfs_lock_context *src_lock;
+       struct nfs_lock_context *dst_lock;
+       struct nfs4_exception src_exception = { };
+       struct nfs4_exception dst_exception = { };
+       int err, err2;
 
        if (!nfs_server_capable(inode, NFS_CAP_CLONE))
                return -EOPNOTSUPP;
 
+       src_lock = nfs_get_lock_context(nfs_file_open_context(src_f));
+       if (IS_ERR(src_lock))
+               return PTR_ERR(src_lock);
+
+       src_exception.inode = file_inode(src_f);
+       src_exception.state = src_lock->open_context->state;
+
+       dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f));
+       if (IS_ERR(dst_lock)) {
+               err = PTR_ERR(dst_lock);
+               goto out_put_src_lock;
+       }
+
+       dst_exception.inode = file_inode(dst_f);
+       dst_exception.state = dst_lock->open_context->state;
+
        do {
-               err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset,
-                                       dst_offset, count);
+               err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock,
+                                       src_offset, dst_offset, count);
                if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
                        NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
-                       return -EOPNOTSUPP;
+                       err = -EOPNOTSUPP;
+                       break;
                }
-               err = nfs4_handle_exception(server, err, &exception);
-       } while (exception.retry);
 
-       return err;
+               err2 = nfs4_handle_exception(server, err, &src_exception);
+               err = nfs4_handle_exception(server, err, &dst_exception);
+               if (!err)
+                       err = err2;
+       } while (src_exception.retry || dst_exception.retry);
 
+       nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+       nfs_put_lock_context(src_lock);
+       return err;
 }

index 26f9a23e2b254998d6b0c053e2e0892d2cd16050..57ca1c8039c1e00bfcb6e9bb4d8fe7e40f4578d5 100644 (file)
@@ -141,11 +141,11 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                if (ret != 0)
                        break;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_file_fsync_commit(file, start, end, datasync);
                if (!ret)
                        ret = pnfs_sync_inode(inode, !!datasync);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * If nfs_file_fsync_commit detected a server reboot, then
                 * resend all dirty pages that might have been covered by
@@ -219,13 +219,13 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 
        /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
        if (same_inode) {
-               mutex_lock(&src_inode->i_mutex);
+               inode_lock(src_inode);
        } else if (dst_inode < src_inode) {
-               mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(dst_inode, I_MUTEX_PARENT);
+               inode_lock_nested(src_inode, I_MUTEX_CHILD);
        } else {
-               mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(src_inode, I_MUTEX_PARENT);
+               inode_lock_nested(dst_inode, I_MUTEX_CHILD);
        }
 
        /* flush all pending writes on both src and dst so that server
@@ -246,13 +246,13 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 
 out_unlock:
        if (same_inode) {
-               mutex_unlock(&src_inode->i_mutex);
+               inode_unlock(src_inode);
        } else if (dst_inode < src_inode) {
-               mutex_unlock(&src_inode->i_mutex);
-               mutex_unlock(&dst_inode->i_mutex);
+               inode_unlock(src_inode);
+               inode_unlock(dst_inode);
        } else {
-               mutex_unlock(&dst_inode->i_mutex);
-               mutex_unlock(&src_inode->i_mutex);
+               inode_unlock(dst_inode);
+               inode_unlock(src_inode);
        }
 out:
        return ret;
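nfs42_clone_file_range() above takes the two inode locks in a fixed global order, lower inode address first with the I_MUTEX_PARENT lockdep class, so concurrent clones over the same pair of files cannot deadlock. The same discipline in a self-contained user-space form (ordering by pointer value, as the hunk does):

    #include <pthread.h>
    #include <stdio.h>

    static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            if (a == b) {                   /* same file: one lock */
                    pthread_mutex_lock(a);
            } else if (a < b) {             /* always lower address first */
                    pthread_mutex_lock(a);
                    pthread_mutex_lock(b);
            } else {
                    pthread_mutex_lock(b);
                    pthread_mutex_lock(a);
            }
    }

    static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            pthread_mutex_unlock(a);
            if (a != b)
                    pthread_mutex_unlock(b);
    }

    int main(void)
    {
            pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
            pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

            lock_pair(&m1, &m2);
            puts("both locks held in a stable order");
            unlock_pair(&m1, &m2);
            return 0;
    }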
index 4bfc33ad05637f58f9f0b675f058fd90085d4271..14881594dd07b9944a494d388722f4acf4b5457a 100644 (file)
@@ -2466,9 +2466,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                dentry = d_add_unique(dentry, igrab(state->inode));
                if (dentry == NULL) {
                        dentry = opendata->dentry;
-               } else if (dentry != ctx->dentry) {
+               } else {
                        dput(ctx->dentry);
-                       ctx->dentry = dget(dentry);
+                       ctx->dentry = dentry;
                }
                nfs_set_verifier(dentry,
                                nfs_save_change_attribute(d_inode(opendata->dir)));
index a3592cc34a20b76341c9c9b9af3378bdddebd477..2fa483e6dbe2e4a0e4333bf2d76cfd861073232b 100644 (file)
@@ -52,9 +52,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
  */
 static LIST_HEAD(pnfs_modules_tbl);
 
-static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
-                      enum pnfs_iomode iomode, bool sync);
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
 
 /* Return the registered pnfs layout driver module matching given id */
 static struct pnfs_layoutdriver_type *
@@ -243,6 +241,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
        struct inode *inode = lo->plh_inode;
 
+       pnfs_layoutreturn_before_put_layout_hdr(lo);
+
        if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
                if (!list_empty(&lo->plh_segs))
                        WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
@@ -252,6 +252,27 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
        }
 }
 
+/*
+ * Mark a pnfs_layout_hdr and all associated layout segments as invalid
+ *
+ * In order to continue using the pnfs_layout_hdr, a full recovery
+ * is required.
+ * Note that caller must hold inode->i_lock.
+ */
+static int
+pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+               struct list_head *lseg_list)
+{
+       struct pnfs_layout_range range = {
+               .iomode = IOMODE_ANY,
+               .offset = 0,
+               .length = NFS4_MAX_UINT64,
+       };
+
+       set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+       return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range);
+}
+
 static int
 pnfs_iomode_to_fail_bit(u32 iomode)
 {
@@ -345,58 +366,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
        rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
-/* Return true if layoutreturn is needed */
-static bool
-pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
-                       struct pnfs_layout_segment *lseg)
-{
-       struct pnfs_layout_segment *s;
-
-       if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-               return false;
-
-       list_for_each_entry(s, &lo->plh_segs, pls_list)
-               if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
-                       return false;
-
-       return true;
-}
-
-static bool
-pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
-{
-       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
-               return false;
-       lo->plh_return_iomode = 0;
-       pnfs_get_layout_hdr(lo);
-       clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
-       return true;
-}
-
-static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
-               struct pnfs_layout_hdr *lo, struct inode *inode)
-{
-       lo = lseg->pls_layout;
-       inode = lo->plh_inode;
-
-       spin_lock(&inode->i_lock);
-       if (pnfs_layout_need_return(lo, lseg)) {
-               nfs4_stateid stateid;
-               enum pnfs_iomode iomode;
-               bool send;
-
-               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
-               iomode = lo->plh_return_iomode;
-               send = pnfs_prepare_layoutreturn(lo);
-               spin_unlock(&inode->i_lock);
-               if (send) {
-                       /* Send an async layoutreturn so we dont deadlock */
-                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
-               }
-       } else
-               spin_unlock(&inode->i_lock);
-}
-
 void
 pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 {
@@ -410,15 +379,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
                atomic_read(&lseg->pls_refcount),
                test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 
-       /* Handle the case where refcount != 1 */
-       if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
-               return;
-
        lo = lseg->pls_layout;
        inode = lo->plh_inode;
-       /* Do we need a layoutreturn? */
-       if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-               pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
 
        if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
@@ -613,9 +575,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
        spin_lock(&nfsi->vfs_inode.i_lock);
        lo = nfsi->layout;
        if (lo) {
-               lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
-               pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
                pnfs_get_layout_hdr(lo);
+               pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
                pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
                pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
                spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -676,11 +637,6 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
 {
        struct pnfs_layout_hdr *lo;
        struct inode *inode;
-       struct pnfs_layout_range range = {
-               .iomode = IOMODE_ANY,
-               .offset = 0,
-               .length = NFS4_MAX_UINT64,
-       };
        LIST_HEAD(lseg_list);
        int ret = 0;
 
@@ -695,11 +651,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
 
                spin_lock(&inode->i_lock);
                list_del_init(&lo->plh_bulk_destroy);
-               lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
-               if (is_bulk_recall)
-                       set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
-               if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
+               if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+                       if (is_bulk_recall)
+                               set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
                        ret = -EAGAIN;
+               }
                spin_unlock(&inode->i_lock);
                pnfs_free_lseg_list(&lseg_list);
                /* Free all lsegs that are attached to commit buckets */
@@ -937,6 +893,17 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
        rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               return false;
+       lo->plh_return_iomode = 0;
+       pnfs_get_layout_hdr(lo);
+       clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+       return true;
+}
+
 static int
 pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
                       enum pnfs_iomode iomode, bool sync)
@@ -971,6 +938,48 @@ out:
        return status;
 }
 
+/* Return true if layoutreturn is needed */
+static bool
+pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
+{
+       struct pnfs_layout_segment *s;
+
+       if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               return false;
+
+       /* Defer layoutreturn until all lsegs are done */
+       list_for_each_entry(s, &lo->plh_segs, pls_list) {
+               if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
+                       return false;
+       }
+
+       return true;
+}
+
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+       struct inode *inode = lo->plh_inode;
+
+       if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               return;
+       spin_lock(&inode->i_lock);
+       if (pnfs_layout_need_return(lo)) {
+               nfs4_stateid stateid;
+               enum pnfs_iomode iomode;
+               bool send;
+
+               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+               iomode = lo->plh_return_iomode;
+               send = pnfs_prepare_layoutreturn(lo);
+               spin_unlock(&inode->i_lock);
+               if (send) {
+                       /* Send an async layoutreturn so we don't deadlock */
+                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+               }
+       } else
+               spin_unlock(&inode->i_lock);
+}
+
 /*
  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
  * when the layout segment list is empty.
@@ -1091,7 +1100,7 @@ bool pnfs_roc(struct inode *ino)
 
        nfs4_stateid_copy(&stateid, &lo->plh_stateid);
        /* always send layoutreturn if being marked so */
-       if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+       if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                   &lo->plh_flags))
                layoutreturn = pnfs_prepare_layoutreturn(lo);
 
@@ -1744,8 +1753,19 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
        if (lo->plh_return_iomode != 0)
                iomode = IOMODE_ANY;
        lo->plh_return_iomode = iomode;
+       set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
 }
 
+/**
+ * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
+ * @lo: pointer to layout header
+ * @tmp_list: list header to be used with pnfs_free_lseg_list()
+ * @return_range: describe layout segment ranges to be returned
+ *
+ * This function is mainly intended for use by layoutrecall. It attempts
+ * to free the layout segment immediately, or else to mark it for return
+ * as soon as its reference count drops to zero.
+ */
 int
 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                                struct list_head *tmp_list,
@@ -1768,12 +1788,11 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                                lseg, lseg->pls_range.iomode,
                                lseg->pls_range.offset,
                                lseg->pls_range.length);
+                       if (mark_lseg_invalid(lseg, tmp_list))
+                               continue;
+                       remaining++;
                        set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
                        pnfs_set_plh_return_iomode(lo, return_range->iomode);
-                       if (!mark_lseg_invalid(lseg, tmp_list))
-                               remaining++;
-                       set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
-                                       &lo->plh_flags);
                }
        return remaining;
 }
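The pNFS hunks above move the "do we owe the server a layoutreturn?" decision from every pnfs_put_lseg() into pnfs_put_layout_hdr(): marking segments for return now only sets NFS_LAYOUT_RETURN_REQUESTED, and the flag is acted on once when the header itself is being put. Reduced to its shape in stand-alone C (illustrative; the real code also checks that no segment is still marked, and does this under inode->i_lock):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct layout {
            atomic_int refcount;
            atomic_bool return_requested;
    };

    static void put_layout(struct layout *lo)
    {
            /* like test_and_clear_bit(): only one put sends the return */
            if (atomic_exchange(&lo->return_requested, false))
                    puts("send layoutreturn once, before the last put");
            if (atomic_fetch_sub(&lo->refcount, 1) == 1)
                    free(lo);
    }

    int main(void)
    {
            struct layout *lo = malloc(sizeof(*lo));

            atomic_init(&lo->refcount, 2);
            atomic_init(&lo->return_requested, true);
            put_layout(lo);         /* fires the deferred layoutreturn */
            put_layout(lo);         /* drops the final reference */
            return 0;
    }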
index 9f4e2a47f4aa4ff41a590178dde2ddf160ae244f..1ac1db5f6dadb6508cf8658a53385add279eb9fa 100644 (file)
@@ -94,8 +94,8 @@ enum {
        NFS_LAYOUT_RO_FAILED = 0,       /* get ro layout failed stop trying */
        NFS_LAYOUT_RW_FAILED,           /* get rw layout failed stop trying */
        NFS_LAYOUT_BULK_RECALL,         /* bulk recall affecting layout */
-       NFS_LAYOUT_RETURN,              /* Return this layout ASAP */
-       NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
+       NFS_LAYOUT_RETURN,              /* layoutreturn in progress */
+       NFS_LAYOUT_RETURN_REQUESTED,    /* Return this layout ASAP */
        NFS_LAYOUT_INVALID_STID,        /* layout stateid id is invalid */
        NFS_LAYOUT_FIRST_LAYOUTGET,     /* Serialize first layoutget */
 };
index ce43cd6d88c634773ae86a2922d3700d6f97ad33..5754835a288608bd7dc3b58b8f92a2a6ee8a20bc 100644 (file)
@@ -830,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
  * holding the nfs_page lock.
  */
 void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
-                           struct nfs_commit_info *cinfo)
+nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
        spin_lock(cinfo->lock);
-       nfs_request_add_commit_list_locked(req, dst, cinfo);
+       nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
        spin_unlock(cinfo->lock);
        nfs_mark_page_unstable(req->wb_page, cinfo);
 }
@@ -892,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 {
        if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
                return;
-       nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+       nfs_request_add_commit_list(req, cinfo);
 }
 
 static void
index 819ad812c71b903200784f7c65622ff07ce59d72..4cba7865f4966b825e3d47c7d423fdb8ca11a262 100644 (file)
@@ -55,10 +55,10 @@ nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u
        struct inode *inode = d_inode(resfh->fh_dentry);
        int status;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        status = security_inode_setsecctx(resfh->fh_dentry,
                label->data, label->len);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (status)
                /*
index 79f0307a5ec8331426ef331c5c843d59966dd5ff..dc8ebecf561866a2d6fcee729813528852be6313 100644 (file)
@@ -192,7 +192,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 
        dir = nn->rec_file->f_path.dentry;
        /* lock the parent */
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
 
        dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
        if (IS_ERR(dentry)) {
@@ -213,7 +213,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 out_put:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        if (status == 0) {
                if (nn->in_grace) {
                        crp = nfs4_client_to_reclaim(dname, nn);
@@ -286,7 +286,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
        }
 
        status = iterate_dir(nn->rec_file, &ctx.ctx);
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
                if (!status) {
@@ -302,7 +302,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
                list_del(&entry->list);
                kfree(entry);
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        nfs4_reset_creds(original_cred);
 
        list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
@@ -322,7 +322,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
        dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
        dir = nn->rec_file->f_path.dentry;
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
        dentry = lookup_one_len(name, dir, namlen);
        if (IS_ERR(dentry)) {
                status = PTR_ERR(dentry);
@@ -335,7 +335,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
 out:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        return status;
 }
 
index 0770bcb543c81d843106825a2c9c0da6be3d15a4..f84fe6bf9aee144cd7227711540f0027e24ef6c2 100644 (file)
@@ -288,7 +288,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
        }
 
        inode = d_inode(dentry);
-       mutex_lock_nested(&inode->i_mutex, subclass);
+       inode_lock_nested(inode, subclass);
        fill_pre_wcc(fhp);
        fhp->fh_locked = true;
 }
@@ -307,7 +307,7 @@ fh_unlock(struct svc_fh *fhp)
 {
        if (fhp->fh_locked) {
                fill_post_wcc(fhp);
-               mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
+               inode_unlock(d_inode(fhp->fh_dentry));
                fhp->fh_locked = false;
        }
 }
index 6739077f17fe7827beee7b7469b2c06450862362..5d2a57e4c03ad3494e0b08fc3dfbc0e13fb192df 100644 (file)
@@ -493,9 +493,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
        dentry = fhp->fh_dentry;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        host_error = security_inode_setsecctx(dentry, label->data, label->len);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return nfserrno(host_error);
 }
 #else
index 10b22527a617dcdefc36d36ce4fc3d3658e20330..21a1e2e0d92fe22696ebac0bd702238fb95ec02f 100644 (file)
@@ -1003,7 +1003,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
 
@@ -1113,6 +1113,6 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret == 1)
                ret = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
index aba43811d6ef1221c68b275afda0174f072343bf..e8fe24882b5baa417e3002ada770882f4771534b 100644 (file)
@@ -158,7 +158,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
 
        flags = nilfs_mask_flags(inode->i_mode, flags);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        oldflags = NILFS_I(inode)->i_flags;
 
@@ -186,7 +186,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
        nilfs_mark_inode_dirty(inode);
        ret = nilfs_transaction_commit(inode->i_sb);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(filp);
        return ret;
 }
index cfcbf114676ed9e5ee439563acc0a407e3541522..7115c5d7d373c63df1512eba5ea90351ea5c7c45 100644 (file)
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+#define FSNOTIFY_REAPER_DELAY  (1)     /* 1 jiffy */
+
 struct srcu_struct fsnotify_mark_srcu;
+static DEFINE_SPINLOCK(destroy_lock);
+static LIST_HEAD(destroy_list);
+
+static void fsnotify_mark_destroy(struct work_struct *work);
+static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy);
 
 void fsnotify_get_mark(struct fsnotify_mark *mark)
 {
@@ -165,19 +172,10 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
        atomic_dec(&group->num_marks);
 }
 
-static void
-fsnotify_mark_free_rcu(struct rcu_head *rcu)
-{
-       struct fsnotify_mark    *mark;
-
-       mark = container_of(rcu, struct fsnotify_mark, g_rcu);
-       fsnotify_put_mark(mark);
-}
-
 /*
- * Free fsnotify mark. The freeing is actually happening from a call_srcu
- * callback. Caller must have a reference to the mark or be protected by
- * fsnotify_mark_srcu.
+ * Free fsnotify mark. The freeing actually happens from a workqueue item
+ * that first waits for an SRCU grace period to end. Caller must have a
+ * reference to the mark or be protected by fsnotify_mark_srcu.
  */
 void fsnotify_free_mark(struct fsnotify_mark *mark)
 {
@@ -192,7 +190,11 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
        spin_unlock(&mark->lock);
 
-       call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
+       spin_lock(&destroy_lock);
+       list_add(&mark->g_list, &destroy_list);
+       spin_unlock(&destroy_lock);
+       queue_delayed_work(system_unbound_wq, &reaper_work,
+                               FSNOTIFY_REAPER_DELAY);
 
        /*
         * Some groups like to know that marks are being freed.  This is a
@@ -388,7 +390,12 @@ err:
 
        spin_unlock(&mark->lock);
 
-       call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
+       spin_lock(&destroy_lock);
+       list_add(&mark->g_list, &destroy_list);
+       spin_unlock(&destroy_lock);
+       queue_delayed_work(system_unbound_wq, &reaper_work,
+                               FSNOTIFY_REAPER_DELAY);
+
        return ret;
 }
 
@@ -491,3 +498,21 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
        atomic_set(&mark->refcnt, 1);
        mark->free_mark = free_mark;
 }
+
+static void fsnotify_mark_destroy(struct work_struct *work)
+{
+       struct fsnotify_mark *mark, *next;
+       struct list_head private_destroy_list;
+
+       spin_lock(&destroy_lock);
+       /* exchange the list head */
+       list_replace_init(&destroy_list, &private_destroy_list);
+       spin_unlock(&destroy_lock);
+
+       synchronize_srcu(&fsnotify_mark_srcu);
+
+       list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
+               list_del_init(&mark->g_list);
+               fsnotify_put_mark(mark);
+       }
+}
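The fsnotify change above stops freeing each mark through its own call_srcu() callback: marks queue on destroy_list, and a delayed work item swaps the whole list out with list_replace_init(), pays a single synchronize_srcu() for the entire batch, then drops the references outside atomic context. The batching pattern in self-contained user-space C (wait_for_grace_period() is a stand-in for synchronize_srcu()):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node { struct node *next; };

    static pthread_mutex_t destroy_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct node *destroy_list;

    static void defer_free(struct node *n)
    {
            pthread_mutex_lock(&destroy_lock);
            n->next = destroy_list;            /* producers just enqueue */
            destroy_list = n;
            pthread_mutex_unlock(&destroy_lock);
    }

    static void wait_for_grace_period(void)
    {
            /* synchronize_srcu() stand-in: all prior readers have finished */
    }

    static void reaper(void)
    {
            struct node *batch, *next;

            pthread_mutex_lock(&destroy_lock);
            batch = destroy_list;              /* like list_replace_init() */
            destroy_list = NULL;
            pthread_mutex_unlock(&destroy_lock);

            wait_for_grace_period();           /* once per batch, not per node */

            for (; batch; batch = next) {
                    next = batch->next;
                    free(batch);
            }
    }

    int main(void)
    {
            defer_free(malloc(sizeof(struct node)));
            defer_free(malloc(sizeof(struct node)));
            reaper();
            puts("batch freed after one grace period");
            return 0;
    }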
index 9e38dafa3bc78ec71acc7acec733d9cd52c4f40a..b2eff5816adc3e915f03665c4b283f116a7b1eab 100644 (file)
@@ -1509,7 +1509,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        err = filemap_write_and_wait_range(vi->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
 
        BUG_ON(!S_ISDIR(vi->i_mode));
        /* If the bitmap attribute inode is in memory sync it, too. */
@@ -1532,7 +1532,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        else
                ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
                                "%u.", datasync ? "data" : "", vi->i_ino, -ret);
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        return ret;
 }
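
The hunk above is the first of a long mechanical series converting
open-coded i_mutex locking to the new VFS wrapper helpers; the ntfs, ocfs2,
open.c, overlayfs, and proc hunks below all follow the same pattern. For
reference, those helpers are thin inlines in include/linux/fs.h, roughly as
below (paraphrased from memory of the v4.5-rc1 tree, so treat the exact
text as an approximation):

    static inline void inode_lock(struct inode *inode)
    {
            mutex_lock(&inode->i_mutex);
    }

    static inline void inode_unlock(struct inode *inode)
    {
            mutex_unlock(&inode->i_mutex);
    }

    static inline int inode_trylock(struct inode *inode)
    {
            return mutex_trylock(&inode->i_mutex);
    }

    static inline int inode_is_locked(struct inode *inode)
    {
            return mutex_is_locked(&inode->i_mutex);
    }

    static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
    {
            mutex_lock_nested(&inode->i_mutex, subclass);
    }

Keeping every i_mutex access behind these wrappers is what later allows the
lock's implementation to change without touching each filesystem again.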
 
index 9d383e5eff0ea5519bffb34528b5aeccbe6f3831..bed4d427dfaee110c7da84c70f32c673fbdc80aa 100644 (file)
@@ -1944,14 +1944,14 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        ssize_t written = 0;
        ssize_t err;
 
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
        /* We can write back this queue in page reclaim. */
        current->backing_dev_info = inode_to_bdi(vi);
        err = ntfs_prepare_file_for_write(iocb, from);
        if (iov_iter_count(from) && !err)
                written = ntfs_perform_write(file, from, iocb->ki_pos);
        current->backing_dev_info = NULL;
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        if (likely(written > 0)) {
                err = generic_write_sync(file, iocb->ki_pos, written);
                if (err < 0)
@@ -1996,7 +1996,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        err = filemap_write_and_wait_range(vi->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
 
        BUG_ON(S_ISDIR(vi->i_mode));
        if (!datasync || !NInoNonResident(NTFS_I(vi)))
@@ -2015,7 +2015,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        else
                ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
                                "%u.", datasync ? "data" : "", vi->i_ino, -ret);
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        return ret;
 }
 
index d80e3315cab0ec73cfdd9b668594d588f559273e..9793e68ba1ddf47091d0dc80a324dfe27a39dbc1 100644 (file)
@@ -48,7 +48,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
                ntfs_error(vol->sb, "Quota inodes are not open.");
                return false;
        }
-       mutex_lock(&vol->quota_q_ino->i_mutex);
+       inode_lock(vol->quota_q_ino);
        ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
        if (!ictx) {
                ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
        ntfs_index_entry_mark_dirty(ictx);
 set_done:
        ntfs_index_ctx_put(ictx);
-       mutex_unlock(&vol->quota_q_ino->i_mutex);
+       inode_unlock(vol->quota_q_ino);
        /*
         * We set the flag so we do not try to mark the quotas out of date
         * again on remount.
@@ -110,7 +110,7 @@ done:
 err_out:
        if (ictx)
                ntfs_index_ctx_put(ictx);
-       mutex_unlock(&vol->quota_q_ino->i_mutex);
+       inode_unlock(vol->quota_q_ino);
        return false;
 }
 
index 2f77f8dfb861415c9e5196c129d6d4902e042ce8..1b38abdaa3ed3da7522f461ae49a77ccf5ee5982 100644 (file)
@@ -1284,10 +1284,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
         * Find the inode number for the hibernation file by looking up the
         * filename hiberfil.sys in the root directory.
         */
-       mutex_lock(&vol->root_ino->i_mutex);
+       inode_lock(vol->root_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
                        &name);
-       mutex_unlock(&vol->root_ino->i_mutex);
+       inode_unlock(vol->root_ino);
        if (IS_ERR_MREF(mref)) {
                ret = MREF_ERR(mref);
                /* If the file does not exist, Windows is not hibernated. */
@@ -1377,10 +1377,10 @@ static bool load_and_init_quota(ntfs_volume *vol)
         * Find the inode number for the quota file by looking up the filename
         * $Quota in the extended system files directory $Extend.
         */
-       mutex_lock(&vol->extend_ino->i_mutex);
+       inode_lock(vol->extend_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
                        &name);
-       mutex_unlock(&vol->extend_ino->i_mutex);
+       inode_unlock(vol->extend_ino);
        if (IS_ERR_MREF(mref)) {
                /*
                 * If the file does not exist, quotas are disabled and have
@@ -1460,10 +1460,10 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol)
         * Find the inode number for the transaction log file by looking up the
         * filename $UsnJrnl in the extended system files directory $Extend.
         */
-       mutex_lock(&vol->extend_ino->i_mutex);
+       inode_lock(vol->extend_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
                        &name);
-       mutex_unlock(&vol->extend_ino->i_mutex);
+       inode_unlock(vol->extend_ino);
        if (IS_ERR_MREF(mref)) {
                /*
                 * If the file does not exist, transaction logging is disabled,
index a3ded88718c93eec4bf9767018826c960285a07d..d002579c6f2b03a62209110a1ce84e7605d2651a 100644 (file)
@@ -5719,7 +5719,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
                goto bail;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -5776,7 +5776,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 out_commit:
        ocfs2_commit_trans(osb, handle);
 out:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 bail:
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
@@ -5832,7 +5832,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
        struct ocfs2_dinode *di;
        struct ocfs2_truncate_log *tl;
 
-       BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+       BUG_ON(inode_trylock(tl_inode));
 
        start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
 
@@ -5980,7 +5980,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        struct ocfs2_dinode *di;
        struct ocfs2_truncate_log *tl;
 
-       BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+       BUG_ON(inode_trylock(tl_inode));
 
        di = (struct ocfs2_dinode *) tl_bh->b_data;
 
@@ -6008,7 +6008,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
                goto out;
        }
 
-       mutex_lock(&data_alloc_inode->i_mutex);
+       inode_lock(data_alloc_inode);
 
        status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
        if (status < 0) {
@@ -6035,7 +6035,7 @@ out_unlock:
        ocfs2_inode_unlock(data_alloc_inode, 1);
 
 out_mutex:
-       mutex_unlock(&data_alloc_inode->i_mutex);
+       inode_unlock(data_alloc_inode);
        iput(data_alloc_inode);
 
 out:
@@ -6047,9 +6047,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        int status;
        struct inode *tl_inode = osb->osb_tl_inode;
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
        status = __ocfs2_flush_truncate_log(osb);
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        return status;
 }
@@ -6208,7 +6208,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
                (unsigned long long)le64_to_cpu(tl_copy->i_blkno),
                num_recs);
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
        for(i = 0; i < num_recs; i++) {
                if (ocfs2_truncate_log_needs_flush(osb)) {
                        status = __ocfs2_flush_truncate_log(osb);
@@ -6239,7 +6239,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
        }
 
 bail_up:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        return status;
 }
@@ -6346,7 +6346,7 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
                goto out;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = ocfs2_inode_lock(inode, &di_bh, 1);
        if (ret) {
@@ -6395,7 +6395,7 @@ out_unlock:
        ocfs2_inode_unlock(inode, 1);
        brelse(di_bh);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        iput(inode);
 out:
        while(head) {
@@ -6439,7 +6439,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
        handle_t *handle;
        int ret = 0;
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        while (head) {
                if (ocfs2_truncate_log_needs_flush(osb)) {
@@ -6471,7 +6471,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
                }
        }
 
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        while (head) {
                /* Premature exit may have left some dangling items. */
@@ -7355,7 +7355,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
        if (ret < 0) {
@@ -7422,7 +7422,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 0);
        brelse(main_bm_bh);
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 out:
        return ret;
index 7f604727f487b3680d7f145161288656dd1aaf80..cda0361e95a403e887d912174ec296aa4cbb6e4d 100644 (file)
@@ -956,6 +956,7 @@ clean_orphan:
                tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
                                update_isize, end);
                if (tmp_ret < 0) {
+                       ocfs2_inode_unlock(inode, 1);
                        ret = tmp_ret;
                        mlog_errno(ret);
                        brelse(di_bh);
@@ -2046,9 +2047,9 @@ static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
        int ret = 0;
        unsigned int truncated_clusters;
 
-       mutex_lock(&osb->osb_tl_inode->i_mutex);
+       inode_lock(osb->osb_tl_inode);
        truncated_clusters = osb->truncated_clusters;
-       mutex_unlock(&osb->osb_tl_inode->i_mutex);
+       inode_unlock(osb->osb_tl_inode);
 
        /*
         * Check whether we can succeed in allocating if we free
index a3cc6d2fc896cea05a8605dcc84e09b7840a5e1a..a76b9ea7722e6d7d00d8c0c58217a4a0387f0ff0 100644 (file)
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = {
 
 void o2hb_exit(void)
 {
-       kfree(o2hb_db_livenodes);
-       kfree(o2hb_db_liveregions);
-       kfree(o2hb_db_quorumregions);
-       kfree(o2hb_db_failedregions);
        debugfs_remove(o2hb_debug_failedregions);
        debugfs_remove(o2hb_debug_quorumregions);
        debugfs_remove(o2hb_debug_liveregions);
        debugfs_remove(o2hb_debug_livenodes);
        debugfs_remove(o2hb_debug_dir);
+       kfree(o2hb_db_livenodes);
+       kfree(o2hb_db_liveregions);
+       kfree(o2hb_db_quorumregions);
+       kfree(o2hb_db_failedregions);
 }
 
 static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item)
 
        kfree(reg->hr_slots);
 
-       kfree(reg->hr_db_regnum);
-       kfree(reg->hr_db_livenodes);
        debugfs_remove(reg->hr_debug_livenodes);
        debugfs_remove(reg->hr_debug_regnum);
        debugfs_remove(reg->hr_debug_elapsed_time);
        debugfs_remove(reg->hr_debug_pinned);
        debugfs_remove(reg->hr_debug_dir);
+       kfree(reg->hr_db_livenodes);
+       kfree(reg->hr_db_regnum);
+       kfree(reg->hr_debug_elapsed_time);
+       kfree(reg->hr_debug_pinned);
 
        spin_lock(&o2hb_live_lock);
        list_del(&reg->hr_all_item);
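
Both o2hb hunks fix teardown ordering: the debugfs entries are now removed
before the buffers backing them are freed (and two previously unfreed
allocations are released as well), closing a window where a concurrent
debugfs read could touch freed memory. A generic sketch of the rule, with
illustrative names rather than ocfs2 code:

    #include <stdlib.h>

    struct debug_file { char *buf; };

    /* Analogous to debugfs_remove(): after this, no new readers. */
    static void unpublish(struct debug_file *f) { (void)f; }

    static void teardown(struct debug_file *f)
    {
            unpublish(f);   /* first stop anyone from reaching buf... */
            free(f->buf);   /* ...only then free what it points at */
            free(f);
    }

    int main(void)
    {
            struct debug_file *f = malloc(sizeof(*f));
            f->buf = malloc(64);
            teardown(f);
            return 0;
    }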
index ffecf89c8c1cd23d532a9155660a6609e9d38551..e1adf285fc3142ab8c0856ea0a28d766c914ce3f 100644 (file)
@@ -4361,7 +4361,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
                mlog_errno(ret);
                goto out;
        }
-       mutex_lock(&dx_alloc_inode->i_mutex);
+       inode_lock(dx_alloc_inode);
 
        ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
        if (ret) {
@@ -4410,7 +4410,7 @@ out_unlock:
        ocfs2_inode_unlock(dx_alloc_inode, 1);
 
 out_mutex:
-       mutex_unlock(&dx_alloc_inode->i_mutex);
+       inode_unlock(dx_alloc_inode);
        brelse(dx_alloc_bh);
 out:
        iput(dx_alloc_inode);
index c5bdf02c213bd7ad5c086331a1ea8194bde40578..b94a425f0175fa84818d78742659c42293d96dba 100644 (file)
@@ -2367,6 +2367,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
                                                break;
                                        }
                                }
+                               dlm_lockres_clear_refmap_bit(dlm, res,
+                                               dead_node);
                                spin_unlock(&res->spinlock);
                                continue;
                        }
index f92612e4b9d61144b13021c83dd8b4b1e9553ac5..474e57f834e6caff12eaca03c8ece4d35303d1e0 100644 (file)
@@ -1390,6 +1390,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
        unsigned int gen;
        int noqueue_attempted = 0;
        int dlm_locked = 0;
+       int kick_dc = 0;
 
        if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
                mlog_errno(-EINVAL);
@@ -1524,7 +1525,12 @@ update_holders:
 unlock:
        lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
 
+       /* ocfs2_unblock_lock requeues on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
+       kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
+
        spin_unlock_irqrestore(&lockres->l_lock, flags);
+       if (kick_dc)
+               ocfs2_wake_downconvert_thread(osb);
 out:
        /*
         * This is helping work around a lock inversion between the page lock
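
The kick_dc change follows a common pattern: latch a condition while the
spinlock is held, then act on it only after dropping the lock, so the
wakeup never runs under the spinlock. A small stand-alone model (a pthread
mutex standing in for the spinlock; names are illustrative):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t l = PTHREAD_MUTEX_INITIALIZER;
    static int blocked = 1;

    static void wake_downconvert(void) { printf("kick dc thread\n"); }

    static void unlock_path(void)
    {
            int kick;

            pthread_mutex_lock(&l);
            kick = blocked;          /* sample the flag under the lock... */
            pthread_mutex_unlock(&l);
            if (kick)                /* ...act only after dropping it */
                    wake_downconvert();
    }

    int main(void)
    {
            unlock_path();
            return 0;
    }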
index d6312793250927b1efacf4f09c18c38f7da75e5d..7cb38fdca229f050f89aed34c93c85db11a99ff8 100644 (file)
@@ -1872,7 +1872,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
        if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
                return -EROFS;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * This prevents concurrent writes on other nodes
@@ -1991,7 +1991,7 @@ out_rw_unlock:
        ocfs2_rw_unlock(inode, 1);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -2299,7 +2299,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
        appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
        direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
 relock:
        /*
@@ -2435,7 +2435,7 @@ out:
                ocfs2_rw_unlock(inode, rw_level);
 
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (written)
                ret = written;
@@ -2547,7 +2547,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file->f_mapping->host;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        switch (whence) {
        case SEEK_SET:
@@ -2585,7 +2585,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret)
                return ret;
        return offset;
index 97a563bab9a871ee3e371a9796b990c24835f47f..36294446d9607ee9f0d07814552fd6f0bf97e6ab 100644 (file)
@@ -630,10 +630,10 @@ static int ocfs2_remove_inode(struct inode *inode,
                goto bail;
        }
 
-       mutex_lock(&inode_alloc_inode->i_mutex);
+       inode_lock(inode_alloc_inode);
        status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1);
        if (status < 0) {
-               mutex_unlock(&inode_alloc_inode->i_mutex);
+               inode_unlock(inode_alloc_inode);
 
                mlog_errno(status);
                goto bail;
@@ -680,7 +680,7 @@ bail_commit:
        ocfs2_commit_trans(osb, handle);
 bail_unlock:
        ocfs2_inode_unlock(inode_alloc_inode, 1);
-       mutex_unlock(&inode_alloc_inode->i_mutex);
+       inode_unlock(inode_alloc_inode);
        brelse(inode_alloc_bh);
 bail:
        iput(inode_alloc_inode);
@@ -751,10 +751,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
                /* Lock the orphan dir. The lock will be held for the entire
                 * delete_inode operation. We do this now to avoid races with
                 * recovery completion on other nodes. */
-               mutex_lock(&orphan_dir_inode->i_mutex);
+               inode_lock(orphan_dir_inode);
                status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
                if (status < 0) {
-                       mutex_unlock(&orphan_dir_inode->i_mutex);
+                       inode_unlock(orphan_dir_inode);
 
                        mlog_errno(status);
                        goto bail;
@@ -803,7 +803,7 @@ bail_unlock_dir:
                return status;
 
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        brelse(orphan_dir_bh);
 bail:
        iput(orphan_dir_inode);
index 16b0bb482ea7cb68540c8fe7ea133b1bc89185ce..4506ec5ec2ea624235a36c06cb755ef2211e0914 100644 (file)
@@ -86,7 +86,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
        unsigned oldflags;
        int status;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        status = ocfs2_inode_lock(inode, &bh, 1);
        if (status < 0) {
@@ -135,7 +135,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 bail_unlock:
        ocfs2_inode_unlock(inode, 1);
 bail:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        brelse(bh);
 
@@ -287,7 +287,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
        struct ocfs2_dinode *dinode_alloc = NULL;
 
        if (inode_alloc)
-               mutex_lock(&inode_alloc->i_mutex);
+               inode_lock(inode_alloc);
 
        if (o2info_coherent(&fi->ifi_req)) {
                status = ocfs2_inode_lock(inode_alloc, &bh, 0);
@@ -317,7 +317,7 @@ bail:
                ocfs2_inode_unlock(inode_alloc, 0);
 
        if (inode_alloc)
-               mutex_unlock(&inode_alloc->i_mutex);
+               inode_unlock(inode_alloc);
 
        brelse(bh);
 
@@ -547,7 +547,7 @@ static int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
        struct ocfs2_dinode *gb_dinode = NULL;
 
        if (gb_inode)
-               mutex_lock(&gb_inode->i_mutex);
+               inode_lock(gb_inode);
 
        if (o2info_coherent(&ffg->iff_req)) {
                status = ocfs2_inode_lock(gb_inode, &bh, 0);
@@ -604,7 +604,7 @@ bail:
                ocfs2_inode_unlock(gb_inode, 0);
 
        if (gb_inode)
-               mutex_unlock(&gb_inode->i_mutex);
+               inode_unlock(gb_inode);
 
        iput(gb_inode);
        brelse(bh);
index 3772a2dbb980c950279349146f14c4ec3887ead2..61b833b721d89e4184453760507996d5f208800a 100644 (file)
@@ -2088,7 +2088,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
                return status;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
        status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
        if (status < 0) {
                mlog_errno(status);
@@ -2106,7 +2106,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 out_cluster:
        ocfs2_inode_unlock(orphan_dir_inode, 0);
 out:
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
        return status;
 }
@@ -2196,7 +2196,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
                oi->ip_next_orphan = NULL;
 
                if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        ret = ocfs2_rw_lock(inode, 1);
                        if (ret < 0) {
                                mlog_errno(ret);
@@ -2235,7 +2235,7 @@ unlock_inode:
 unlock_rw:
                        ocfs2_rw_unlock(inode, 1);
 unlock_mutex:
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
 
                        /* clear dio flag in ocfs2_inode_info */
                        oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
index e9c99e35f5ea74e160c158ca05b01371057d24d3..7d62c43a2c3e24934965ba5df66f44e33fc0488a 100644 (file)
@@ -414,7 +414,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (status < 0) {
@@ -468,7 +468,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
@@ -506,7 +506,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        status = ocfs2_read_inode_block_full(inode, &alloc_bh,
                                             OCFS2_BH_IGNORE_CACHE);
@@ -539,7 +539,7 @@ bail:
        brelse(alloc_bh);
 
        if (inode) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                iput(inode);
        }
 
@@ -571,7 +571,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (status < 0) {
@@ -601,7 +601,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
 
        brelse(main_bm_bh);
 
@@ -643,7 +643,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&local_alloc_inode->i_mutex);
+       inode_lock(local_alloc_inode);
 
        /*
         * We must double check state and allocator bits because
@@ -709,7 +709,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
        status = 0;
 bail:
        if (status < 0 && local_alloc_inode) {
-               mutex_unlock(&local_alloc_inode->i_mutex);
+               inode_unlock(local_alloc_inode);
                iput(local_alloc_inode);
        }
 
index 124471d26a73f4fe79738e2b89662d82a7ad0561..e3d05d9901a3da26b123865545446f5ef96b08a5 100644 (file)
@@ -276,7 +276,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
         *      context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
         */
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -338,7 +338,7 @@ out_commit:
        ocfs2_commit_trans(osb, handle);
 
 out_unlock_mutex:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        if (context->data_ac) {
                ocfs2_free_alloc_context(context->data_ac);
@@ -632,7 +632,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
                goto out;
        }
 
-       mutex_lock(&gb_inode->i_mutex);
+       inode_lock(gb_inode);
 
        ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
        if (ret) {
@@ -640,7 +640,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
                goto out_unlock_gb_mutex;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
@@ -708,11 +708,11 @@ out_commit:
        brelse(gd_bh);
 
 out_unlock_tl_inode:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        ocfs2_inode_unlock(gb_inode, 1);
 out_unlock_gb_mutex:
-       mutex_unlock(&gb_inode->i_mutex);
+       inode_unlock(gb_inode);
        brelse(gb_bh);
        iput(gb_inode);
 
@@ -905,7 +905,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
        if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
                return -EROFS;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * This prevents concurrent writes from other nodes
@@ -969,7 +969,7 @@ out_inode_unlock:
 out_rw_unlock:
        ocfs2_rw_unlock(inode, 1);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return status;
 }
index ab42c38031b17dccb5a872781cec7784ca2d4cfa..6b3e87189a6467fd3c72533db5c52d149ba2064d 100644 (file)
@@ -1045,7 +1045,7 @@ leave:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -1664,7 +1664,7 @@ bail:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -2121,11 +2121,11 @@ static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
                return ret;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
 
        ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (ret < 0) {
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
 
                mlog_errno(ret);
@@ -2226,7 +2226,7 @@ out:
 
        if (ret) {
                ocfs2_inode_unlock(orphan_dir_inode, 1);
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
        }
 
@@ -2495,7 +2495,7 @@ out:
                        ocfs2_free_alloc_context(inode_ac);
 
                /* Unroll orphan dir locking */
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                ocfs2_inode_unlock(orphan_dir, 1);
                iput(orphan_dir);
        }
@@ -2602,7 +2602,7 @@ leave:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -2689,7 +2689,7 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
 
 bail_unlock_orphan:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
 
        ocfs2_free_dir_lookup_result(&orphan_insert);
@@ -2721,10 +2721,10 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
        status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (status < 0) {
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
                mlog_errno(status);
                goto bail;
@@ -2770,7 +2770,7 @@ bail_commit:
 
 bail_unlock_orphan:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        brelse(orphan_dir_bh);
        iput(orphan_dir_inode);
 
@@ -2834,12 +2834,12 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
                goto leave;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
 
        status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (status < 0) {
                mlog_errno(status);
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
                goto leave;
        }
@@ -2901,7 +2901,7 @@ out_commit:
        ocfs2_commit_trans(osb, handle);
 orphan_unlock:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
 leave:
 
index fde9ef18cff3c4ba59d7891ab395bc739fdbf81e..9c9dd30bc94541fa89baed2b0ac8c2a3c8a72e31 100644 (file)
@@ -308,7 +308,7 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
                WARN_ON(bh != oinfo->dqi_gqi_bh);
        spin_unlock(&dq_data_lock);
        if (ex) {
-               mutex_lock(&oinfo->dqi_gqinode->i_mutex);
+               inode_lock(oinfo->dqi_gqinode);
                down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
        } else {
                down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
@@ -320,7 +320,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
 {
        if (ex) {
                up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
-               mutex_unlock(&oinfo->dqi_gqinode->i_mutex);
+               inode_unlock(oinfo->dqi_gqinode);
        } else {
                up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
        }
index 252119860e6c1ae36fbfe7d5b5b84010341f8e5b..3eff031aaf264df58b74c4165749b0c2385c6312 100644 (file)
@@ -807,7 +807,7 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
                        mlog_errno(ret);
                        goto out;
                }
-               mutex_lock(&alloc_inode->i_mutex);
+               inode_lock(alloc_inode);
 
                ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
                if (ret) {
@@ -867,7 +867,7 @@ out_unlock:
        }
 out_mutex:
        if (alloc_inode) {
-               mutex_unlock(&alloc_inode->i_mutex);
+               inode_unlock(alloc_inode);
                iput(alloc_inode);
        }
 out:
@@ -4197,7 +4197,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
                goto out;
        }
 
-       mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(new_inode, I_MUTEX_CHILD);
        ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
                                      OI_LS_REFLINK_TARGET);
        if (ret) {
@@ -4231,7 +4231,7 @@ inode_unlock:
        ocfs2_inode_unlock(new_inode, 1);
        brelse(new_bh);
 out_unlock:
-       mutex_unlock(&new_inode->i_mutex);
+       inode_unlock(new_inode);
 out:
        if (!ret) {
                ret = filemap_fdatawait(inode->i_mapping);
@@ -4402,11 +4402,11 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
                        return error;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = dquot_initialize(dir);
        if (!error)
                error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!error)
                fsnotify_create(dir, new_dentry);
        return error;
index 79b8021302b3eed84b9c59637cddd0e4844eb78b..576b9a04873f2f064fe3f66842713768b8f03f83 100644 (file)
@@ -301,7 +301,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (ret < 0) {
@@ -375,7 +375,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
@@ -486,7 +486,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (ret < 0) {
@@ -590,7 +590,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
index fc6d25f6d4442c79dd376da24f18c3e3355232ed..2f19aeec5482106de43b65bc6598c42fb4ed2e63 100644 (file)
@@ -141,7 +141,7 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
                if (ac->ac_which != OCFS2_AC_USE_LOCAL)
                        ocfs2_inode_unlock(inode, 1);
 
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                iput(inode);
                ac->ac_inode = NULL;
@@ -797,11 +797,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
                return -EINVAL;
        }
 
-       mutex_lock(&alloc_inode->i_mutex);
+       inode_lock(alloc_inode);
 
        status = ocfs2_inode_lock(alloc_inode, &bh, 1);
        if (status < 0) {
-               mutex_unlock(&alloc_inode->i_mutex);
+               inode_unlock(alloc_inode);
                iput(alloc_inode);
 
                mlog_errno(status);
@@ -2875,10 +2875,10 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
                goto bail;
        }
 
-       mutex_lock(&inode_alloc_inode->i_mutex);
+       inode_lock(inode_alloc_inode);
        status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
        if (status < 0) {
-               mutex_unlock(&inode_alloc_inode->i_mutex);
+               inode_unlock(inode_alloc_inode);
                iput(inode_alloc_inode);
                mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
                     (u32)suballoc_slot, status);
@@ -2891,7 +2891,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
                mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
 
        ocfs2_inode_unlock(inode_alloc_inode, 0);
-       mutex_unlock(&inode_alloc_inode->i_mutex);
+       inode_unlock(inode_alloc_inode);
 
        iput(inode_alloc_inode);
        brelse(alloc_bh);
index f0e241ffd94fc9d9e4ffb3d9afbda6e6eb932c3b..7d3d979f57d9142169f93f2c88b27a612dc06dc4 100644 (file)
@@ -2524,7 +2524,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
                mlog_errno(ret);
                goto out;
        }
-       mutex_lock(&xb_alloc_inode->i_mutex);
+       inode_lock(xb_alloc_inode);
 
        ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
        if (ret < 0) {
@@ -2549,7 +2549,7 @@ out_unlock:
        ocfs2_inode_unlock(xb_alloc_inode, 1);
        brelse(xb_alloc_bh);
 out_mutex:
-       mutex_unlock(&xb_alloc_inode->i_mutex);
+       inode_unlock(xb_alloc_inode);
        iput(xb_alloc_inode);
 out:
        brelse(blk_bh);
@@ -3619,17 +3619,17 @@ int ocfs2_xattr_set(struct inode *inode,
                }
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
                if (ret < 0) {
-                       mutex_unlock(&tl_inode->i_mutex);
+                       inode_unlock(tl_inode);
                        mlog_errno(ret);
                        goto cleanup;
                }
        }
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
                                        &xbs, &ctxt, ref_meta, &credits);
@@ -5460,7 +5460,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
                return ret;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -5504,7 +5504,7 @@ out_commit:
 out:
        ocfs2_schedule_truncate_log_flush(osb, 1);
 
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
index b25b1542c5304a74999db5ad52af57e89133a68e..55bdc75e2172a38487ea2942176c127fdad0eb82 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -58,10 +58,10 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
        if (ret)
                newattrs.ia_valid |= ret | ATTR_FORCE;
 
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        /* Note any delegations or leases have already been broken: */
        ret = notify_change(dentry, &newattrs, NULL);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
        return ret;
 }
 
@@ -510,7 +510,7 @@ static int chmod_common(struct path *path, umode_t mode)
        if (error)
                return error;
 retry_deleg:
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_path_chmod(path, mode);
        if (error)
                goto out_unlock;
@@ -518,7 +518,7 @@ retry_deleg:
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
        error = notify_change(path->dentry, &newattrs, &delegated_inode);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
@@ -593,11 +593,11 @@ retry_deleg:
        if (!S_ISDIR(inode->i_mode))
                newattrs.ia_valid |=
                        ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_path_chown(path, uid, gid);
        if (!error)
                error = notify_change(path->dentry, &newattrs, &delegated_inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
index eff6319d50373c05d4d829b0fd83fa784d5f187b..d894e7cd9a864239ea19dc86d405239ee2405607 100644 (file)
@@ -248,9 +248,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
        if (err)
                goto out_cleanup;
 
-       mutex_lock(&newdentry->d_inode->i_mutex);
+       inode_lock(newdentry->d_inode);
        err = ovl_set_attr(newdentry, stat);
-       mutex_unlock(&newdentry->d_inode->i_mutex);
+       inode_unlock(newdentry->d_inode);
        if (err)
                goto out_cleanup;
 
index 692ceda3bc21f6976b65f3e2d5aa4b7ef2e9c5e8..ed95272d57a61af3f26a58c71cf789d3c94e28ce 100644 (file)
@@ -167,7 +167,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
        struct dentry *newdentry;
        int err;
 
-       mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(udir, I_MUTEX_PARENT);
        newdentry = lookup_one_len(dentry->d_name.name, upperdir,
                                   dentry->d_name.len);
        err = PTR_ERR(newdentry);
@@ -185,7 +185,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 out_dput:
        dput(newdentry);
 out_unlock:
-       mutex_unlock(&udir->i_mutex);
+       inode_unlock(udir);
        return err;
 }
 
@@ -258,9 +258,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
        if (err)
                goto out_cleanup;
 
-       mutex_lock(&opaquedir->d_inode->i_mutex);
+       inode_lock(opaquedir->d_inode);
        err = ovl_set_attr(opaquedir, &stat);
-       mutex_unlock(&opaquedir->d_inode->i_mutex);
+       inode_unlock(opaquedir->d_inode);
        if (err)
                goto out_cleanup;
 
@@ -599,7 +599,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
        struct dentry *upper = ovl_dentry_upper(dentry);
        int err;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        err = -ESTALE;
        if (upper->d_parent == upperdir) {
                /* Don't let d_delete() think it can reset d_inode */
@@ -619,7 +619,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
         * now.
         */
        d_drop(dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 
        return err;
 }
index bf996e574f3d842995847f72b8ee7bb7466f2539..49e204560655a8f1737a2a03a38b7a88604a5a4c 100644 (file)
@@ -63,9 +63,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
        if (!err) {
                upperdentry = ovl_dentry_upper(dentry);
 
-               mutex_lock(&upperdentry->d_inode->i_mutex);
+               inode_lock(upperdentry->d_inode);
                err = notify_change(upperdentry, attr, NULL);
-               mutex_unlock(&upperdentry->d_inode->i_mutex);
+               inode_unlock(upperdentry->d_inode);
        }
        ovl_drop_write(dentry);
 out:
index adcb1398c48128682875b2ff4a6b42d95c1e116e..fdaf28f75e12acf1f4dba023c31017c81747744b 100644 (file)
@@ -228,7 +228,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
                                dput(dentry);
                        }
                }
-               mutex_unlock(&dir->d_inode->i_mutex);
+               inode_unlock(dir->d_inode);
        }
        revert_creds(old_cred);
        put_cred(override_cred);
@@ -399,7 +399,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
        loff_t res;
        struct ovl_dir_file *od = file->private_data;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        if (!file->f_pos)
                ovl_dir_reset(file);
 
@@ -429,7 +429,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
                res = offset;
        }
 out_unlock:
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
 
        return res;
 }
@@ -454,10 +454,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
                        ovl_path_upper(dentry, &upperpath);
                        realfile = ovl_path_open(&upperpath, O_RDONLY);
                        smp_mb__before_spinlock();
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        if (!od->upperfile) {
                                if (IS_ERR(realfile)) {
-                                       mutex_unlock(&inode->i_mutex);
+                                       inode_unlock(inode);
                                        return PTR_ERR(realfile);
                                }
                                od->upperfile = realfile;
@@ -467,7 +467,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
                                        fput(realfile);
                                realfile = od->upperfile;
                        }
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                }
        }
 
@@ -479,9 +479,9 @@ static int ovl_dir_release(struct inode *inode, struct file *file)
        struct ovl_dir_file *od = file->private_data;
 
        if (od->cache) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ovl_cache_put(od, file->f_path.dentry);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        fput(od->realfile);
        if (od->upperfile)
@@ -557,7 +557,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
 {
        struct ovl_cache_entry *p;
 
-       mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
        list_for_each_entry(p, list, l_node) {
                struct dentry *dentry;
 
@@ -575,5 +575,5 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
                        ovl_cleanup(upper->d_inode, dentry);
                dput(dentry);
        }
-       mutex_unlock(&upper->d_inode->i_mutex);
+       inode_unlock(upper->d_inode);
 }
index d250604f985ab91abaa60e70a6a4025049016916..8d826bd56b26b10d641d7ce1f7a47b747851fac9 100644 (file)
@@ -229,7 +229,7 @@ void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
        WARN_ON(oe->__upperdentry);
        BUG_ON(!upperdentry->d_inode);
        /*
@@ -244,7 +244,7 @@ void ovl_dentry_version_inc(struct dentry *dentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(dentry->d_inode));
        oe->version++;
 }
 
@@ -252,7 +252,7 @@ u64 ovl_dentry_version_get(struct dentry *dentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(dentry->d_inode));
        return oe->version;
 }
 
@@ -375,9 +375,9 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir,
 {
        struct dentry *dentry;
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        dentry = lookup_one_len(name->name, dir, name->len);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        if (IS_ERR(dentry)) {
                if (PTR_ERR(dentry) == -ENOENT)
@@ -744,7 +744,7 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
        if (err)
                return ERR_PTR(err);
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
 retry:
        work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
                              strlen(OVL_WORKDIR_NAME));
@@ -770,7 +770,7 @@ retry:
                        goto out_dput;
        }
 out_unlock:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        mnt_drop_write(mnt);
 
        return work;
index 42cf8ddf0e5599da6621fcdf741d6a61fd2f2d7a..ab8dad3ccb6a8bac13a2eab25eeb2b68791d60f5 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -38,6 +38,12 @@ unsigned int pipe_max_size = 1048576;
  */
 unsigned int pipe_min_size = PAGE_SIZE;
 
+/* Maximum allocatable pages per user. The hard limit is unset by default;
+ * the soft limit defaults to PIPE_DEF_BUFFERS * INR_OPEN_CUR pages.
+ */
+unsigned long pipe_user_pages_hard;
+unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+
 /*
  * We use a start+len construction, which provides full use of the 
  * allocated memory.
@@ -583,20 +589,49 @@ pipe_fasync(int fd, struct file *filp, int on)
        return retval;
 }
 
+static void account_pipe_buffers(struct pipe_inode_info *pipe,
+                                 unsigned long old, unsigned long new)
+{
+       atomic_long_add(new - old, &pipe->user->pipe_bufs);
+}
+
+static bool too_many_pipe_buffers_soft(struct user_struct *user)
+{
+       return pipe_user_pages_soft &&
+              atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
+}
+
+static bool too_many_pipe_buffers_hard(struct user_struct *user)
+{
+       return pipe_user_pages_hard &&
+              atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
+}
+
 struct pipe_inode_info *alloc_pipe_info(void)
 {
        struct pipe_inode_info *pipe;
 
        pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
        if (pipe) {
-               pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+               unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
+               struct user_struct *user = get_current_user();
+
+               if (!too_many_pipe_buffers_hard(user)) {
+                       if (too_many_pipe_buffers_soft(user))
+                               pipe_bufs = 1;
+                       pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
+               }
+
                if (pipe->bufs) {
                        init_waitqueue_head(&pipe->wait);
                        pipe->r_counter = pipe->w_counter = 1;
-                       pipe->buffers = PIPE_DEF_BUFFERS;
+                       pipe->buffers = pipe_bufs;
+                       pipe->user = user;
+                       account_pipe_buffers(pipe, 0, pipe_bufs);
                        mutex_init(&pipe->mutex);
                        return pipe;
                }
+               free_uid(user);
                kfree(pipe);
        }
 
@@ -607,6 +642,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
 {
        int i;
 
+       account_pipe_buffers(pipe, pipe->buffers, 0);
+       free_uid(pipe->user);
        for (i = 0; i < pipe->buffers; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;
                if (buf->ops)
@@ -998,6 +1035,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
                        memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
        }
 
+       account_pipe_buffers(pipe, pipe->buffers, nr_pages);
        pipe->curbuf = 0;
        kfree(pipe->bufs);
        pipe->bufs = bufs;
@@ -1069,6 +1107,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
                if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
                        ret = -EPERM;
                        goto out;
+               } else if ((too_many_pipe_buffers_hard(pipe->user) ||
+                           too_many_pipe_buffers_soft(pipe->user)) &&
+                          !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+                       ret = -EPERM;
+                       goto out;
                }
                ret = pipe_set_size(pipe, nr_pages);
                break;
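
Taken together, the pipe hunks add per-user page accounting with a soft and
a hard cap: over the hard cap a new pipe fails outright, over the soft cap
it is still created but shrunk to a single buffer, and growing a pipe via
fcntl is refused for unprivileged users once either cap is exceeded. A
compact model of the allocation policy (assuming the defaults above, with
INR_OPEN_CUR taken to be 1024, so the soft cap works out to 16384 pages):

    #include <stdio.h>

    #define PIPE_DEF_BUFFERS 16

    static long soft = 16384;   /* PIPE_DEF_BUFFERS * INR_OPEN_CUR */
    static long hard = 0;       /* 0 == unlimited, the default here */

    static long bufs_granted(long user_pages)
    {
            if (hard && user_pages >= hard)
                    return 0;                /* allocation fails */
            if (soft && user_pages >= soft)
                    return 1;                /* degraded one-buffer pipe */
            return PIPE_DEF_BUFFERS;
    }

    int main(void)
    {
            printf("%ld %ld\n", bufs_granted(0), bufs_granted(20000));
            return 0;
    }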
index 6367e1e435c64144a1d9adb94e9c1cfd1f157dfe..c524fdddc7fb1f601d6d06ddb46d0affcca89c08 100644 (file)
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master;
 static struct mountpoint *mp;
 static struct hlist_head *list;
 
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+       return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
+
 static int propagate_one(struct mount *m)
 {
        struct mount *child;
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m)
        /* skip if mountpoint isn't covered by it */
        if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
                return 0;
-       if (m->mnt_group_id == last_dest->mnt_group_id) {
+       if (peers(m, last_dest)) {
                type = CL_MAKE_SHARED;
        } else {
                struct mount *n, *p;
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m)
                                        last_source = last_source->mnt_master;
                                        last_dest = last_source->mnt_parent;
                                }
-                               if (n->mnt_group_id != last_dest->mnt_group_id) {
+                               if (!peers(n, last_dest)) {
                                        last_source = last_source->mnt_master;
                                        last_dest = last_source->mnt_parent;
                                }
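
The pnode.c fix hinges on the convention that a group id of 0 means "not in
any peer group", so two id-0 mounts must not be treated as peers even
though their ids compare equal. A tiny demonstration of the helper's extra
check (struct pared down for illustration):

    #include <assert.h>

    struct mount { int mnt_group_id; };

    static int peers(struct mount *m1, struct mount *m2)
    {
            return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
    }

    int main(void)
    {
            struct mount a = { 0 }, b = { 0 }, c = { 7 }, d = { 7 };

            assert(!peers(&a, &b));   /* plain '==' wrongly said yes here */
            assert(peers(&c, &d));
            return 0;
    }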
index 92e6726f6e3732573bd9a64f3b313cc3508ce519..a939f5ed7f89ccb39673fef11beb542e5bef8159 100644 (file)
@@ -552,9 +552,9 @@ static int open_kcore(struct inode *inode, struct file *filp)
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, proc_root_kcore->size);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
index 67e8db442cf03802c7758a76dd8adf79b46a3a50..b6a8d3529fea9714fb25d063e9505b198201e7ab 100644 (file)
@@ -50,7 +50,7 @@ int proc_setup_self(struct super_block *s)
        struct pid_namespace *ns = s->s_fs_info;
        struct dentry *self;
        
-       mutex_lock(&root_inode->i_mutex);
+       inode_lock(root_inode);
        self = d_alloc_name(s->s_root, "self");
        if (self) {
                struct inode *inode = new_inode_pseudo(s);
@@ -69,7 +69,7 @@ int proc_setup_self(struct super_block *s)
        } else {
                self = ERR_PTR(-ENOMEM);
        }
-       mutex_unlock(&root_inode->i_mutex);
+       inode_unlock(root_inode);
        if (IS_ERR(self)) {
                pr_err("proc_fill_super: can't allocate /proc/self\n");
                return PTR_ERR(self);
index 71ffc91060f6d6ad32081bfa53d2a3c7b3706e53..fa95ab2d36740803e06da898a4851db317b86a9e 100644 (file)
@@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
                                sizeof(struct proc_maps_private));
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-                               struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+                   struct vm_area_struct *vma, int is_pid)
 {
-       struct inode *inode = priv->inode;
-       struct task_struct *task;
-       pid_t ret = 0;
+       int stack = 0;
+
+       if (is_pid) {
+               stack = vma->vm_start <= vma->vm_mm->start_stack &&
+                       vma->vm_end >= vma->vm_mm->start_stack;
+       } else {
+               struct inode *inode = priv->inode;
+               struct task_struct *task;
 
-       rcu_read_lock();
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
-       if (task) {
-               task = task_of_stack(task, vma, is_pid);
+               rcu_read_lock();
+               task = pid_task(proc_pid(inode), PIDTYPE_PID);
                if (task)
-                       ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+                       stack = vma_is_stack_for_task(vma, task);
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
-
-       return ret;
+       return stack;
 }
 
 static void
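
The new is_stack() reduces stack detection to one rule for the is_pid case:
a VMA is "the stack" iff it spans mm->start_stack, which is why the
[stack:TID] annotations disappear in the hunks below. A minimal model of
that containment test (types are stand-ins, addresses invented):

    #include <stdio.h>

    struct mm  { unsigned long start_stack; };
    struct vma { unsigned long vm_start, vm_end; };

    static int is_stack(const struct vma *v, const struct mm *mm)
    {
            return v->vm_start <= mm->start_stack &&
                   v->vm_end >= mm->start_stack;
    }

    int main(void)
    {
            struct mm mm = { .start_stack = 0x7ffd000 };
            struct vma stack = { 0x7ffc000, 0x7ffe000 };
            struct vma heap  = { 0x1000, 0x2000 };

            printf("%d %d\n", is_stack(&stack, &mm), is_stack(&heap, &mm));
            return 0;
    }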
@@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 
        name = arch_vma_name(vma);
        if (!name) {
-               pid_t tid;
-
                if (!mm) {
                        name = "[vdso]";
                        goto done;
@@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
                        goto done;
                }
 
-               tid = pid_of_stack(priv, vma, is_pid);
-               if (tid != 0) {
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack)) {
-                               name = "[stack]";
-                       } else {
-                               /* Thread stack in /proc/PID/maps */
-                               seq_pad(m, ' ');
-                               seq_printf(m, "[stack:%d]", tid);
-                       }
-               }
+               if (is_stack(priv, vma, is_pid))
+                       name = "[stack]";
        }
 
 done:
@@ -602,7 +593,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                smaps_pmd_entry(pmd, addr, walk);
                spin_unlock(ptl);
                return 0;
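
These walker hunks all track one interface change: pmd_trans_huge_lock()
now returns the held spinlock (or NULL) instead of reporting success
through a return value plus an output parameter. Roughly, and with stand-in
types rather than the real kernel definitions:

    #include <stdio.h>

    typedef struct { int locked; } spinlock_t;
    typedef struct { int is_huge; spinlock_t lock; } pmd_t;

    /* New style: return the held lock on success, NULL otherwise. */
    static spinlock_t *pmd_trans_huge_lock(pmd_t *pmd)
    {
            if (!pmd->is_huge)
                    return NULL;
            pmd->lock.locked = 1;
            return &pmd->lock;
    }

    int main(void)
    {
            pmd_t huge = { .is_huge = 1 }, small = { 0 };

            if (pmd_trans_huge_lock(&huge))
                    printf("huge path, lock held\n");
            if (!pmd_trans_huge_lock(&small))
                    printf("fall through to the pte walk\n");
            return 0;
    }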
@@ -913,7 +905,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        spinlock_t *ptl;
        struct page *page;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
                        clear_soft_dirty_pmd(vma, addr, pmd);
                        goto out;
@@ -1187,7 +1180,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
        int err = 0;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       if (pmd_trans_huge_lock(pmdp, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmdp, vma);
+       if (ptl) {
                u64 flags = 0, frame = 0;
                pmd_t pmd = *pmdp;
 
@@ -1519,7 +1513,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
        pte_t *orig_pte;
        pte_t *pte;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                pte_t huge_pte = *(pte_t *)pmd;
                struct page *page;
 
@@ -1548,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
+       pte_t huge_pte = huge_ptep_get(pte);
        struct numa_maps *md;
        struct page *page;
 
-       if (!pte_present(*pte))
+       if (!pte_present(huge_pte))
                return 0;
 
-       page = pte_page(*pte);
+       page = pte_page(huge_pte);
        if (!page)
                return 0;
 
        md = walk->private;
-       gather_stats(page, md, pte_dirty(*pte), 1);
+       gather_stats(page, md, pte_dirty(huge_pte), 1);
        return 0;
 }
 
@@ -1613,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
                seq_file_path(m, file, "\n\t= ");
        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
                seq_puts(m, " heap");
-       } else {
-               pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
-               if (tid != 0) {
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack))
-                               seq_puts(m, " stack");
-                       else
-                               seq_printf(m, " stack:%d", tid);
-               }
+       } else if (is_stack(proc_priv, vma, is_pid)) {
+               seq_puts(m, " stack");
        }
 
        if (is_vm_hugetlb_page(vma))
index e0d64c92e4f6576c38a8a4a7cc9b8d13a2b3362e..faacb0c0d857602111bfc04f2e374c451059c358 100644 (file)
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
        return size;
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-                               struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+                   struct vm_area_struct *vma, int is_pid)
 {
-       struct inode *inode = priv->inode;
-       struct task_struct *task;
-       pid_t ret = 0;
-
-       rcu_read_lock();
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
-       if (task) {
-               task = task_of_stack(task, vma, is_pid);
+       struct mm_struct *mm = vma->vm_mm;
+       int stack = 0;
+
+       if (is_pid) {
+               stack = vma->vm_start <= mm->start_stack &&
+                       vma->vm_end >= mm->start_stack;
+       } else {
+               struct inode *inode = priv->inode;
+               struct task_struct *task;
+
+               rcu_read_lock();
+               task = pid_task(proc_pid(inode), PIDTYPE_PID);
                if (task)
-                       ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+                       stack = vma_is_stack_for_task(vma, task);
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
-
-       return ret;
+       return stack;
 }
 
 /*
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
        if (file) {
                seq_pad(m, ' ');
                seq_file_path(m, file, "");
-       } else if (mm) {
-               pid_t tid = pid_of_stack(priv, vma, is_pid);
-
-               if (tid != 0) {
-                       seq_pad(m, ' ');
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack))
-                               seq_printf(m, "[stack]");
-                       else
-                               seq_printf(m, "[stack:%d]", tid);
-               }
+       } else if (mm && is_stack(priv, vma, is_pid)) {
+               seq_pad(m, ' ');
+               seq_printf(m, "[stack]");
        }
 
        seq_putc(m, '\n');
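
The user-visible effect of the is_stack() rework in both files is that the
per-thread "[stack:TID]" annotation disappears; only the main thread's stack
is still tagged. Illustrative /proc/PID/maps excerpts (addresses invented):

        before:  7f2a4c021000-7f2a4c821000 rw-p 00000000 00:00 0   [stack:1234]
        after:   7ffd1e8a0000-7ffd1e8c1000 rw-p 00000000 00:00 0   [stack]
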
index 9eacd59e0360f1367a084a1e7bfa5dca0e3fe170..e58a31e8fb2a186aaa2d3dee62811f090e2ade57 100644 (file)
@@ -52,7 +52,7 @@ int proc_setup_thread_self(struct super_block *s)
        struct pid_namespace *ns = s->s_fs_info;
        struct dentry *thread_self;
 
-       mutex_lock(&root_inode->i_mutex);
+       inode_lock(root_inode);
        thread_self = d_alloc_name(s->s_root, "thread-self");
        if (thread_self) {
                struct inode *inode = new_inode_pseudo(s);
@@ -71,7 +71,7 @@ int proc_setup_thread_self(struct super_block *s)
        } else {
                thread_self = ERR_PTR(-ENOMEM);
        }
-       mutex_unlock(&root_inode->i_mutex);
+       inode_unlock(root_inode);
        if (IS_ERR(thread_self)) {
                pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
                return PTR_ERR(thread_self);
index d8c439d813ce1bd0a415fd09a8407c6c6192d49b..dc645b66cd79aea96ea723f8b127a16f7d6f70cd 100644 (file)
@@ -377,7 +377,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
                break;
        }
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_alloc_name(root, name);
        if (!dentry)
@@ -397,12 +397,12 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
        list_add(&private->list, &allpstore);
        spin_unlock_irqrestore(&allpstore_lock, flags);
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        return 0;
 
 fail_lockedalloc:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        kfree(private);
 fail_alloc:
        iput(inode);
index fbd70af98820752ca7126363c4597de3d781b14d..3c3b81bb6dfebbd3d6c3f1f8f1252e46308bf9c9 100644 (file)
@@ -682,9 +682,9 @@ int dquot_quota_sync(struct super_block *sb, int type)
                        continue;
                if (!sb_has_quota_active(sb, cnt))
                        continue;
-               mutex_lock(&dqopt->files[cnt]->i_mutex);
+               inode_lock(dqopt->files[cnt]);
                truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
-               mutex_unlock(&dqopt->files[cnt]->i_mutex);
+               inode_unlock(dqopt->files[cnt]);
        }
        mutex_unlock(&dqopt->dqonoff_mutex);
 
@@ -2162,12 +2162,12 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
                        /* If quota was reenabled in the meantime, we have
                         * nothing to do */
                        if (!sb_has_quota_loaded(sb, cnt)) {
-                               mutex_lock(&toputinode[cnt]->i_mutex);
+                               inode_lock(toputinode[cnt]);
                                toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
                                  S_NOATIME | S_NOQUOTA);
                                truncate_inode_pages(&toputinode[cnt]->i_data,
                                                     0);
-                               mutex_unlock(&toputinode[cnt]->i_mutex);
+                               inode_unlock(toputinode[cnt]);
                                mark_inode_dirty_sync(toputinode[cnt]);
                        }
                        mutex_unlock(&dqopt->dqonoff_mutex);
@@ -2258,11 +2258,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
                /* We don't want quota and atime on quota files (deadlocks
                 * possible). Also nobody should write to the file - we use
                 * special IO operations which ignore the immutable bit. */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
                                             S_NOQUOTA);
                inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * When S_NOQUOTA is set, remove dquot references as no more
                 * references can be added
@@ -2305,12 +2305,12 @@ out_file_init:
        iput(inode);
 out_lock:
        if (oldflags != -1) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                /* Set the flags back (in the case of accidental quotaon()
                 * on a wrong file we don't want to mess up the flags) */
                inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
                inode->i_flags |= oldflags;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
@@ -2430,9 +2430,9 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
        struct dentry *dentry;
        int error;
 
-       mutex_lock(&d_inode(sb->s_root)->i_mutex);
+       inode_lock(d_inode(sb->s_root));
        dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
-       mutex_unlock(&d_inode(sb->s_root)->i_mutex);
+       inode_unlock(d_inode(sb->s_root));
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
index 06b07d5a08fec4be6d83e9c92192963da6c8bc5c..dadf24e5c95bb7004d105f0773430576bfc57cf5 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/splice.h>
 #include <linux/compat.h>
 #include <linux/mount.h>
+#include <linux/fs.h>
 #include "internal.h"
 
 #include <asm/uaccess.h>
@@ -183,7 +184,7 @@ loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
        switch (whence) {
        case SEEK_SET: case SEEK_CUR:
                return generic_file_llseek_size(file, offset, whence,
-                                               ~0ULL, 0);
+                                               OFFSET_MAX, 0);
        default:
                return -EINVAL;
        }
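
The constant swap matters because loff_t is signed: on two's-complement
machines an all-ones pattern converts to -1, which is useless as an upper
bound, while OFFSET_MAX (hence the new linux/fs.h include) is the largest
positive loff_t. In short:

        loff_t bad  = ~0ULL;       /* becomes -1: every offset "exceeds" it */
        loff_t good = OFFSET_MAX;  /* LLONG_MAX: a real maximum file offset */
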
@@ -238,7 +239,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file_inode(file);
        loff_t retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
                case SEEK_END:
                        offset += i_size_read(inode);
@@ -283,7 +284,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
                retval = offset;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return retval;
 }
 EXPORT_SYMBOL(default_llseek);
@@ -1532,10 +1533,12 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 
        if (!(file_in->f_mode & FMODE_READ) ||
            !(file_out->f_mode & FMODE_WRITE) ||
-           (file_out->f_flags & O_APPEND) ||
-           !file_in->f_op->clone_file_range)
+           (file_out->f_flags & O_APPEND))
                return -EBADF;
 
+       if (!file_in->f_op->clone_file_range)
+               return -EOPNOTSUPP;
+
        ret = clone_verify_area(file_in, pos_in, len, false);
        if (ret)
                return ret;
@@ -1656,6 +1659,9 @@ next_file:
                mnt_drop_write_file(dst_file);
 next_loop:
                fdput(dst_fd);
+
+               if (fatal_signal_pending(current))
+                       goto out;
        }
 
 out:
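
The added fatal_signal_pending() test is the stock pattern for long-running
loops in ioctl context: a killed task should stop generating IO promptly.
Sketch (names illustrative, not from this file):

        for (i = 0; i < nr_items; i++) {
                dedupe_one_range(i);            /* potentially lots of IO */
                if (fatal_signal_pending(current))
                        break;                  /* keep finished work, stop */
        }
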
index ced679179cac0686407c3743cff177289bfc3959..e69ef3b79787b9fb4f02ab65b35c28571c9df1a0 100644 (file)
@@ -44,7 +44,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
                fsnotify_access(file);
                file_accessed(file);
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return res;
 }
index 4a024e2ceb9f76fbf9156d0dc84c3e09a4a1ff15..3abd4004184ba0b49ddc91dac59795230125e133 100644 (file)
@@ -38,11 +38,11 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        reiserfs_write_lock(inode->i_sb);
        err = reiserfs_commit_for_inode(inode);
        reiserfs_write_unlock(inode->i_sb);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (err < 0)
                return err;
        return 0;
index 96a1bcf33db4435098baa153f807a71274543aaf..9424a4ba93a9504b12c75c08d0f4a4cd8f49fe20 100644 (file)
@@ -158,7 +158,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        BUG_ON(!S_ISREG(inode->i_mode));
        err = sync_mapping_buffers(inode->i_mapping);
        reiserfs_write_lock(inode->i_sb);
@@ -166,7 +166,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
        reiserfs_write_unlock(inode->i_sb);
        if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (barrier_done < 0)
                return barrier_done;
        return (err < 0) ? -EIO : 0;
index 6ec8a30a0911b953e0c79ddcbd1458c8fc13142a..036a1fc0a8c35655a2bdb6cfb5b79dfccca04d54 100644 (file)
@@ -224,7 +224,7 @@ out_unlock:
        page_cache_release(page);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        reiserfs_write_unlock(inode->i_sb);
        return retval;
 }
index 05db7473bcb5c64078dc572d2ef8e455a770dbba..c0306ec8ed7b8f5d4eef97fd1cf4c7653c2b1f20 100644 (file)
@@ -288,7 +288,7 @@ static int finish_unfinished(struct super_block *s)
                pathrelse(&path);
 
                inode = reiserfs_iget(s, &obj_key);
-               if (!inode) {
+               if (IS_ERR_OR_NULL(inode)) {
                        /*
                         * the unlink almost completed, it just did not
                         * manage to remove "save" link and release objectid
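
The IS_ERR_OR_NULL() fix closes a crash window: reiserfs_iget() can hand back
an ERR_PTR() as well as NULL, and a bare NULL check lets the error pointer
through to be dereferenced. The general defensive shape (a sketch; the
some_iget() name is illustrative):

        inode = some_iget(sb, key);     /* may return NULL or ERR_PTR() */
        if (IS_ERR_OR_NULL(inode))
                return inode ? PTR_ERR(inode) : -ENOENT;
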
index e5ddb4e5ea9497956cfa7b04dca84eba790829d1..57e0b23105327b298d17db42d44c2ebf8bd31715 100644 (file)
 #ifdef CONFIG_REISERFS_FS_XATTR
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
        return dir->i_op->create(dir, dentry, mode, true);
 }
 #endif
 
 static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
        return dir->i_op->mkdir(dir, dentry, mode);
 }
 
@@ -85,11 +85,11 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 {
        int error;
 
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
 
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        error = dir->i_op->unlink(dir, dentry);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        if (!error)
                d_delete(dentry);
@@ -100,13 +100,13 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
 {
        int error;
 
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
 
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        error = dir->i_op->rmdir(dir, dentry);
        if (!error)
                d_inode(dentry)->i_flags |= S_DEAD;
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        if (!error)
                d_delete(dentry);
 
@@ -123,7 +123,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
        if (d_really_is_negative(privroot))
                return ERR_PTR(-ENODATA);
 
-       mutex_lock_nested(&d_inode(privroot)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR);
 
        xaroot = dget(REISERFS_SB(sb)->xattr_root);
        if (!xaroot)
@@ -139,7 +139,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
                }
        }
 
-       mutex_unlock(&d_inode(privroot)->i_mutex);
+       inode_unlock(d_inode(privroot));
        return xaroot;
 }
 
@@ -156,7 +156,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
                 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
                 inode->i_generation);
 
-       mutex_lock_nested(&d_inode(xaroot)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xaroot), I_MUTEX_XATTR);
 
        xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
        if (!IS_ERR(xadir) && d_really_is_negative(xadir)) {
@@ -170,7 +170,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
                }
        }
 
-       mutex_unlock(&d_inode(xaroot)->i_mutex);
+       inode_unlock(d_inode(xaroot));
        dput(xaroot);
        return xadir;
 }
@@ -195,7 +195,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
                container_of(ctx, struct reiserfs_dentry_buf, ctx);
        struct dentry *dentry;
 
-       WARN_ON_ONCE(!mutex_is_locked(&d_inode(dbuf->xadir)->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(d_inode(dbuf->xadir)));
 
        if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
                return -ENOSPC;
@@ -254,7 +254,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                goto out_dir;
        }
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
 
        buf.xadir = dir;
        while (1) {
@@ -276,7 +276,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                        break;
                buf.count = 0;
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        cleanup_dentry_buf(&buf);
 
@@ -298,13 +298,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                if (!err) {
                        int jerror;
 
-                       mutex_lock_nested(&d_inode(dir->d_parent)->i_mutex,
+                       inode_lock_nested(d_inode(dir->d_parent),
                                          I_MUTEX_XATTR);
                        err = action(dir, data);
                        reiserfs_write_lock(inode->i_sb);
                        jerror = journal_end(&th);
                        reiserfs_write_unlock(inode->i_sb);
-                       mutex_unlock(&d_inode(dir->d_parent)->i_mutex);
+                       inode_unlock(d_inode(dir->d_parent));
                        err = jerror ?: err;
                }
        }
@@ -384,7 +384,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
        if (IS_ERR(xadir))
                return ERR_CAST(xadir);
 
-       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
        xafile = lookup_one_len(name, xadir, strlen(name));
        if (IS_ERR(xafile)) {
                err = PTR_ERR(xafile);
@@ -404,7 +404,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
        if (err)
                dput(xafile);
 out:
-       mutex_unlock(&d_inode(xadir)->i_mutex);
+       inode_unlock(d_inode(xadir));
        dput(xadir);
        if (err)
                return ERR_PTR(err);
@@ -469,7 +469,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
        if (IS_ERR(xadir))
                return PTR_ERR(xadir);
 
-       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
        dentry = lookup_one_len(name, xadir, strlen(name));
        if (IS_ERR(dentry)) {
                err = PTR_ERR(dentry);
@@ -483,7 +483,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
 
        dput(dentry);
 out_dput:
-       mutex_unlock(&d_inode(xadir)->i_mutex);
+       inode_unlock(d_inode(xadir));
        dput(xadir);
        return err;
 }
@@ -580,11 +580,11 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
                        .ia_valid = ATTR_SIZE | ATTR_CTIME,
                };
 
-               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_XATTR);
+               inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
                inode_dio_wait(d_inode(dentry));
 
                err = reiserfs_setattr(dentry, &newattrs);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
        } else
                update_ctime(inode);
 out_unlock:
@@ -888,9 +888,9 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
                goto out;
        }
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
        err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        if (!err)
                err = buf.pos;
@@ -905,7 +905,7 @@ static int create_privroot(struct dentry *dentry)
        int err;
        struct inode *inode = d_inode(dentry->d_parent);
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
 
        err = xattr_mkdir(inode, dentry, 0700);
        if (err || d_really_is_negative(dentry)) {
@@ -995,7 +995,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
        int err = 0;
 
        /* If we don't have the privroot located yet - go find it */
-       mutex_lock(&d_inode(s->s_root)->i_mutex);
+       inode_lock(d_inode(s->s_root));
        dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
                                strlen(PRIVROOT_NAME));
        if (!IS_ERR(dentry)) {
@@ -1005,7 +1005,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
                        d_inode(dentry)->i_flags |= S_PRIVATE;
        } else
                err = PTR_ERR(dentry);
-       mutex_unlock(&d_inode(s->s_root)->i_mutex);
+       inode_unlock(d_inode(s->s_root));
 
        return err;
 }
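
Note that the I_MUTEX_* arguments pass through the conversion untouched: they
are lockdep subclasses, telling the validator that holding two inode locks at
once here is ordered by design rather than a deadlock. The parent/child
shape, sketched:

        /* sketch: parent and child each get their own lockdep class */
        inode_lock_nested(dir, I_MUTEX_PARENT);
        inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        /* ... operate on both inodes ... */
        inode_unlock(d_inode(dentry));
        inode_unlock(dir);
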
@@ -1025,14 +1025,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                goto error;
 
        if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
-               mutex_lock(&d_inode(s->s_root)->i_mutex);
+               inode_lock(d_inode(s->s_root));
                err = create_privroot(REISERFS_SB(s)->priv_root);
-               mutex_unlock(&d_inode(s->s_root)->i_mutex);
+               inode_unlock(d_inode(s->s_root));
        }
 
        if (d_really_is_positive(privroot)) {
                s->s_xattr = reiserfs_xattr_handlers;
-               mutex_lock(&d_inode(privroot)->i_mutex);
+               inode_lock(d_inode(privroot));
                if (!REISERFS_SB(s)->xattr_root) {
                        struct dentry *dentry;
 
@@ -1043,7 +1043,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                        else
                                err = PTR_ERR(dentry);
                }
-               mutex_unlock(&d_inode(privroot)->i_mutex);
+               inode_unlock(d_inode(privroot));
        }
 
 error:
index b94fa6c3c6ebe8c7308aea0052f3afacad9745c4..053818dd6c18be8f228e1c40483e50d78aa78007 100644 (file)
@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
        if (isalarm(ctx))
                remaining = alarm_expires_remaining(&ctx->t.alarm);
        else
-               remaining = hrtimer_expires_remaining(&ctx->t.tmr);
+               remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
        return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
index c66f2423e1f5c511b0202cfdab46a5e2be53d7f8..4a0e48f9210483adf6b9bee36f220e80d65a09cd 100644 (file)
@@ -84,9 +84,9 @@ static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umo
         * the files within the tracefs system. It is up to the individual
         * mkdir routine to handle races.
         */
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        ret = tracefs_ops.mkdir(name);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        kfree(name);
 
@@ -109,13 +109,13 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
         * This time we need to unlock not only the parent (inode) but
         * also the directory that is being deleted.
         */
-       mutex_unlock(&inode->i_mutex);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(inode);
+       inode_unlock(dentry->d_inode);
 
        ret = tracefs_ops.rmdir(name);
 
-       mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock_nested(inode, I_MUTEX_PARENT);
+       inode_lock(dentry->d_inode);
 
        kfree(name);
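
These mkdir/rmdir paths show a pattern the new helpers preserve: the VFS
calls in with the inode locks already held, so tracefs drops them around its
own callback (which may take them itself) and re-takes them in the
lockdep-annotated order before returning. Schematically:

        inode_unlock(inode);                      /* plus the victim, for rmdir */
        ret = tracefs_ops.rmdir(name);            /* may lock inodes itself */
        inode_lock_nested(inode, I_MUTEX_PARENT); /* restore the VFS's state */
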
 
@@ -334,7 +334,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        if (!parent)
                parent = tracefs_mount->mnt_root;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry) && dentry->d_inode) {
                dput(dentry);
@@ -342,7 +342,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        }
 
        if (IS_ERR(dentry)) {
-               mutex_unlock(&parent->d_inode->i_mutex);
+               inode_unlock(parent->d_inode);
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
        }
 
@@ -351,7 +351,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 
 static struct dentry *failed_creating(struct dentry *dentry)
 {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       inode_unlock(dentry->d_parent->d_inode);
        dput(dentry);
        simple_release_fs(&tracefs_mount, &tracefs_mount_count);
        return NULL;
@@ -359,7 +359,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
 
 static struct dentry *end_creating(struct dentry *dentry)
 {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       inode_unlock(dentry->d_parent->d_inode);
        return dentry;
 }
 
@@ -544,9 +544,9 @@ void tracefs_remove(struct dentry *dentry)
        if (!parent || !parent->d_inode)
                return;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        ret = __tracefs_remove(dentry, parent);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (!ret)
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
 }
@@ -572,7 +572,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
 
        parent = dentry;
  down:
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
  loop:
        /*
         * The parent->d_subdirs is protected by the d_lock. Outside that
@@ -587,7 +587,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
                /* perhaps simple_empty(child) makes more sense */
                if (!list_empty(&child->d_subdirs)) {
                        spin_unlock(&parent->d_lock);
-                       mutex_unlock(&parent->d_inode->i_mutex);
+                       inode_unlock(parent->d_inode);
                        parent = child;
                        goto down;
                }
@@ -608,10 +608,10 @@ void tracefs_remove_recursive(struct dentry *dentry)
        }
        spin_unlock(&parent->d_lock);
 
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        child = parent;
        parent = parent->d_parent;
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
 
        if (child != dentry)
                /* go up */
@@ -619,7 +619,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
 
        if (!__tracefs_remove(child, parent))
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
 }
 
 /**
index e49bd2808bf3e397b9b499fe4d6036ad7600a71b..795992a8321e9e0c9531c92f97c91cc10835cc02 100644 (file)
@@ -515,8 +515,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
        dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
                dentry, inode->i_ino,
                inode->i_nlink, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
 
        err = dbg_check_synced_i_size(c, inode);
        if (err)
@@ -572,8 +572,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
        dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
                dentry, inode->i_ino,
                inode->i_nlink, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
        err = dbg_check_synced_i_size(c, inode);
        if (err)
                return err;
@@ -661,8 +661,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
 
        dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
                inode->i_ino, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
        err = check_dir_empty(c, d_inode(dentry));
        if (err)
                return err;
@@ -996,10 +996,10 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
        dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
                old_dentry, old_inode->i_ino, old_dir->i_ino,
                new_dentry, new_dir->i_ino);
-       ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
+       ubifs_assert(inode_is_locked(old_dir));
+       ubifs_assert(inode_is_locked(new_dir));
        if (unlink)
-               ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
+               ubifs_assert(inode_is_locked(new_inode));
 
 
        if (unlink && is_dir) {
index eff62801acbf10524e31f8ad7816c160e6f88a90..065c88f8e4b8c2d689e8fe9ce13122b5465e8f63 100644 (file)
@@ -1317,7 +1317,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        err = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* Synchronize the inode unless this is a 'datasync()' call. */
        if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
@@ -1332,7 +1332,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
         */
        err = ubifs_sync_wbufs_by_inode(c, inode);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index e53292d0c21bcd9d57c3f219d3f7cc31d5eb836e..c7f4d434d098fb7ac4a35df55e2c60adb179d4c0 100644 (file)
@@ -313,7 +313,7 @@ static int setxattr(struct inode *host, const char *name, const void *value,
        union ubifs_key key;
        int err, type;
 
-       ubifs_assert(mutex_is_locked(&host->i_mutex));
+       ubifs_assert(inode_is_locked(host));
 
        if (size > UBIFS_MAX_INO_DATA)
                return -ERANGE;
@@ -550,7 +550,7 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
 
        dbg_gen("xattr '%s', ino %lu ('%pd')", name,
                host->i_ino, dentry);
-       ubifs_assert(mutex_is_locked(&host->i_mutex));
+       ubifs_assert(inode_is_locked(host));
 
        err = check_namespace(&nm);
        if (err < 0)
index bddf3d071daec536fe5a3e775b2e041e0df5a620..1af98963d860f0e4ed2959d256ff53737d070fcb 100644 (file)
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct udf_inode_info *iinfo = UDF_I(inode);
        int err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        retval = generic_write_checks(iocb, from);
        if (retval <= 0)
@@ -136,7 +136,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                                (udf_file_entry_alloc_offset(inode) + end)) {
                        err = udf_expand_file_adinicb(inode);
                        if (err) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                udf_debug("udf_expand_adinicb: err=%d\n", err);
                                return err;
                        }
@@ -149,7 +149,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        retval = __generic_file_write_iter(iocb, from);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (retval > 0) {
                mark_inode_dirty(inode);
@@ -223,12 +223,12 @@ static int udf_release_file(struct inode *inode, struct file *filp)
                 * Grab i_mutex to avoid races with writes changing i_size
                 * while we are running.
                 */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                down_write(&UDF_I(inode)->i_data_sem);
                udf_discard_prealloc(inode);
                udf_truncate_tail_extent(inode);
                up_write(&UDF_I(inode)->i_data_sem);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
index 87dc16d155723eefb61b2a28c906287d4bf11222..166d3ed32c39a54b48c4ec93e2297de3d9efdcc2 100644 (file)
@@ -262,7 +262,7 @@ int udf_expand_file_adinicb(struct inode *inode)
                .nr_to_write = 1,
        };
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
        if (!iinfo->i_lenAlloc) {
                if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
                        iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
index 0fbb4c7c72e83701bb32ef066908a33b435900ae..a522c15a0bfd7e5e9e0d758bfbc59185a8cb2609 100644 (file)
@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
 {
        int i;
        int nr_groups = bitmap->s_nr_groups;
-       int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
-                                               nr_groups);
 
        for (i = 0; i < nr_groups; i++)
                if (bitmap->s_block_bitmap[i])
                        brelse(bitmap->s_block_bitmap[i]);
 
-       if (size <= PAGE_SIZE)
-               kfree(bitmap);
-       else
-               vfree(bitmap);
+       kvfree(bitmap);
 }
 
 static void udf_free_partition(struct udf_part_map *map)
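
kvfree() is what makes the deleted size computation unnecessary: it inspects
the pointer and dispatches to kfree() or vfree() on its own. The matching
allocation idiom of this era (kvmalloc() came later), sketched:

        void *buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
        if (!buf)
                buf = vmalloc(size);    /* too big or memory too fragmented */
        /* ... use buf ... */
        kvfree(buf);                    /* correct for either allocator */
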
index aa138d64560a6a3c2133bc70d57e367cc8c1476d..85c40f4f373d56b3d5d41ed2fe7459759c8a3aeb 100644 (file)
@@ -103,9 +103,9 @@ static int utimes_common(struct path *path, struct timespec *times)
                }
        }
 retry_deleg:
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = notify_change(path->dentry, &newattrs, &delegated_inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
index d5dd6c8b82a712085c105ec1c30f827c639ff614..4861322e28e8da2154f46b7b92cbeda576ee1438 100644 (file)
@@ -129,7 +129,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_inode_setxattr(dentry, name, value, size, flags);
        if (error)
                goto out;
@@ -137,7 +137,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -277,7 +277,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_inode_removexattr(dentry, name);
        if (error)
                goto out;
@@ -290,7 +290,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
        }
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(vfs_removexattr);
@@ -940,7 +940,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
        bool trusted = capable(CAP_SYS_ADMIN);
        struct simple_xattr *xattr;
        ssize_t remaining_size = size;
-       int err;
+       int err = 0;
 
 #ifdef CONFIG_FS_POSIX_ACL
        if (inode->i_acl) {
@@ -965,11 +965,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 
                err = xattr_list_one(&buffer, &remaining_size, xattr->name);
                if (err)
-                       return err;
+                       break;
        }
        spin_unlock(&xattrs->lock);
 
-       return size - remaining_size;
+       return err ? err : size - remaining_size;
 }
 
 /*
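
The reworked exit path above avoids returning with xattrs->lock held on
-ERANGE and gives err a defined value when the list is empty. The -ERANGE
case itself comes from userspace's usual two-call sizing dance; a hedged
sketch (inside some function, error handling elided):

        #include <sys/xattr.h>
        #include <stdlib.h>

        ssize_t len = listxattr(path, NULL, 0);   /* probe required size */
        char *names = malloc(len);
        len = listxattr(path, names, len);        /* -ERANGE if it grew */
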
index e2536bb1c760036af5efff9e13437a26b179da27..dc97eb21af071b0e040fe0d74b8ba51214e28cf9 100644 (file)
@@ -984,8 +984,6 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 
 /*
  * Values for di_flags
- * There should be a one-to-one correspondence between these flags and the
- * XFS_XFLAG_s.
  */
 #define XFS_DIFLAG_REALTIME_BIT  0     /* file's blocks come from rt area */
 #define XFS_DIFLAG_PREALLOC_BIT  1     /* file space has been preallocated */
@@ -1025,6 +1023,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
         XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
         XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
 
+/*
+ * Values for di_flags2. These start by being exposed to userspace in the upper
+ * 16 bits of the XFS_XFLAG_s range.
+ */
+#define XFS_DIFLAG2_DAX_BIT    0       /* use DAX for this inode */
+#define XFS_DIFLAG2_DAX                (1 << XFS_DIFLAG2_DAX_BIT)
+
+#define XFS_DIFLAG2_ANY                (XFS_DIFLAG2_DAX)
+
 /*
  * Inode number format:
  * low inopblog bits - offset in block
index b2b73a998d425d04779262ead61d02d65b153bc8..fffe3d01bd9fb5c771a4919edee1c9f615ab49e2 100644 (file)
@@ -35,40 +35,6 @@ struct dioattr {
 };
 #endif
 
-/*
- * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR.
- */
-#ifndef HAVE_FSXATTR
-struct fsxattr {
-       __u32           fsx_xflags;     /* xflags field value (get/set) */
-       __u32           fsx_extsize;    /* extsize field value (get/set)*/
-       __u32           fsx_nextents;   /* nextents field value (get)   */
-       __u32           fsx_projid;     /* project identifier (get/set) */
-       unsigned char   fsx_pad[12];
-};
-#endif
-
-/*
- * Flags for the bs_xflags/fsx_xflags field
- * There should be a one-to-one correspondence between these flags and the
- * XFS_DIFLAG_s.
- */
-#define XFS_XFLAG_REALTIME     0x00000001      /* data in realtime volume */
-#define XFS_XFLAG_PREALLOC     0x00000002      /* preallocated file extents */
-#define XFS_XFLAG_IMMUTABLE    0x00000008      /* file cannot be modified */
-#define XFS_XFLAG_APPEND       0x00000010      /* all writes append */
-#define XFS_XFLAG_SYNC         0x00000020      /* all writes synchronous */
-#define XFS_XFLAG_NOATIME      0x00000040      /* do not update access time */
-#define XFS_XFLAG_NODUMP       0x00000080      /* do not include in backups */
-#define XFS_XFLAG_RTINHERIT    0x00000100      /* create with rt bit set */
-#define XFS_XFLAG_PROJINHERIT  0x00000200      /* create with parents projid */
-#define XFS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
-#define XFS_XFLAG_EXTSIZE      0x00000800      /* extent size allocator hint */
-#define XFS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
-#define XFS_XFLAG_NODEFRAG     0x00002000      /* do not defragment */
-#define XFS_XFLAG_FILESTREAM   0x00004000      /* use filestream allocator */
-#define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
-
 /*
  * Structure for XFS_IOC_GETBMAP.
  * On input, fill in bmv_offset and bmv_length of the first structure
@@ -514,8 +480,8 @@ typedef struct xfs_swapext
 #define XFS_IOC_ALLOCSP                _IOW ('X', 10, struct xfs_flock64)
 #define XFS_IOC_FREESP         _IOW ('X', 11, struct xfs_flock64)
 #define XFS_IOC_DIOINFO                _IOR ('X', 30, struct dioattr)
-#define XFS_IOC_FSGETXATTR     _IOR ('X', 31, struct fsxattr)
-#define XFS_IOC_FSSETXATTR     _IOW ('X', 32, struct fsxattr)
+#define XFS_IOC_FSGETXATTR     FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR     FS_IOC_FSSETXATTR
 #define XFS_IOC_ALLOCSP64      _IOW ('X', 36, struct xfs_flock64)
 #define XFS_IOC_FREESP64       _IOW ('X', 37, struct xfs_flock64)
 #define XFS_IOC_GETBMAP                _IOWR('X', 38, struct getbmap)
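
With struct fsxattr and its flags promoted to the generic uapi (the XFS_IOC_
names above are now aliases for the FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR
ioctls), programs can manipulate them without XFS headers. A hedged userspace
sketch setting the new per-inode DAX hint, which the hunks below map to
XFS_DIFLAG2_DAX on disk (fd assumed already open):

        #include <linux/fs.h>
        #include <sys/ioctl.h>

        struct fsxattr fsx;

        if (ioctl(fd, FS_IOC_FSGETXATTR, &fsx) == 0) {
                fsx.fsx_xflags |= FS_XFLAG_DAX;
                ioctl(fd, FS_IOC_FSSETXATTR, &fsx);
        }
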
index 379c089fb0514a5a34934b51b790181567a3384f..a9ebabfe7587bf5abbc427e1603fcf012e5ac63c 100644 (file)
@@ -55,7 +55,7 @@ xfs_count_page_state(
        } while ((bh = bh->b_this_page) != head);
 }
 
-STATIC struct block_device *
+struct block_device *
 xfs_find_bdev_for_inode(
        struct inode            *inode)
 {
@@ -1208,6 +1208,10 @@ xfs_vm_writepages(
        struct writeback_control *wbc)
 {
        xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+       if (dax_mapping(mapping))
+               return dax_writeback_mapping_range(mapping,
+                               xfs_find_bdev_for_inode(mapping->host), wbc);
+
        return generic_writepages(mapping, wbc);
 }
 
index f6ffc9ae5cebeae7ebf8c2cc7c3598c07c4c68f5..a4343c63fb38c60336abbaf8741a75c2a9298a4d 100644 (file)
@@ -62,5 +62,6 @@ int   xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
                                 struct buffer_head *map_bh, int create);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
+extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
 
 #endif /* __XFS_AOPS_H__ */
index 45ec9e40150c3dc44bc2268c274472f108ad188b..6c876012b2e53246bf38ca3af8dd2d23861151fe 100644 (file)
@@ -75,7 +75,8 @@ xfs_zero_extent(
        ssize_t         size = XFS_FSB_TO_B(mp, count_fsb);
 
        if (IS_DAX(VFS_I(ip)))
-               return dax_clear_blocks(VFS_I(ip), block, size);
+               return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
+                               sector, size);
 
        /*
         * let the block layer decide on the fastest method of
index daed4bfb85b2f2d56a3e82fa5ba4156f9f41011d..435c7de42e5f322a82845382ad7e1fa54dfe3d0b 100644 (file)
@@ -1527,6 +1527,16 @@ xfs_wait_buftarg(
        LIST_HEAD(dispose);
        int loop = 0;
 
+       /*
+        * We need to flush the buffer workqueue to ensure that all IO
+        * completion processing is 100% done. Just waiting on buffer locks is
+        * not sufficient for async IO as the reference count held over IO is
+        * not released until after the buffer lock is dropped. Hence we need to
+        * ensure here that all reference counts have been dropped before we
+        * start walking the LRU list.
+        */
+       drain_workqueue(btp->bt_mount->m_buf_workqueue);
+
        /* loop until there is nothing left on the lru list. */
        while (list_lru_count(&btp->bt_lru)) {
                list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
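
drain_workqueue() is the stronger sibling of flush_workqueue(): it closes the
queue to new work and keeps flushing until it is genuinely empty, so even
work items that requeue themselves (as buffer IO completion can) are waited
out. The general teardown idiom, sketched:

        drain_workqueue(wq);    /* no new work; self-requeues run to idle */
        destroy_workqueue(wq);
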
index ebe9b8290a705c402759c985c015b0bbc5f6a151..52883ac3cf84c06761afcf0792bbafcf781d509b 100644 (file)
@@ -55,7 +55,7 @@ xfs_rw_ilock(
        int                     type)
 {
        if (type & XFS_IOLOCK_EXCL)
-               mutex_lock(&VFS_I(ip)->i_mutex);
+               inode_lock(VFS_I(ip));
        xfs_ilock(ip, type);
 }
 
@@ -66,7 +66,7 @@ xfs_rw_iunlock(
 {
        xfs_iunlock(ip, type);
        if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
+               inode_unlock(VFS_I(ip));
 }
 
 static inline void
@@ -76,7 +76,7 @@ xfs_rw_ilock_demote(
 {
        xfs_ilock_demote(ip, type);
        if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
+               inode_unlock(VFS_I(ip));
 }
 
 /*
@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault(
 /*
  * pfn_mkwrite was originally inteneded to ensure we capture time stamp
  * updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
  */
 static int
 xfs_filemap_pfn_mkwrite(
@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
+       else if (IS_DAX(inode))
+               ret = dax_pfn_mkwrite(vma, vmf);
        xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
        sb_end_pagefault(inode->i_sb);
        return ret;
index ae3758a90ed63ba6b714ccbdafdf007207a8be07..ceba1a83cacccd649caf473ebcf2dfae984bb243 100644 (file)
@@ -610,60 +610,69 @@ __xfs_iflock(
 
 STATIC uint
 _xfs_dic2xflags(
-       __uint16_t              di_flags)
+       __uint16_t              di_flags,
+       uint64_t                di_flags2,
+       bool                    has_attr)
 {
        uint                    flags = 0;
 
        if (di_flags & XFS_DIFLAG_ANY) {
                if (di_flags & XFS_DIFLAG_REALTIME)
-                       flags |= XFS_XFLAG_REALTIME;
+                       flags |= FS_XFLAG_REALTIME;
                if (di_flags & XFS_DIFLAG_PREALLOC)
-                       flags |= XFS_XFLAG_PREALLOC;
+                       flags |= FS_XFLAG_PREALLOC;
                if (di_flags & XFS_DIFLAG_IMMUTABLE)
-                       flags |= XFS_XFLAG_IMMUTABLE;
+                       flags |= FS_XFLAG_IMMUTABLE;
                if (di_flags & XFS_DIFLAG_APPEND)
-                       flags |= XFS_XFLAG_APPEND;
+                       flags |= FS_XFLAG_APPEND;
                if (di_flags & XFS_DIFLAG_SYNC)
-                       flags |= XFS_XFLAG_SYNC;
+                       flags |= FS_XFLAG_SYNC;
                if (di_flags & XFS_DIFLAG_NOATIME)
-                       flags |= XFS_XFLAG_NOATIME;
+                       flags |= FS_XFLAG_NOATIME;
                if (di_flags & XFS_DIFLAG_NODUMP)
-                       flags |= XFS_XFLAG_NODUMP;
+                       flags |= FS_XFLAG_NODUMP;
                if (di_flags & XFS_DIFLAG_RTINHERIT)
-                       flags |= XFS_XFLAG_RTINHERIT;
+                       flags |= FS_XFLAG_RTINHERIT;
                if (di_flags & XFS_DIFLAG_PROJINHERIT)
-                       flags |= XFS_XFLAG_PROJINHERIT;
+                       flags |= FS_XFLAG_PROJINHERIT;
                if (di_flags & XFS_DIFLAG_NOSYMLINKS)
-                       flags |= XFS_XFLAG_NOSYMLINKS;
+                       flags |= FS_XFLAG_NOSYMLINKS;
                if (di_flags & XFS_DIFLAG_EXTSIZE)
-                       flags |= XFS_XFLAG_EXTSIZE;
+                       flags |= FS_XFLAG_EXTSIZE;
                if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
-                       flags |= XFS_XFLAG_EXTSZINHERIT;
+                       flags |= FS_XFLAG_EXTSZINHERIT;
                if (di_flags & XFS_DIFLAG_NODEFRAG)
-                       flags |= XFS_XFLAG_NODEFRAG;
+                       flags |= FS_XFLAG_NODEFRAG;
                if (di_flags & XFS_DIFLAG_FILESTREAM)
-                       flags |= XFS_XFLAG_FILESTREAM;
+                       flags |= FS_XFLAG_FILESTREAM;
        }
 
+       if (di_flags2 & XFS_DIFLAG2_ANY) {
+               if (di_flags2 & XFS_DIFLAG2_DAX)
+                       flags |= FS_XFLAG_DAX;
+       }
+
+       if (has_attr)
+               flags |= FS_XFLAG_HASATTR;
+
        return flags;
 }
 
 uint
 xfs_ip2xflags(
-       xfs_inode_t             *ip)
+       struct xfs_inode        *ip)
 {
-       xfs_icdinode_t          *dic = &ip->i_d;
+       struct xfs_icdinode     *dic = &ip->i_d;
 
-       return _xfs_dic2xflags(dic->di_flags) |
-                               (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
+       return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip));
 }
 
 uint
 xfs_dic2xflags(
-       xfs_dinode_t            *dip)
+       struct xfs_dinode       *dip)
 {
-       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
-                               (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
+       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
+                               be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip));
 }
 
 /*
@@ -862,7 +871,8 @@ xfs_ialloc(
        case S_IFREG:
        case S_IFDIR:
                if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
-                       uint    di_flags = 0;
+                       uint64_t        di_flags2 = 0;
+                       uint            di_flags = 0;
 
                        if (S_ISDIR(mode)) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
@@ -898,7 +908,11 @@ xfs_ialloc(
                                di_flags |= XFS_DIFLAG_NODEFRAG;
                        if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
                                di_flags |= XFS_DIFLAG_FILESTREAM;
+                       if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+                               di_flags2 |= XFS_DIFLAG2_DAX;
+
                        ip->i_d.di_flags |= di_flags;
+                       ip->i_d.di_flags2 |= di_flags2;
                }
                /* FALLTHROUGH */
        case S_IFLNK:
index d42738deec6de6128b1a4b0784c45b5447848d02..478d04e07f9500d6ceed20231deb6b474161c708 100644 (file)
@@ -859,25 +859,25 @@ xfs_merge_ioc_xflags(
        unsigned int    xflags = start;
 
        if (flags & FS_IMMUTABLE_FL)
-               xflags |= XFS_XFLAG_IMMUTABLE;
+               xflags |= FS_XFLAG_IMMUTABLE;
        else
-               xflags &= ~XFS_XFLAG_IMMUTABLE;
+               xflags &= ~FS_XFLAG_IMMUTABLE;
        if (flags & FS_APPEND_FL)
-               xflags |= XFS_XFLAG_APPEND;
+               xflags |= FS_XFLAG_APPEND;
        else
-               xflags &= ~XFS_XFLAG_APPEND;
+               xflags &= ~FS_XFLAG_APPEND;
        if (flags & FS_SYNC_FL)
-               xflags |= XFS_XFLAG_SYNC;
+               xflags |= FS_XFLAG_SYNC;
        else
-               xflags &= ~XFS_XFLAG_SYNC;
+               xflags &= ~FS_XFLAG_SYNC;
        if (flags & FS_NOATIME_FL)
-               xflags |= XFS_XFLAG_NOATIME;
+               xflags |= FS_XFLAG_NOATIME;
        else
-               xflags &= ~XFS_XFLAG_NOATIME;
+               xflags &= ~FS_XFLAG_NOATIME;
        if (flags & FS_NODUMP_FL)
-               xflags |= XFS_XFLAG_NODUMP;
+               xflags |= FS_XFLAG_NODUMP;
        else
-               xflags &= ~XFS_XFLAG_NODUMP;
+               xflags &= ~FS_XFLAG_NODUMP;
 
        return xflags;
 }
@@ -945,40 +945,51 @@ xfs_set_diflags(
        unsigned int            xflags)
 {
        unsigned int            di_flags;
+       uint64_t                di_flags2;
 
        /* can't set PREALLOC this way, just preserve it */
        di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-       if (xflags & XFS_XFLAG_IMMUTABLE)
+       if (xflags & FS_XFLAG_IMMUTABLE)
                di_flags |= XFS_DIFLAG_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
+       if (xflags & FS_XFLAG_APPEND)
                di_flags |= XFS_DIFLAG_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
+       if (xflags & FS_XFLAG_SYNC)
                di_flags |= XFS_DIFLAG_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
+       if (xflags & FS_XFLAG_NOATIME)
                di_flags |= XFS_DIFLAG_NOATIME;
-       if (xflags & XFS_XFLAG_NODUMP)
+       if (xflags & FS_XFLAG_NODUMP)
                di_flags |= XFS_DIFLAG_NODUMP;
-       if (xflags & XFS_XFLAG_NODEFRAG)
+       if (xflags & FS_XFLAG_NODEFRAG)
                di_flags |= XFS_DIFLAG_NODEFRAG;
-       if (xflags & XFS_XFLAG_FILESTREAM)
+       if (xflags & FS_XFLAG_FILESTREAM)
                di_flags |= XFS_DIFLAG_FILESTREAM;
        if (S_ISDIR(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_RTINHERIT)
+               if (xflags & FS_XFLAG_RTINHERIT)
                        di_flags |= XFS_DIFLAG_RTINHERIT;
-               if (xflags & XFS_XFLAG_NOSYMLINKS)
+               if (xflags & FS_XFLAG_NOSYMLINKS)
                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
-               if (xflags & XFS_XFLAG_EXTSZINHERIT)
+               if (xflags & FS_XFLAG_EXTSZINHERIT)
                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-               if (xflags & XFS_XFLAG_PROJINHERIT)
+               if (xflags & FS_XFLAG_PROJINHERIT)
                        di_flags |= XFS_DIFLAG_PROJINHERIT;
        } else if (S_ISREG(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_REALTIME)
+               if (xflags & FS_XFLAG_REALTIME)
                        di_flags |= XFS_DIFLAG_REALTIME;
-               if (xflags & XFS_XFLAG_EXTSIZE)
+               if (xflags & FS_XFLAG_EXTSIZE)
                        di_flags |= XFS_DIFLAG_EXTSIZE;
        }
-
        ip->i_d.di_flags = di_flags;
+
+       /* diflags2 only valid for v3 inodes. */
+       if (ip->i_d.di_version < 3)
+               return;
+
+       di_flags2 = 0;
+       if (xflags & FS_XFLAG_DAX)
+               di_flags2 |= XFS_DIFLAG2_DAX;
+
+       ip->i_d.di_flags2 = di_flags2;
+
 }
 
 STATIC void
@@ -988,22 +999,27 @@ xfs_diflags_to_linux(
        struct inode            *inode = VFS_I(ip);
        unsigned int            xflags = xfs_ip2xflags(ip);
 
-       if (xflags & XFS_XFLAG_IMMUTABLE)
+       if (xflags & FS_XFLAG_IMMUTABLE)
                inode->i_flags |= S_IMMUTABLE;
        else
                inode->i_flags &= ~S_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
+       if (xflags & FS_XFLAG_APPEND)
                inode->i_flags |= S_APPEND;
        else
                inode->i_flags &= ~S_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
+       if (xflags & FS_XFLAG_SYNC)
                inode->i_flags |= S_SYNC;
        else
                inode->i_flags &= ~S_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
+       if (xflags & FS_XFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
        else
                inode->i_flags &= ~S_NOATIME;
+       if (xflags & FS_XFLAG_DAX)
+               inode->i_flags |= S_DAX;
+       else
+               inode->i_flags &= ~S_DAX;
+
 }
 
 static int
@@ -1016,11 +1032,11 @@ xfs_ioctl_setattr_xflags(
 
        /* Can't change realtime flag if any extents are allocated. */
        if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-           XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME))
+           XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
                return -EINVAL;
 
        /* If realtime flag is set then must have realtime device */
-       if (fa->fsx_xflags & XFS_XFLAG_REALTIME) {
+       if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
                if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
                    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
                        return -EINVAL;
@@ -1031,7 +1047,7 @@ xfs_ioctl_setattr_xflags(
         * we have appropriate permission.
         */
        if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
-            (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+            (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
            !capable(CAP_LINUX_IMMUTABLE))
                return -EPERM;
 
@@ -1095,8 +1111,8 @@ out_cancel:
  * extent size hint validation is somewhat cumbersome. Rules are:
  *
  * 1. extent size hint is only valid for directories and regular files
- * 2. XFS_XFLAG_EXTSIZE is only valid for regular files
- * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
  * 4. can only be changed on regular files if no extents are allocated
  * 5. can be changed on directories at any time
  * 6. extsize hint of 0 turns off hints, clears inode flags.
@@ -1112,10 +1128,10 @@ xfs_ioctl_setattr_check_extsize(
 {
        struct xfs_mount        *mp = ip->i_mount;
 
-       if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+       if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
                return -EINVAL;
 
-       if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) &&
+       if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
            !S_ISDIR(ip->i_d.di_mode))
                return -EINVAL;
 
@@ -1132,7 +1148,7 @@ xfs_ioctl_setattr_check_extsize(
                        return -EINVAL;
 
                if (XFS_IS_REALTIME_INODE(ip) ||
-                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                   (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
                        size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
                } else {
                        size = mp->m_sb.sb_blocksize;
@@ -1143,7 +1159,7 @@ xfs_ioctl_setattr_check_extsize(
                if (fa->fsx_extsize % size)
                        return -EINVAL;
        } else
-               fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT);
+               fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
 
        return 0;
 }
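
The rules above map directly onto the FS_IOC_FSSETXATTR interface. A minimal userspace sketch, assuming struct fsxattr and the FS_IOC_FS*XATTR ioctls from <linux/fs.h> (the helper name is illustrative, not part of this commit):

#include <sys/ioctl.h>
#include <linux/fs.h>

/* Set an extent size hint on a regular file (rules 1, 2 and 4 above). */
static int set_extsize_hint(int fd, unsigned int extsize_bytes)
{
        struct fsxattr fsx;

        if (ioctl(fd, FS_IOC_FSGETXATTR, &fsx) < 0)
                return -1;
        fsx.fsx_xflags |= FS_XFLAG_EXTSIZE;
        fsx.fsx_extsize = extsize_bytes; /* multiple of the block size, per rules above */
        return ioctl(fd, FS_IOC_FSSETXATTR, &fsx);
}
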
@@ -1168,7 +1184,7 @@ xfs_ioctl_setattr_check_projid(
 
        if (xfs_get_projid(ip) != fa->fsx_projid)
                return -EINVAL;
-       if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) !=
+       if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
            (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
                return -EINVAL;
 
index 06eafafe636e20f90e8f696ee20c0bb01b64ea8f..76b71a1c6c323e2043aeab1e93fb5a66db9f39d0 100644 (file)
@@ -1205,8 +1205,8 @@ xfs_diflags_to_iflags(
                inode->i_flags |= S_SYNC;
        if (flags & XFS_DIFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
-       /* XXX: Also needs an on-disk per inode flag! */
-       if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+       if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+           ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
                inode->i_flags |= S_DAX;
 }
 
index da37beb76f6e67faf90a658d55f49a06c6fc4152..594f7e63b432427fd5b6448afa0a75eb0b71d558 100644 (file)
@@ -4491,7 +4491,7 @@ xlog_recover_process(
         * know precisely what failed.
         */
        if (pass == XLOG_RECOVER_CRCPASS) {
-               if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
+               if (rhead->h_crc && crc != rhead->h_crc)
                        return -EFSBADCRC;
                return 0;
        }
@@ -4502,7 +4502,7 @@ xlog_recover_process(
         * zero CRC check prevents warnings from being emitted when upgrading
         * the kernel from one that does not add CRCs by default.
         */
-       if (crc != le32_to_cpu(rhead->h_crc)) {
+       if (crc != rhead->h_crc) {
                if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
                        xfs_alert(log->l_mp,
                "log record CRC mismatch: found 0x%x, expected 0x%x.",
index dc6221942b85f437767decfca9bf7ca9b0a94026..ade236e90bb3612d429a8b6b0909b3937302096c 100644 (file)
@@ -42,11 +42,11 @@ xfs_break_layouts(
        while ((error = break_layout(inode, false)) == -EWOULDBLOCK) {
                xfs_iunlock(ip, *iolock);
                if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                error = break_layout(inode, true);
                *iolock = XFS_IOLOCK_EXCL;
                if (with_imutex)
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                xfs_ilock(ip, *iolock);
        }
 
index aa67339b953722b4e5c73442ac14835f0f9c4e32..4f18fd92ca13b21d8fd68e955e082d9db9a61195 100644 (file)
@@ -497,7 +497,6 @@ xfsaild(
        long            tout = 0;       /* milliseconds */
 
        current->flags |= PF_MEMALLOC;
-       set_freezable();
 
        while (!kthread_should_stop()) {
                if (tout && tout <= 20)
index 717a29810473894aaf2f22e6e487f84087628df5..dad8af3ebeb5405c144db72cea488d7f5ea5fd39 100644 (file)
@@ -133,6 +133,5 @@ extern int acpi_get_psd_map(struct cpudata **);
 /* Methods to interact with the PCC mailbox controller. */
 extern struct mbox_chan *
        pcc_mbox_request_channel(struct mbox_client *, unsigned int);
-extern int mbox_send_message(struct mbox_chan *chan, void *mssg);
 
 #endif /* _CPPC_ACPI_H */
index 0419485891f2a8eb6125f154f52d7b16c5fb05b4..0f1c6f315cdc5294ca63bac49efbf69a5dfee805 100644 (file)
@@ -75,7 +75,7 @@ typedef u64 __nocast cputime64_t;
  */
 static inline cputime_t timespec_to_cputime(const struct timespec *val)
 {
-       u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+       u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
        return (__force cputime_t) ret;
 }
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
@@ -91,7 +91,8 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
  */
 static inline cputime_t timeval_to_cputime(const struct timeval *val)
 {
-       u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+       u64 ret = (u64)val->tv_sec * NSEC_PER_SEC +
+                       val->tv_usec * NSEC_PER_USEC;
        return (__force cputime_t) ret;
 }
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
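
The added casts matter on 32-bit targets, where tv_sec is a 32-bit long and the multiply would otherwise be performed in 32-bit arithmetic. A worked sketch:

        /* On a typical 32-bit target (long is 32 bits, NSEC_PER_SEC is 10^9): */
        long sec  = 5;
        u64  bad  = sec * NSEC_PER_SEC;       /* 32-bit multiply wraps: 705032704  */
        u64  good = (u64)sec * NSEC_PER_SEC;  /* 64-bit multiply:      5000000000  */
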
index 0b3c0d39ef753053bb26c1b9fb4979e706240a58..c370b261c72004dcafa3dd036920b7b5fe3d01a2 100644 (file)
@@ -239,6 +239,14 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                            pmd_t *pmdp);
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
+static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+                                          unsigned long address, pmd_t *pmdp)
+{
+
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 {
index 3d69c93d50e83d7b804f9de7260007ffc9903d69..6361892ea737137899c5d9489634446cb3bdedac 100644 (file)
@@ -204,6 +204,7 @@ struct crypto_ahash {
                      unsigned int keylen);
 
        unsigned int reqsize;
+       bool has_setkey;
        struct crypto_tfm base;
 };
 
@@ -375,6 +376,11 @@ static inline void *ahash_request_ctx(struct ahash_request *req)
 int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
                        unsigned int keylen);
 
+static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm)
+{
+       return tfm->has_setkey;
+}
+
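
A sketch of the check this helper enables (illustrative only; key_already_set stands in for whatever state the caller tracks): code handing out operational handles can refuse until a required key has been set.

        if (crypto_ahash_has_setkey(tfm) && !key_already_set)
                return -ENOKEY;         /* this tfm needs a key before use */
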
 /**
  * crypto_ahash_finup() - update and finalize message digest
  * @req: reference to the ahash_request handle that holds all information
index 018afb264ac261ea5dea2f3afa547cf9a8f7a202..a2bfd7843f18f6e79d8bc7e5743b031345153053 100644 (file)
@@ -30,6 +30,9 @@ struct alg_sock {
 
        struct sock *parent;
 
+       unsigned int refcnt;
+       unsigned int nokey_refcnt;
+
        const struct af_alg_type *type;
        void *private;
 };
@@ -50,9 +53,11 @@ struct af_alg_type {
        void (*release)(void *private);
        int (*setkey)(void *private, const u8 *key, unsigned int keylen);
        int (*accept)(void *private, struct sock *sk);
+       int (*accept_nokey)(void *private, struct sock *sk);
        int (*setauthsize)(void *private, unsigned int authsize);
 
        struct proto_ops *ops;
+       struct proto_ops *ops_nokey;
        struct module *owner;
        char name[14];
 };
@@ -67,6 +72,7 @@ int af_alg_register_type(const struct af_alg_type *type);
 int af_alg_unregister_type(const struct af_alg_type *type);
 
 int af_alg_release(struct socket *sock);
+void af_alg_release_parent(struct sock *sk);
 int af_alg_accept(struct sock *sk, struct socket *newsock);
 
 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
@@ -83,11 +89,6 @@ static inline struct alg_sock *alg_sk(struct sock *sk)
        return (struct alg_sock *)sk;
 }
 
-static inline void af_alg_release_parent(struct sock *sk)
-{
-       sock_put(alg_sk(sk)->parent);
-}
-
 static inline void af_alg_init_completion(struct af_alg_completion *completion)
 {
        init_completion(&completion->completion);
index d8dd41fb034fe5af52b68699096abb6ebc5559d5..fd8742a40ff3e93476ec3b8cdb1370200275b6d3 100644 (file)
@@ -61,6 +61,8 @@ struct crypto_skcipher {
        unsigned int ivsize;
        unsigned int reqsize;
 
+       bool has_setkey;
+
        struct crypto_tfm base;
 };
 
@@ -305,6 +307,11 @@ static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
        return tfm->setkey(tfm, key, keylen);
 }
 
+static inline bool crypto_skcipher_has_setkey(struct crypto_skcipher *tfm)
+{
+       return tfm->has_setkey;
+}
+
 /**
  * crypto_skcipher_reqtfm() - obtain cipher handle from request
  * @req: skcipher_request out of which the cipher handle is to be obtained
index 89d008dc08e2e2c6caa1d1d5eeb1ac3bdedd2474..fe5efada9d68237c3863ad8f9bb972da6f610819 100644 (file)
@@ -42,6 +42,10 @@ int drm_atomic_helper_commit(struct drm_device *dev,
                             struct drm_atomic_state *state,
                             bool async);
 
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+                                          struct drm_atomic_state *old_state,
+                                          struct drm_crtc *crtc);
+
 void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
                                        struct drm_atomic_state *old_state);
 
index 7bfb063029d83fe1374e78a174f4a638bc39bc49..461a0558bca4d8d16e81b88e0286e3fa6251ab79 100644 (file)
 
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
 
+static inline bool drm_arch_can_wc_memory(void)
+{
+#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
+       return false;
+#else
+       return true;
+#endif
+}
+
 #endif
index c65a212db77e7a99b26735c262cb31a9b2504195..c5b4b81a831ba733bee1dc8aba8c9a3bb7341739 100644 (file)
@@ -1166,6 +1166,7 @@ struct drm_connector {
        struct drm_mode_object base;
 
        char *name;
+       int connector_id;
        int connector_type;
        int connector_type_id;
        bool interlace_allowed;
@@ -2047,6 +2048,7 @@ struct drm_mode_config {
        struct list_head fb_list;
 
        int num_connector;
+       struct ida connector_ida;
        struct list_head connector_list;
        int num_encoder;
        struct list_head encoder_list;
@@ -2200,7 +2202,11 @@ int drm_connector_register(struct drm_connector *connector);
 void drm_connector_unregister(struct drm_connector *connector);
 
 extern void drm_connector_cleanup(struct drm_connector *connector);
-extern unsigned int drm_connector_index(struct drm_connector *connector);
+static inline unsigned drm_connector_index(struct drm_connector *connector)
+{
+       return connector->connector_id;
+}
+
 /* helper to unplug all connectors from sysfs for device */
 extern void drm_connector_unplug_all(struct drm_device *dev);
 
index 24ab1787b771936fdf1894aabd6271f65a771993..fdb47051d5492a4db126cc525667bd181343d2cd 100644 (file)
@@ -44,8 +44,6 @@ struct drm_dp_vcpi {
 /**
  * struct drm_dp_mst_port - MST port
  * @kref: reference count for this port.
- * @guid_valid: for DP 1.2 devices if we have validated the GUID.
- * @guid: guid for DP 1.2 device on this port.
  * @port_num: port number
  * @input: if this port is an input port.
  * @mcs: message capability status - DP 1.2 spec.
@@ -70,10 +68,6 @@ struct drm_dp_vcpi {
 struct drm_dp_mst_port {
        struct kref kref;
 
-       /* if dpcd 1.2 device is on this port - its GUID info */
-       bool guid_valid;
-       u8 guid[16];
-
        u8 port_num;
        bool input;
        bool mcs;
@@ -110,10 +104,12 @@ struct drm_dp_mst_port {
  * @tx_slots: transmission slots for this device.
  * @last_seqno: last sequence number used to talk to this.
  * @link_address_sent: if a link address message has been sent to this device yet.
+ * @guid: GUID for the DP 1.2 branch device. Ports under this branch can
+ * be identified by port number.
  *
  * This structure represents an MST branch device, there is one
- * primary branch device at the root, along with any others connected
- * to downstream ports
+ * primary branch device at the root, along with any other branches connected
+ * to downstream ports of parent branches.
  */
 struct drm_dp_mst_branch {
        struct kref kref;
@@ -132,6 +128,9 @@ struct drm_dp_mst_branch {
        struct drm_dp_sideband_msg_tx *tx_slots[2];
        int last_seqno;
        bool link_address_sent;
+
+       /* global unique identifier to identify branch devices */
+       u8 guid[16];
 };
 
 
@@ -406,11 +405,9 @@ struct drm_dp_payload {
  * @conn_base_id: DRM connector ID this mgr is connected to.
  * @down_rep_recv: msg receiver state for down replies.
  * @up_req_recv: msg receiver state for up requests.
- * @lock: protects mst state, primary, guid, dpcd.
+ * @lock: protects mst state, primary, dpcd.
  * @mst_state: if this manager is enabled for an MST capable port.
  * @mst_primary: pointer to the primary branch device.
- * @guid_valid: GUID valid for the primary branch device.
- * @guid: GUID for primary port.
  * @dpcd: cache of DPCD for primary port.
  * @pbn_div: PBN to slots divisor.
  *
@@ -432,13 +429,11 @@ struct drm_dp_mst_topology_mgr {
        struct drm_dp_sideband_msg_rx up_req_recv;
 
        /* pointer to info about the initial MST device */
-       struct mutex lock; /* protects mst_state + primary + guid + dpcd */
+       struct mutex lock; /* protects mst_state + primary + dpcd */
 
        bool mst_state;
        struct drm_dp_mst_branch *mst_primary;
-       /* primary MST device GUID */
-       bool guid_valid;
-       u8 guid[16];
+
        u8 dpcd[DP_RECEIVER_CAP_SIZE];
        u8 sink_count;
        int pbn_div;
index d639049a613dfdb264650f8ee938836e6ed28e10..553210c02ee0f655fd28b8ae64d370e7cee2fc70 100644 (file)
@@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B)
 #define DRM_FIXED_ONE          (1ULL << DRM_FIXED_POINT)
 #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1)
 #define DRM_FIXED_DIGITS_MASK  (~DRM_FIXED_DECIMAL_MASK)
+#define DRM_FIXED_EPSILON      1LL
+#define DRM_FIXED_ALMOST_ONE   (DRM_FIXED_ONE - DRM_FIXED_EPSILON)
 
 static inline s64 drm_int2fixp(int a)
 {
        return ((s64)a) << DRM_FIXED_POINT;
 }
 
-static inline int drm_fixp2int(int64_t a)
+static inline int drm_fixp2int(s64 a)
 {
        return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline unsigned drm_fixp_msbset(int64_t a)
+static inline int drm_fixp2int_ceil(s64 a)
+{
+       if (a > 0)
+               return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE);
+       else
+               return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE);
+}
+
+static inline unsigned drm_fixp_msbset(s64 a)
 {
        unsigned shift, sign = (a >> 63) & 1;
 
@@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b)
        return result;
 }
 
+static inline s64 drm_fixp_from_fraction(s64 a, s64 b)
+{
+       s64 res;
+       bool a_neg = a < 0;
+       bool b_neg = b < 0;
+       u64 a_abs = a_neg ? -a : a;
+       u64 b_abs = b_neg ? -b : b;
+       u64 rem;
+
+       /* determine integer part */
+       u64 res_abs  = div64_u64_rem(a_abs, b_abs, &rem);
+
+       /* determine fractional part */
+       {
+               u32 i = DRM_FIXED_POINT;
+
+               do {
+                       rem <<= 1;
+                       res_abs <<= 1;
+                       if (rem >= b_abs) {
+                               res_abs |= 1;
+                               rem -= b_abs;
+                       }
+               } while (--i != 0);
+       }
+
+       /* round up LSB */
+       {
+               u64 summand = (rem << 1) >= b_abs;
+
+               res_abs += summand;
+       }
+
+       res = (s64) res_abs;
+       if (a_neg ^ b_neg)
+               res = -res;
+       return res;
+}
+
 static inline s64 drm_fixp_exp(s64 x)
 {
        s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000);
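
Worked values for the new helpers (DRM_FIXED_POINT is 32, so one is 1ULL << 32; values illustrative):

        s64 third = drm_fixp_from_fraction(1, 3); /* 0x0000000055555555, ~1/3      */
        int up    = drm_fixp2int_ceil(third);     /* 1: nonzero fraction rounds up */
        int down  = drm_fixp2int(third);          /* 0: plain shift truncates      */
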
index 6f45aea49e4ff049a51394f6782a803e32c389eb..0a05b0d36ae74d6c4e4a7e810e442ab2ff2b09bf 100644 (file)
 /* 104 */
 /* 105 */
 #define TEGRA210_CLK_D_AUDIO 106
-/* 107 ( affects abp -> ape) */
+#define TEGRA210_CLK_APB2APE 107
 /* 108 */
 /* 109 */
 /* 110 */
index 744b997d6a94ae01852c190d195f89fa3c4124ae..164049357e5cee3a54d653d883f8639fc8e85243 100644 (file)
@@ -7,6 +7,7 @@
 #ifndef _AER_H_
 #define _AER_H_
 
+#include <linux/errno.h>
 #include <linux/types.h>
 
 #define AER_NONFATAL                   0
diff --git a/include/linux/bcm963xx_nvram.h b/include/linux/bcm963xx_nvram.h
new file mode 100644 (file)
index 0000000..290c231
--- /dev/null
@@ -0,0 +1,112 @@
+#ifndef __LINUX_BCM963XX_NVRAM_H__
+#define __LINUX_BCM963XX_NVRAM_H__
+
+#include <linux/crc32.h>
+#include <linux/if_ether.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+/*
+ * Broadcom BCM963xx SoC board nvram data structure.
+ *
+ * The nvram structure varies in size depending on the SoC board version. Use
+ * the appropriate minimum BCM963XX_NVRAM_*_SIZE define for the information
+ * you need instead of sizeof(struct bcm963xx_nvram) as this may change.
+ */
+
+#define BCM963XX_NVRAM_V4_SIZE         300
+#define BCM963XX_NVRAM_V5_SIZE         (1 * SZ_1K)
+
+#define BCM963XX_DEFAULT_PSI_SIZE      64
+
+enum bcm963xx_nvram_nand_part {
+       BCM963XX_NVRAM_NAND_PART_BOOT = 0,
+       BCM963XX_NVRAM_NAND_PART_ROOTFS_1,
+       BCM963XX_NVRAM_NAND_PART_ROOTFS_2,
+       BCM963XX_NVRAM_NAND_PART_DATA,
+       BCM963XX_NVRAM_NAND_PART_BBT,
+
+       __BCM963XX_NVRAM_NAND_NR_PARTS
+};
+
+struct bcm963xx_nvram {
+       u32     version;
+       char    bootline[256];
+       char    name[16];
+       u32     main_tp_number;
+       u32     psi_size;
+       u32     mac_addr_count;
+       u8      mac_addr_base[ETH_ALEN];
+       u8      __reserved1[2];
+       u32     checksum_v4;
+
+       u8      __reserved2[292];
+       u32     nand_part_offset[__BCM963XX_NVRAM_NAND_NR_PARTS];
+       u32     nand_part_size[__BCM963XX_NVRAM_NAND_NR_PARTS];
+       u8      __reserved3[388];
+       u32     checksum_v5;
+};
+
+#define BCM963XX_NVRAM_NAND_PART_OFFSET(nvram, part) \
+       bcm963xx_nvram_nand_part_offset(nvram, BCM963XX_NVRAM_NAND_PART_ ##part)
+
+static inline u64 __pure bcm963xx_nvram_nand_part_offset(
+       const struct bcm963xx_nvram *nvram,
+       enum bcm963xx_nvram_nand_part part)
+{
+       return nvram->nand_part_offset[part] * SZ_1K;
+}
+
+#define BCM963XX_NVRAM_NAND_PART_SIZE(nvram, part) \
+       bcm963xx_nvram_nand_part_size(nvram, BCM963XX_NVRAM_NAND_PART_ ##part)
+
+static inline u64 __pure bcm963xx_nvram_nand_part_size(
+       const struct bcm963xx_nvram *nvram,
+       enum bcm963xx_nvram_nand_part part)
+{
+       return nvram->nand_part_size[part] * SZ_1K;
+}
+
+/*
+ * bcm963xx_nvram_checksum - Verify nvram checksum
+ *
+ * @nvram: pointer to full size nvram data structure
+ * @expected_out: optional pointer to store expected checksum value
+ * @actual_out: optional pointer to store actual checksum value
+ *
+ * Return: 0 if the checksum is valid, otherwise -EINVAL
+ */
+static int __maybe_unused bcm963xx_nvram_checksum(
+       const struct bcm963xx_nvram *nvram,
+       u32 *expected_out, u32 *actual_out)
+{
+       u32 expected, actual;
+       size_t len;
+
+       if (nvram->version <= 4) {
+               expected = nvram->checksum_v4;
+               len = BCM963XX_NVRAM_V4_SIZE - sizeof(u32);
+       } else {
+               expected = nvram->checksum_v5;
+               len = BCM963XX_NVRAM_V5_SIZE - sizeof(u32);
+       }
+
+       /*
+        * Calculate the CRC32 value for the nvram with a checksum value
+        * of 0 without modifying or copying the nvram by combining:
+        * - The CRC32 of the nvram without the checksum value
+        * - The CRC32 of a zero checksum value (which is also 0)
+        */
+       actual = crc32_le_combine(
+               crc32_le(~0, (u8 *)nvram, len), 0, sizeof(u32));
+
+       if (expected_out)
+               *expected_out = expected;
+
+       if (actual_out)
+               *actual_out = actual;
+
+       return expected == actual ? 0 : -EINVAL;
+}
+
+#endif /* __LINUX_BCM963XX_NVRAM_H__ */
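
A minimal usage sketch, not part of this commit (nvram_blob is assumed to hold at least BCM963XX_NVRAM_V5_SIZE bytes read from flash):

        const struct bcm963xx_nvram *nvram = nvram_blob;
        u32 expected, actual;

        if (bcm963xx_nvram_checksum(nvram, &expected, &actual))
                pr_warn("bad nvram checksum: expected %08x, got %08x\n",
                        expected, actual);
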
diff --git a/include/linux/bcm963xx_tag.h b/include/linux/bcm963xx_tag.h
new file mode 100644 (file)
index 0000000..161c7b3
--- /dev/null
@@ -0,0 +1,102 @@
+#ifndef __LINUX_BCM963XX_TAG_H__
+#define __LINUX_BCM963XX_TAG_H__
+
+#include <linux/types.h>
+
+#define TAGVER_LEN             4       /* Length of Tag Version */
+#define TAGLAYOUT_LEN          4       /* Length of FlashLayoutVer */
+#define SIG1_LEN               20      /* Company Signature 1 Length */
+#define SIG2_LEN               14      /* Company Signature 2 Length */
+#define BOARDID_LEN            16      /* Length of BoardId */
+#define ENDIANFLAG_LEN         2       /* Endian Flag Length */
+#define CHIPID_LEN             6       /* Chip Id Length */
+#define IMAGE_LEN              10      /* Length of Length Field */
+#define ADDRESS_LEN            12      /* Length of Address field */
+#define IMAGE_SEQUENCE_LEN     4       /* Image sequence Length */
+#define RSASIG_LEN             20      /* Length of RSA Signature in tag */
+#define TAGINFO1_LEN           30      /* Length of vendor information field1 in tag */
+#define FLASHLAYOUTVER_LEN     4       /* Length of Flash Layout Version String tag */
+#define TAGINFO2_LEN           16      /* Length of vendor information field2 in tag */
+#define ALTTAGINFO_LEN         54      /* Alternate length for vendor information; Pirelli */
+
+#define NUM_PIRELLI            2
+#define IMAGETAG_CRC_START     0xFFFFFFFF
+
+#define PIRELLI_BOARDS { \
+       "AGPF-S0", \
+       "DWV-S0", \
+}
+
+/* Extended flash address; it needs to be subtracted
+ * from bcm_tag flash image offsets.
+ */
+#define BCM963XX_EXTENDED_SIZE 0xBFC00000
+
+/*
+ * The Broadcom firmware assumes the rootfs starts the image, so it uses
+ * the rootfs start (flash_image_address) to determine where to flash
+ * the image.  Since we have the kernel first, we have to give it the
+ * kernel address, but the CRC uses the length associated with this
+ * address (root_length), which is added to the kernel length
+ * (kernel_length) to determine the length of the image to flash; thus
+ * root_length needs to be rootfs + deadcode (jffs2 EOF marker).
+ */
+
+struct bcm_tag {
+       /* 0-3: Version of the image tag */
+       char tag_version[TAGVER_LEN];
+       /* 4-23: Company Line 1 */
+       char sig_1[SIG1_LEN];
+       /*  24-37: Company Line 2 */
+       char sig_2[SIG2_LEN];
+       /* 38-43: Chip this image is for */
+       char chip_id[CHIPID_LEN];
+       /* 44-59: Board name */
+       char board_id[BOARDID_LEN];
+       /* 60-61: Map endianness -- 1 BE 0 LE */
+       char big_endian[ENDIANFLAG_LEN];
+       /* 62-71: Total length of image */
+       char total_length[IMAGE_LEN];
+       /* 72-83: Address in memory of CFE */
+       char cfe__address[ADDRESS_LEN];
+       /* 84-93: Size of CFE */
+       char cfe_length[IMAGE_LEN];
+       /* 94-105: Address in memory of image start
+        * (kernel for OpenWrt, rootfs for stock firmware)
+        */
+       char flash_image_start[ADDRESS_LEN];
+       /* 106-115: Size of rootfs */
+       char root_length[IMAGE_LEN];
+       /* 116-127: Address in memory of kernel */
+       char kernel_address[ADDRESS_LEN];
+       /* 128-137: Size of kernel */
+       char kernel_length[IMAGE_LEN];
+       /* 138-141: Image sequence number
+        * (to be incremented when flashed with a new image)
+        */
+       char image_sequence[IMAGE_SEQUENCE_LEN];
+       /* 142-161: RSA Signature (not used; some vendors may use this) */
+       char rsa_signature[RSASIG_LEN];
+       /* 162-191: Compilation and related information (not used in OpenWrt) */
+       char information1[TAGINFO1_LEN];
+       /* 192-195: Version flash layout */
+       char flash_layout_ver[FLASHLAYOUTVER_LEN];
+       /* 196-199: kernel+rootfs CRC32 */
+       __u32 fskernel_crc;
+       /* 200-215: Unused, except on Alice Gate where it carries information */
+       char information2[TAGINFO2_LEN];
+       /* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */
+       __u32 image_crc;
+       /* 220-223: CRC32 of rootfs partition */
+       __u32 rootfs_crc;
+       /* 224-227: CRC32 of kernel partition */
+       __u32 kernel_crc;
+       /* 228-235: Unused at present */
+       char reserved1[8];
+       /* 236-239: CRC32 of header excluding last 20 bytes */
+       __u32 header_crc;
+       /* 240-255: Unused at present */
+       char reserved2[16];
+};
+
+#endif /* __LINUX_BCM963XX_TAG_H__ */
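
A sketch of how a partition parser consumes the tag (illustrative; assumes the address and length fields hold decimal ASCII, as CFE writes them):

        u32 kernel_addr = simple_strtoul(tag->kernel_address, NULL, 10);
        u32 kernel_off  = kernel_addr - BCM963XX_EXTENDED_SIZE; /* flash offset */
        u32 kernel_len  = simple_strtoul(tag->kernel_length, NULL, 10);
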
index b9b6e046b52ea5b9f783b69274fbdd6ac8fe5d23..5349e6816cbb075e2b5d6ac57cac1e8fee373438 100644 (file)
@@ -318,16 +318,6 @@ enum bip_flags {
        BIP_IP_CHECKSUM         = 1 << 4, /* IP checksum */
 };
 
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
-{
-       if (bio->bi_rw & REQ_INTEGRITY)
-               return bio->bi_integrity;
-
-       return NULL;
-}
-
 /*
  * bio integrity payload
  */
@@ -349,6 +339,16 @@ struct bio_integrity_payload {
        struct bio_vec          bip_inline_vecs[0];/* embedded bvec array */
 };
 
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
+{
+       if (bio->bi_rw & REQ_INTEGRITY)
+               return bio->bi_integrity;
+
+       return NULL;
+}
+
 static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
 {
        struct bio_integrity_payload *bip = bio_integrity(bio);
@@ -795,6 +795,18 @@ static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
        return false;
 }
 
+static inline void *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
+                                                               unsigned int nr)
+{
+       return ERR_PTR(-EINVAL);
+}
+
+static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
+                                       unsigned int len, unsigned int offset)
+{
+       return 0;
+}
+
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 #endif /* CONFIG_BLOCK */
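
The ERR_PTR-returning stub lets callers handle the integrity-disabled case without an #ifdef. A sketch of the resulting caller pattern (hypothetical caller):

        struct bio_integrity_payload *bip = bio_integrity_alloc(bio, GFP_NOIO, nr);

        if (IS_ERR(bip))
                return PTR_ERR(bip);    /* -EINVAL when integrity is compiled out */
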
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
deleted file mode 100644 (file)
index 77ae77c..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef BLK_IOPOLL_H
-#define BLK_IOPOLL_H
-
-struct blk_iopoll;
-typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
-
-struct blk_iopoll {
-       struct list_head list;
-       unsigned long state;
-       unsigned long data;
-       int weight;
-       int max;
-       blk_iopoll_fn *poll;
-};
-
-enum {
-       IOPOLL_F_SCHED          = 0,
-       IOPOLL_F_DISABLE        = 1,
-};
-
-/*
- * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
- * that we were the first to acquire this iop for scheduling. If this iop
- * is currently disabled, return "failure".
- */
-static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
-{
-       if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
-               return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
-
-       return 1;
-}
-
-static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
-{
-       return test_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-
-extern void blk_iopoll_sched(struct blk_iopoll *);
-extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
-extern void blk_iopoll_complete(struct blk_iopoll *);
-extern void __blk_iopoll_complete(struct blk_iopoll *);
-extern void blk_iopoll_enable(struct blk_iopoll *);
-extern void blk_iopoll_disable(struct blk_iopoll *);
-
-#endif
index 0fb65843ec1e8522461dfe2d308c06e6fe33e2af..86a38ea1823f3307caec422941fcb15dcf7c4e25 100644 (file)
@@ -188,7 +188,6 @@ enum rq_flag_bits {
        __REQ_PM,               /* runtime pm request */
        __REQ_HASHED,           /* on IO scheduler merge hash */
        __REQ_MQ_INFLIGHT,      /* track inflight for MQ */
-       __REQ_NO_TIMEOUT,       /* requests may never expire */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -242,7 +241,6 @@ enum rq_flag_bits {
 #define REQ_PM                 (1ULL << __REQ_PM)
 #define REQ_HASHED             (1ULL << __REQ_HASHED)
 #define REQ_MQ_INFLIGHT                (1ULL << __REQ_MQ_INFLIGHT)
-#define REQ_NO_TIMEOUT         (1ULL << __REQ_NO_TIMEOUT)
 
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE  -1U
index d372ea87ead573e4b4c7d5369ac649f844124334..4571ef1a12a968ea62a78b87371c83a07e89d8e9 100644 (file)
@@ -409,6 +409,7 @@ struct request_queue {
 
        unsigned int            rq_timeout;
        struct timer_list       timeout;
+       struct work_struct      timeout_work;
        struct list_head        timeout_list;
 
        struct list_head        icq_list;
@@ -681,9 +682,12 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 /*
  * q->prep_rq_fn return values
  */
-#define BLKPREP_OK             0       /* serve it */
-#define BLKPREP_KILL           1       /* fatal error, kill */
-#define BLKPREP_DEFER          2       /* leave on queue */
+enum {
+       BLKPREP_OK,             /* serve it */
+       BLKPREP_KILL,           /* fatal error, kill, return -EIO */
+       BLKPREP_DEFER,          /* leave on queue */
+       BLKPREP_INVALID,        /* invalid command, kill, return -EREMOTEIO */
+};
 
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
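
A sketch of a ->prep_rq_fn using the new value (cmd_is_supported() is hypothetical; per the comments above, the block core completes BLKPREP_INVALID requests with -EREMOTEIO):

        static int my_prep_fn(struct request_queue *q, struct request *rq)
        {
                if (!cmd_is_supported(rq))
                        return BLKPREP_INVALID;
                return BLKPREP_OK;
        }
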
 
index f89b31d45cc894814d409a19e161a29c2c41e832..c1ef6f14e7be7f1317e34a0deac61fcb80511eef 100644 (file)
 #define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
 // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
 #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
+#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
+#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
+#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
+#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
+#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
+#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
+#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
+#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
+#define CEPH_FEATURE_CRUSH_TUNABLES5   (1ULL<<58) /* chooseleaf stable mode */
+// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
+#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -108,7 +120,9 @@ static inline u64 ceph_sanitize_features(u64 features)
         CEPH_FEATURE_CRUSH_TUNABLES3 |         \
         CEPH_FEATURE_OSD_PRIMARY_AFFINITY |    \
         CEPH_FEATURE_MSGR_KEEPALIVE2 |         \
-        CEPH_FEATURE_CRUSH_V4)
+        CEPH_FEATURE_CRUSH_V4 |                \
+        CEPH_FEATURE_CRUSH_TUNABLES5 |         \
+        CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
        (CEPH_FEATURE_NOSRCADDR |        \
index 5babb8e95352a6a7468feca70a343fafb56bddad..b827e066e55a198ec13f41d52b52964bd72edf4d 100644 (file)
@@ -40,46 +40,11 @@ static inline __u32 ceph_frag_mask_shift(__u32 f)
        return 24 - ceph_frag_bits(f);
 }
 
-static inline int ceph_frag_contains_value(__u32 f, __u32 v)
+static inline bool ceph_frag_contains_value(__u32 f, __u32 v)
 {
        return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
 }
-static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
-{
-       /* is sub as specific as us, and contained by us? */
-       return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
-              (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
-}
 
-static inline __u32 ceph_frag_parent(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f) - 1,
-                        ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
-}
-static inline int ceph_frag_is_left_child(__u32 f)
-{
-       return ceph_frag_bits(f) > 0 &&
-               (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
-}
-static inline int ceph_frag_is_right_child(__u32 f)
-{
-       return ceph_frag_bits(f) > 0 &&
-               (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1;
-}
-static inline __u32 ceph_frag_sibling(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f),
-                     ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
-}
-static inline __u32 ceph_frag_left_child(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
-}
-static inline __u32 ceph_frag_right_child(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f)+1,
-             ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
-}
 static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
 {
        int newbits = ceph_frag_bits(f) + by;
index 71b1d6cdcb5d1fc53dd45fa3950e138cfac19dfe..8dbd7879fdc6b74719d9cb65b85d8c60f589bde6 100644 (file)
@@ -220,6 +220,7 @@ struct ceph_connection {
        struct ceph_entity_addr actual_peer_addr;
 
        /* message out temps */
+       struct ceph_msg_header out_hdr;
        struct ceph_msg *out_msg;        /* sending message (== tail of
                                            out_sent) */
        bool out_msg_done;
@@ -229,7 +230,6 @@ struct ceph_connection {
        int out_kvec_left;   /* kvec's left in out_kvec */
        int out_skip;        /* skip this many bytes */
        int out_kvec_bytes;  /* total bytes left */
-       bool out_kvec_is_msg; /* kvec refers to out_msg */
        int out_more;        /* there is more data after the kvecs */
        __le64 out_temp_ack; /* for writing an ack */
        struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
index 7f540f7f588d8c8461af975a5ebd21a08e6cf14b..789471dba6fb30f15c752fbca46ddbefe5bfe850 100644 (file)
@@ -127,6 +127,12 @@ struct cgroup_subsys_state {
         */
        u64 serial_nr;
 
+       /*
+        * Incremented by online self and children.  Used to guarantee that
+        * parents are not offlined before their children.
+        */
+       atomic_t online_cnt;
+
        /* percpu_ref killing and RCU release */
        struct rcu_head rcu_head;
        struct work_struct destroy_work;
index bda5ec0b4b4dc26a8431756e468241aad6bdd030..fccf7f44139dd67c2bae6db1799e2517bcd9cd00 100644 (file)
@@ -37,7 +37,7 @@ struct cleancache_ops {
        void (*invalidate_fs)(int);
 };
 
-extern int cleancache_register_ops(struct cleancache_ops *ops);
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
 extern void __cleancache_init_fs(struct super_block *);
 extern void __cleancache_init_shared_fs(struct super_block *);
 extern int  __cleancache_get_page(struct page *);
@@ -48,14 +48,14 @@ extern void __cleancache_invalidate_fs(struct super_block *);
 
 #ifdef CONFIG_CLEANCACHE
 #define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-       return page->mapping->host->i_sb->cleancache_poolid >= 0;
-}
 static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
 {
        return mapping->host->i_sb->cleancache_poolid >= 0;
 }
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+       return cleancache_fs_enabled_mapping(page->mapping);
+}
 #else
 #define cleancache_enabled (0)
 #define cleancache_fs_enabled(_page) (0)
@@ -89,11 +89,9 @@ static inline void cleancache_init_shared_fs(struct super_block *sb)
 
 static inline int cleancache_get_page(struct page *page)
 {
-       int ret = -1;
-
        if (cleancache_enabled && cleancache_fs_enabled(page))
-               ret = __cleancache_get_page(page);
-       return ret;
+               return __cleancache_get_page(page);
+       return -1;
 }
 
 static inline void cleancache_put_page(struct page *page)
index 00b042c49ccdac7af3262a399d33dacc88c83e25..48f5aab117ae12625d041cd18555031e87c178fc 100644 (file)
@@ -144,7 +144,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
  */
 #define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) )
 #define __trace_if(cond) \
-       if (__builtin_constant_p((cond)) ? !!(cond) :                   \
+       if (__builtin_constant_p(!!(cond)) ? !!(cond) :                 \
        ({                                                              \
                int ______r;                                            \
                static struct ftrace_branch_data                        \
index 85a868ccb4931d374a1ee9fb4e4036bb84399561..fea160ee5803fd121d0493f622e240b4c35da480 100644 (file)
@@ -137,6 +137,8 @@ static inline void set_mems_allowed(nodemask_t nodemask)
        task_unlock(current);
 }
 
+extern void cpuset_post_attach_flush(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline bool cpusets_enabled(void) { return false; }
@@ -243,6 +245,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
        return false;
 }
 
+static inline void cpuset_post_attach_flush(void)
+{
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
index 48b49305716bd728e69477db6b430c34e7781746..be8f12b8f1950499380c10de27ab6928df25fd8e 100644 (file)
@@ -59,7 +59,8 @@ enum {
        CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
        CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
        CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
-       CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
+       CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
+       CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
 };
 
 /*
@@ -205,6 +206,11 @@ struct crush_map {
         * mappings line up a bit better with previous mappings. */
        __u8 chooseleaf_vary_r;
 
+       /* if true, makes chooseleaf firstn return stable results (when
+        * there is no local retry) so that data migration is minimal when
+        * a device fails. */
+       __u8 chooseleaf_stable;
+
 #ifndef __KERNEL__
        /*
         * version 0 (original) of straw_calc has various flaws.  version 1
index b415e521528de3d5fc5db598fb588f688f42d307..636dd59ab505c6633ae3184b9c283cad06ab893f 100644 (file)
@@ -7,13 +7,24 @@
 
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
                  get_block_t, dio_iodone_t, int flags);
-int dax_clear_blocks(struct inode *, sector_t block, long size);
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
                dax_iodone_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
                dax_iodone_t);
+
+#ifdef CONFIG_FS_DAX
+struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+#else
+static inline struct page *read_dax_sector(struct block_device *bdev,
+               sector_t n)
+{
+       return ERR_PTR(-ENXIO);
+}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
                                unsigned int flags, get_block_t, dax_iodone_t);
@@ -36,4 +47,13 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
 {
        return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
 }
+
+static inline bool dax_mapping(struct address_space *mapping)
+{
+       return mapping->host && IS_DAX(mapping->host);
+}
+
+struct writeback_control;
+int dax_writeback_mapping_range(struct address_space *mapping,
+               struct block_device *bdev, struct writeback_control *wbc);
 #endif
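
A sketch of the intended hook-up in a DAX-aware filesystem's ->writepages (the fallback is illustrative):

        static int myfs_writepages(struct address_space *mapping,
                                   struct writeback_control *wbc)
        {
                if (dax_mapping(mapping))
                        return dax_writeback_mapping_range(mapping,
                                        mapping->host->i_sb->s_bdev, wbc);
                return generic_writepages(mapping, wbc);
        }
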
index 251a2090a55444cec55ce4f04510b6ef83a69cfb..e0ee0b3000b2da107c975137165fc989777d8a58 100644 (file)
@@ -19,6 +19,8 @@
 
 int devpts_new_index(struct inode *ptmx_inode);
 void devpts_kill_index(struct inode *ptmx_inode, int idx);
+void devpts_add_ref(struct inode *ptmx_inode);
+void devpts_del_ref(struct inode *ptmx_inode);
 /* mknod in devpts */
 struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
                void *priv);
@@ -32,6 +34,8 @@ void devpts_pty_kill(struct inode *inode);
 /* Dummy stubs in the no-pty case */
 static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
 static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
+static inline void devpts_add_ref(struct inode *ptmx_inode) { }
+static inline void devpts_del_ref(struct inode *ptmx_inode) { }
 static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
                dev_t device, int index, void *priv)
 {
index 8723f2a99e1588d7feba41625ebdbc9c924e94d0..d6b3c9943a2c49a6318f37ca6128e000e913c654 100644 (file)
@@ -25,7 +25,6 @@
 */
 #ifndef DRBD_H
 #define DRBD_H
-#include <linux/connector.h>
 #include <asm/types.h>
 
 #ifdef __KERNEL__
@@ -52,7 +51,7 @@
 #endif
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.5"
+#define REL_VERSION "8.4.6"
 #define API_VERSION 1
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 101
@@ -339,6 +338,8 @@ enum drbd_state_rv {
 #define MDF_AL_CLEAN           (1 << 7)
 #define MDF_AL_DISABLED                (1 << 8)
 
+#define MAX_PEERS 32
+
 enum drbd_uuid_index {
        UI_CURRENT,
        UI_BITMAP,
@@ -349,14 +350,35 @@ enum drbd_uuid_index {
        UI_EXTENDED_SIZE   /* Everything. */
 };
 
+#define HISTORY_UUIDS MAX_PEERS
+
 enum drbd_timeout_flag {
        UT_DEFAULT      = 0,
        UT_DEGRADED     = 1,
        UT_PEER_OUTDATED = 2,
 };
 
+enum drbd_notification_type {
+       NOTIFY_EXISTS,
+       NOTIFY_CREATE,
+       NOTIFY_CHANGE,
+       NOTIFY_DESTROY,
+       NOTIFY_CALL,
+       NOTIFY_RESPONSE,
+
+       NOTIFY_CONTINUES = 0x8000,
+       NOTIFY_FLAGS = NOTIFY_CONTINUES,
+};
+
 #define UUID_JUST_CREATED ((__u64)4)
 
+enum write_ordering_e {
+       WO_NONE,
+       WO_DRAIN_IO,
+       WO_BDEV_FLUSH,
+       WO_BIO_BARRIER
+};
+
 /* magic numbers used in meta data and network packets */
 #define DRBD_MAGIC 0x83740267
 #define DRBD_MAGIC_BIG 0x835a
index 7b131ed8f9c6696cfb1ec8b470c0d77c95dff07e..2d0e5ad5de9d34227700acfee0e188eb6c752471 100644 (file)
@@ -250,6 +250,76 @@ GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
        __flg_field(1, DRBD_GENLA_F_MANDATORY,  force_detach)
 )
 
+GENL_struct(DRBD_NLA_RESOURCE_INFO, 15, resource_info,
+       __u32_field(1, 0, res_role)
+       __flg_field(2, 0, res_susp)
+       __flg_field(3, 0, res_susp_nod)
+       __flg_field(4, 0, res_susp_fen)
+       /* __flg_field(5, 0, res_weak) */
+)
+
+GENL_struct(DRBD_NLA_DEVICE_INFO, 16, device_info,
+       __u32_field(1, 0, dev_disk_state)
+)
+
+GENL_struct(DRBD_NLA_CONNECTION_INFO, 17, connection_info,
+       __u32_field(1, 0, conn_connection_state)
+       __u32_field(2, 0, conn_role)
+)
+
+GENL_struct(DRBD_NLA_PEER_DEVICE_INFO, 18, peer_device_info,
+       __u32_field(1, 0, peer_repl_state)
+       __u32_field(2, 0, peer_disk_state)
+       __u32_field(3, 0, peer_resync_susp_user)
+       __u32_field(4, 0, peer_resync_susp_peer)
+       __u32_field(5, 0, peer_resync_susp_dependency)
+)
+
+GENL_struct(DRBD_NLA_RESOURCE_STATISTICS, 19, resource_statistics,
+       __u32_field(1, 0, res_stat_write_ordering)
+)
+
+GENL_struct(DRBD_NLA_DEVICE_STATISTICS, 20, device_statistics,
+       __u64_field(1, 0, dev_size)  /* (sectors) */
+       __u64_field(2, 0, dev_read)  /* (sectors) */
+       __u64_field(3, 0, dev_write)  /* (sectors) */
+       __u64_field(4, 0, dev_al_writes)  /* activity log writes (count) */
+       __u64_field(5, 0, dev_bm_writes)  /*  bitmap writes  (count) */
+       __u32_field(6, 0, dev_upper_pending)  /* application requests in progress */
+       __u32_field(7, 0, dev_lower_pending)  /* backing device requests in progress */
+       __flg_field(8, 0, dev_upper_blocked)
+       __flg_field(9, 0, dev_lower_blocked)
+       __flg_field(10, 0, dev_al_suspended)  /* activity log suspended */
+       __u64_field(11, 0, dev_exposed_data_uuid)
+       __u64_field(12, 0, dev_current_uuid)
+       __u32_field(13, 0, dev_disk_flags)
+       __bin_field(14, 0, history_uuids, HISTORY_UUIDS * sizeof(__u64))
+)
+
+GENL_struct(DRBD_NLA_CONNECTION_STATISTICS, 21, connection_statistics,
+       __flg_field(1, 0, conn_congested)
+)
+
+GENL_struct(DRBD_NLA_PEER_DEVICE_STATISTICS, 22, peer_device_statistics,
+       __u64_field(1, 0, peer_dev_received)  /* sectors */
+       __u64_field(2, 0, peer_dev_sent)  /* sectors */
+       __u32_field(3, 0, peer_dev_pending)  /* number of requests */
+       __u32_field(4, 0, peer_dev_unacked)  /* number of requests */
+       __u64_field(5, 0, peer_dev_out_of_sync)  /* sectors */
+       __u64_field(6, 0, peer_dev_resync_failed)  /* sectors */
+       __u64_field(7, 0, peer_dev_bitmap_uuid)
+       __u32_field(9, 0, peer_dev_flags)
+)
+
+GENL_struct(DRBD_NLA_NOTIFICATION_HEADER, 23, drbd_notification_header,
+       __u32_field(1, DRBD_GENLA_F_MANDATORY, nh_type)
+)
+
+GENL_struct(DRBD_NLA_HELPER, 24, drbd_helper_info,
+       __str_field(1, DRBD_GENLA_F_MANDATORY, helper_name, 32)
+       __u32_field(2, DRBD_GENLA_F_MANDATORY, helper_status)
+)
+
 /*
  * Notifications and commands (genlmsghdr->cmd)
  */
@@ -382,3 +452,82 @@ GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
        GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DOWN,         27, GENL_doit(drbd_adm_down),
        GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+GENL_op(DRBD_ADM_GET_RESOURCES, 30,
+        GENL_op_init(
+                .dumpit = drbd_adm_dump_resources,
+        ),
+        GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+        GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_GENLA_F_MANDATORY)
+        GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_GET_DEVICES, 31,
+        GENL_op_init(
+                .dumpit = drbd_adm_dump_devices,
+                .done = drbd_adm_dump_devices_done,
+        ),
+        GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+        GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
+        GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_GET_CONNECTIONS, 32,
+        GENL_op_init(
+                .dumpit = drbd_adm_dump_connections,
+                .done = drbd_adm_dump_connections_done,
+        ),
+        GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+        GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_GENLA_F_MANDATORY)
+        GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_GET_PEER_DEVICES, 33,
+        GENL_op_init(
+                .dumpit = drbd_adm_dump_peer_devices,
+                .done = drbd_adm_dump_peer_devices_done,
+        ),
+        GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+        GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
+        GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+
+GENL_notification(
+       DRBD_RESOURCE_STATE, 34, events,
+       GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_F_REQUIRED))
+
+GENL_notification(
+       DRBD_DEVICE_STATE, 35, events,
+       GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_F_REQUIRED))
+
+GENL_notification(
+       DRBD_CONNECTION_STATE, 36, events,
+       GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_F_REQUIRED))
+
+GENL_notification(
+       DRBD_PEER_DEVICE_STATE, 37, events,
+       GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_F_REQUIRED))
+
+GENL_op(
+       DRBD_ADM_GET_INITIAL_STATE, 38,
+       GENL_op_init(
+               .dumpit = drbd_adm_get_initial_state,
+       ),
+       GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY))
+
+GENL_notification(
+       DRBD_HELPER, 40, events,
+       GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+       GENL_tla_expected(DRBD_NLA_HELPER, DRBD_F_REQUIRED))
+
+GENL_notification(
+       DRBD_INITIAL_STATE_DONE, 41, events,
+       GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED))
index 569b5a866bb1e6308bbc4c0a28a2da92d106d6b5..47be3ad7d3e5bad63b48a8fa344dbea65c0a97dd 100644 (file)
@@ -1199,7 +1199,10 @@ int efivar_entry_iter(int (*func)(struct efivar_entry *, void *),
 struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
                                       struct list_head *head, bool remove);
 
-bool efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len);
+bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data,
+                    unsigned long data_size);
+bool efivar_variable_is_removable(efi_guid_t vendor, const char *name,
+                                 size_t len);
 
 extern struct work_struct efivar_work;
 void efivar_run_worker(void);
index eb73d74ed99262c83f2fa5e51582b2da0344bafa..ae681002100a1fb8401e934a99f40dedf039cd7d 100644 (file)
@@ -433,7 +433,8 @@ struct address_space {
        struct rw_semaphore     i_mmap_rwsem;   /* protect tree, count, list */
        /* Protected by tree_lock together with the radix tree */
        unsigned long           nrpages;        /* number of total pages */
-       unsigned long           nrshadows;      /* number of shadow entries */
+       /* number of shadow or DAX exceptional entries */
+       unsigned long           nrexceptional;
        pgoff_t                 writeback_index;/* writeback starts here */
        const struct address_space_operations *a_ops;   /* methods */
        unsigned long           flags;          /* error bits/gfp mask */
@@ -483,9 +484,6 @@ struct block_device {
        int                     bd_fsfreeze_count;
        /* Mutex for freeze */
        struct mutex            bd_fsfreeze_mutex;
-#ifdef CONFIG_FS_DAX
-       int                     bd_map_count;
-#endif
 };
 
 /*
@@ -714,6 +712,31 @@ enum inode_i_mutex_lock_class
        I_MUTEX_PARENT2,
 };
 
+static inline void inode_lock(struct inode *inode)
+{
+       mutex_lock(&inode->i_mutex);
+}
+
+static inline void inode_unlock(struct inode *inode)
+{
+       mutex_unlock(&inode->i_mutex);
+}
+
+static inline int inode_trylock(struct inode *inode)
+{
+       return mutex_trylock(&inode->i_mutex);
+}
+
+static inline int inode_is_locked(struct inode *inode)
+{
+       return mutex_is_locked(&inode->i_mutex);
+}
+
+static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
+{
+       mutex_lock_nested(&inode->i_mutex, subclass);
+}
+
 void lock_two_nondirectories(struct inode *, struct inode*);
 void unlock_two_nondirectories(struct inode *, struct inode*);
 
@@ -2881,7 +2904,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
 
 static inline bool io_is_direct(struct file *filp)
 {
-       return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
+       return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
 static inline int iocb_flags(struct file *file)
@@ -3047,8 +3070,8 @@ static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
 }
 static inline bool dir_relax(struct inode *inode)
 {
-       mutex_unlock(&inode->i_mutex);
-       mutex_lock(&inode->i_mutex);
+       inode_unlock(inode);
+       inode_lock(inode);
        return !IS_DEADDIR(inode);
 }
 
index 6b7e89f45aa49e3eb1531c51f6edae4cc18d576a..533c4408529a19819570c5dc40d917aa10ac2d26 100644 (file)
@@ -220,10 +220,7 @@ struct fsnotify_mark {
        /* List of marks by group->i_fsnotify_marks. Also reused for queueing
         * mark into destroy_list when it's waiting for the end of SRCU period
         * before it can be freed. [group->mark_mutex] */
-       union {
-               struct list_head g_list;
-               struct rcu_head g_rcu;
-       };
+       struct list_head g_list;
        /* Protects inode / mnt pointers, flags, masks */
        spinlock_t lock;
        /* List of marks for inode / vfsmount [obj_lock] */
index 0639dcc9819523a002de1149da5629d24bce039b..c2b340e23f62d3240abc6360ab0412a82a12f21a 100644 (file)
@@ -165,7 +165,6 @@ struct ftrace_ops {
        ftrace_func_t                   saved_func;
        int __percpu                    *disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
-       int                             nr_trampolines;
        struct ftrace_ops_hash          local_hash;
        struct ftrace_ops_hash          *func_hash;
        struct ftrace_ops_hash          old_hash;
@@ -604,6 +603,7 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
 extern int skip_trace(unsigned long ip);
 extern void ftrace_module_init(struct module *mod);
+extern void ftrace_module_enable(struct module *mod);
 extern void ftrace_release_mod(struct module *mod);
 
 extern void ftrace_disable_daemon(void);
@@ -613,8 +613,9 @@ static inline int skip_trace(unsigned long ip) { return 0; }
 static inline int ftrace_force_update(void) { return 0; }
 static inline void ftrace_disable_daemon(void) { }
 static inline void ftrace_enable_daemon(void) { }
-static inline void ftrace_release_mod(struct module *mod) {}
-static inline void ftrace_module_init(struct module *mod) {}
+static inline void ftrace_module_init(struct module *mod) { }
+static inline void ftrace_module_enable(struct module *mod) { }
+static inline void ftrace_release_mod(struct module *mod) { }
 static inline __init int register_ftrace_command(struct ftrace_func_command *cmd)
 {
        return -EINVAL;
index 28ad5f6494b018a2c49493920059de62fcacdb8e..af1f2b24bbe4172f6055407229f0670ee90dc534 100644 (file)
@@ -547,16 +547,16 @@ static inline bool pm_suspended_storage(void)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_CMA
-
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
                              unsigned migratetype);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
+#endif
 
+#ifdef CONFIG_CMA
 /* CMA stuff */
 extern void init_cma_reserved_pageblock(struct page *page);
-
 #endif
 
 #endif /* __LINUX_GFP_H */
index 76dd4f0da5ca9907572bea19f4b0d0dbe8ad06eb..2ead22dd74a00896d24fdb937242e8ba36a77cfe 100644 (file)
@@ -87,7 +87,8 @@ enum hrtimer_restart {
  * @function:  timer expiry callback function
  * @base:      pointer to the timer base (per cpu and per clock)
  * @state:     state information (See bit values above)
- * @start_pid: timer statistics field to store the pid of the task which
+ * @is_rel:    Set if the timer was armed relative
+ * @start_pid:  timer statistics field to store the pid of the task which
  *             started the timer
  * @start_site:        timer statistics field to store the site where the timer
  *             was started
@@ -101,7 +102,8 @@ struct hrtimer {
        ktime_t                         _softexpires;
        enum hrtimer_restart            (*function)(struct hrtimer *);
        struct hrtimer_clock_base       *base;
-       unsigned long                   state;
+       u8                              state;
+       u8                              is_rel;
 #ifdef CONFIG_TIMER_STATS
        int                             start_pid;
        void                            *start_site;
@@ -321,6 +323,27 @@ static inline void clock_was_set_delayed(void) { }
 
 #endif
 
+static inline ktime_t
+__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
+{
+       ktime_t rem = ktime_sub(timer->node.expires, now);
+
+       /*
+        * Adjust relative timers for the extra we added in
+        * hrtimer_start_range_ns() to prevent short timeouts.
+        */
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
+               rem.tv64 -= hrtimer_resolution;
+       return rem;
+}
+
+static inline ktime_t
+hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
+{
+       return __hrtimer_expires_remaining_adjusted(timer,
+                                                   timer->base->get_time());
+}
+
 extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
@@ -390,7 +413,12 @@ static inline void hrtimer_restart(struct hrtimer *timer)
 }
 
 /* Query timers: */
-extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
+
+static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+       return __hrtimer_get_remaining(timer, false);
+}
 
 extern u64 hrtimer_get_next_event(void);
 
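The split gives callers a choice about the CONFIG_TIME_LOW_RES slack that hrtimer_start_range_ns() adds to relative timers: hrtimer_get_remaining() keeps the historical unadjusted value, while the _adjusted variants subtract the slack again. A sketch (illustrative; assumes the caller is already serialized against the timer, as the core code is):

    ktime_t raw = hrtimer_get_remaining(&t);                /* old behaviour, slack included */
    ktime_t adj = hrtimer_expires_remaining_adjusted(&t);   /* slack stripped for relative timers */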
index cfe81e10bd5429baf5ed0ccf50bd4613ff6ee3c6..459fd25b378e73cfd2e911761ad845076be547de 100644 (file)
@@ -120,15 +120,15 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                    unsigned long start,
                                    unsigned long end,
                                    long adjust_next);
-extern bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl);
+extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma);
 /* mmap_sem must be held on entry */
-static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma)
 {
        VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
        if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
-               return __pmd_trans_huge_lock(pmd, vma, ptl);
+               return __pmd_trans_huge_lock(pmd, vma);
        else
-               return false;
+               return NULL;
 }
@@ -190,10 +190,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                         long adjust_next)
 {
 }
-static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma)
 {
-       return false;
+       return NULL;
 }
 
 static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
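Callers now get the page-table lock pointer back directly instead of filling a spinlock_t ** out-parameter; a NULL return means the pmd is not huge and the pte-level path applies. The typical calling pattern after this change looks like the following sketch (do_huge_work() is hypothetical):

    spinlock_t *ptl = pmd_trans_huge_lock(pmd, vma);
    if (ptl) {
            do_huge_work(pmd);      /* pmd is huge/devmap and ptl is held */
            spin_unlock(ptl);
    } else {
            /* fall back to walking the ptes */
    }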
index 013fd9bc4cb6cefb102705a88a6911de546acd0a..083d61e9270632be41a7b707104af7003d3c23cb 100644 (file)
@@ -135,6 +135,20 @@ static inline void *idr_find(struct idr *idr, int id)
 #define idr_for_each_entry(idp, entry, id)                     \
        for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
 
+/**
+ * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
+ * @idp:     idr handle
+ * @entry:   the type * to use as cursor
+ * @id:      id entry's key
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define idr_for_each_entry_continue(idp, entry, id)                    \
+       for ((entry) = idr_get_next((idp), &(id));                      \
+            entry;                                                     \
+            ++id, (entry) = idr_get_next((idp), &(id)))
+
 /*
  * IDA - IDR based id allocator, use when translation from id to
  * pointer isn't necessary.
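A usage sketch for the new macro (names hypothetical): unlike idr_for_each_entry(), the id cursor is not reset to 0, so a walk can resume where it left off, e.g. after dropping a lock mid-iteration:

    struct my_obj *obj;
    int id = last_seen_id;          /* resume point supplied by the caller */

    idr_for_each_entry_continue(&my_idr, obj, id)
            handle(obj);            /* hypothetical per-entry work */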
index 821273ca4873b147136f46f235d83b344b192373..2d9b650047a5b96582f66dff2b2581cce97c2b65 100644 (file)
@@ -235,6 +235,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 /* low 64 bit */
 #define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT))
 
+/* PRS_REG */
+#define DMA_PRS_PPR    ((u32)1)
+
 #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts)                    \
 do {                                                                   \
        cycles_t start_time = get_cycles();                             \
index cb30edbfe9fcd010b217aa385073d54fa71105c4..0e95fcc75b2ac141ceeb3ebb3b2f79d7a5d2ddad 100644 (file)
@@ -413,7 +413,7 @@ enum
        NET_TX_SOFTIRQ,
        NET_RX_SOFTIRQ,
        BLOCK_SOFTIRQ,
-       BLOCK_IOPOLL_SOFTIRQ,
+       IRQ_POLL_SOFTIRQ,
        TASKLET_SOFTIRQ,
        SCHED_SOFTIRQ,
        HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
index f28dff313b07c3bb072dffc2235b27d5d0eaa09a..a5c539fa5d2bc03ba233f4d11de1b64d839990ab 100644 (file)
@@ -133,8 +133,9 @@ struct iommu_dm_region {
 
 /**
  * struct iommu_ops - iommu ops and capabilities
- * @domain_init: init iommu domain
- * @domain_destroy: destroy iommu domain
+ * @capable: check capability
+ * @domain_alloc: allocate iommu domain
+ * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
@@ -144,8 +145,15 @@ struct iommu_dm_region {
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
+ * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @get_dm_regions: Request list of direct mapping requirements for a device
+ * @put_dm_regions: Free list of direct mapping requirements for a device
+ * @domain_window_enable: Configure and enable a particular window for a domain
+ * @domain_window_disable: Disable a particular window for a domain
+ * @domain_set_windows: Set the number of windows for a domain
+ * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
  * @priv: per-instance data private to the iommu driver
@@ -182,9 +190,9 @@ struct iommu_ops {
        int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
                                    phys_addr_t paddr, u64 size, int prot);
        void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-       /* Set the numer of window per domain */
+       /* Set the number of windows per domain */
        int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-       /* Get the numer of window per domain */
+       /* Get the number of windows per domain */
        u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 #ifdef CONFIG_OF_IOMMU
diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
new file mode 100644 (file)
index 0000000..3e8c1b8
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef IRQ_POLL_H
+#define IRQ_POLL_H
+
+struct irq_poll;
+typedef int (irq_poll_fn)(struct irq_poll *, int);
+
+struct irq_poll {
+       struct list_head list;
+       unsigned long state;
+       int weight;
+       irq_poll_fn *poll;
+};
+
+enum {
+       IRQ_POLL_F_SCHED        = 0,
+       IRQ_POLL_F_DISABLE      = 1,
+};
+
+extern void irq_poll_sched(struct irq_poll *);
+extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
+extern void irq_poll_complete(struct irq_poll *);
+extern void irq_poll_enable(struct irq_poll *);
+extern void irq_poll_disable(struct irq_poll *);
+
+#endif
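This header carries the blk-iopoll interface over under its new name (note the matching BLOCK_IOPOLL_SOFTIRQ -> IRQ_POLL_SOFTIRQ rename above). The intended pattern, sketched with hypothetical driver names: the hard interrupt handler schedules the poller, and the softirq side then drains completions against a budget, calling irq_poll_complete() once nothing is left:

    static int my_poll(struct irq_poll *iop, int budget)
    {
            int done = my_drain_completions(budget);    /* hypothetical helper */

            if (done < budget)
                    irq_poll_complete(iop);             /* queue drained, stop polling */
            return done;
    }

    irq_poll_init(&dev->iop, 64, my_poll);              /* weight 64, at probe time */
    irq_poll_sched(&dev->iop);                          /* from the hard IRQ handler */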
index f64622ad02c196185be2eb79269e782d5772fcd0..04579d9fbce4f3b5c0a8484116500ee3034b93f7 100644 (file)
@@ -70,6 +70,7 @@ struct irq_fwspec {
  */
 enum irq_domain_bus_token {
        DOMAIN_BUS_ANY          = 0,
+       DOMAIN_BUS_WIRED,
        DOMAIN_BUS_PCI_MSI,
        DOMAIN_BUS_PLATFORM_MSI,
        DOMAIN_BUS_NEXUS,
index 851821bfd55321dce527f4b32e03d1534994678a..bec2abbd7ab28485cbf32bfefa7430b6a47c81e4 100644 (file)
@@ -526,6 +526,7 @@ enum ata_lpm_policy {
 enum ata_lpm_hints {
        ATA_LPM_EMPTY           = (1 << 0), /* port empty/probing */
        ATA_LPM_HIPM            = (1 << 1), /* may use HIPM */
+       ATA_LPM_WAKE_ONLY       = (1 << 2), /* only wake up link */
 };
 
 /* forward declarations */
index bed40dff0e86999b5c39c82ebadc29f25e0060ec..141ffdd59960d7b31583f629140b0f7dd9916b37 100644 (file)
@@ -26,9 +26,8 @@ enum {
 
        /* need to set a limit somewhere, but yes, this is likely overkill */
        ND_IOCTL_MAX_BUFLEN = SZ_4M,
-       ND_CMD_MAX_ELEM = 4,
+       ND_CMD_MAX_ELEM = 5,
        ND_CMD_MAX_ENVELOPE = 16,
-       ND_CMD_ARS_STATUS_MAX = SZ_4K,
        ND_MAX_MAPPINGS = 32,
 
        /* region flag indicating to direct-map persistent memory by default */
index 034117b3be5f7d91e5e869ea9e856f51964ebac6..2190419bdf0a04e549b64946037d8d61fd025781 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef NVM_H
 #define NVM_H
 
+#include <linux/types.h>
+
 enum {
        NVM_IO_OK = 0,
        NVM_IO_REQUEUE = 1,
@@ -11,12 +13,74 @@ enum {
        NVM_IOTYPE_GC = 1,
 };
 
+#define NVM_BLK_BITS (16)
+#define NVM_PG_BITS  (16)
+#define NVM_SEC_BITS (8)
+#define NVM_PL_BITS  (8)
+#define NVM_LUN_BITS (8)
+#define NVM_CH_BITS  (8)
+
+struct ppa_addr {
+       /* Generic structure for all addresses */
+       union {
+               struct {
+                       u64 blk         : NVM_BLK_BITS;
+                       u64 pg          : NVM_PG_BITS;
+                       u64 sec         : NVM_SEC_BITS;
+                       u64 pl          : NVM_PL_BITS;
+                       u64 lun         : NVM_LUN_BITS;
+                       u64 ch          : NVM_CH_BITS;
+               } g;
+
+               u64 ppa;
+       };
+};
+
+struct nvm_rq;
+struct nvm_id;
+struct nvm_dev;
+
+typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
+typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
+typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
+typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
+                               nvm_l2p_update_fn *, void *);
+typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
+                               nvm_bb_update_fn *, void *);
+typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
+typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
+typedef void (nvm_destroy_dma_pool_fn)(void *);
+typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
+                                                               dma_addr_t *);
+typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
+
+struct nvm_dev_ops {
+       nvm_id_fn               *identity;
+       nvm_get_l2p_tbl_fn      *get_l2p_tbl;
+       nvm_op_bb_tbl_fn        *get_bb_tbl;
+       nvm_op_set_bb_fn        *set_bb_tbl;
+
+       nvm_submit_io_fn        *submit_io;
+       nvm_erase_blk_fn        *erase_block;
+
+       nvm_create_dma_pool_fn  *create_dma_pool;
+       nvm_destroy_dma_pool_fn *destroy_dma_pool;
+       nvm_dev_dma_alloc_fn    *dev_dma_alloc;
+       nvm_dev_dma_free_fn     *dev_dma_free;
+
+       unsigned int            max_phys_sect;
+};
+
+
+
 #ifdef CONFIG_NVM
 
 #include <linux/blkdev.h>
-#include <linux/types.h>
 #include <linux/file.h>
 #include <linux/dmapool.h>
+#include <uapi/linux/lightnvm.h>
 
 enum {
        /* HW Responsibilities */
@@ -58,8 +122,33 @@ enum {
        /* Block Types */
        NVM_BLK_T_FREE          = 0x0,
        NVM_BLK_T_BAD           = 0x1,
-       NVM_BLK_T_DEV           = 0x2,
-       NVM_BLK_T_HOST          = 0x4,
+       NVM_BLK_T_GRWN_BAD      = 0x2,
+       NVM_BLK_T_DEV           = 0x4,
+       NVM_BLK_T_HOST          = 0x8,
+
+       /* Memory capabilities */
+       NVM_ID_CAP_SLC          = 0x1,
+       NVM_ID_CAP_CMD_SUSPEND  = 0x2,
+       NVM_ID_CAP_SCRAMBLE     = 0x4,
+       NVM_ID_CAP_ENCRYPT      = 0x8,
+
+       /* Memory types */
+       NVM_ID_FMTYPE_SLC       = 0,
+       NVM_ID_FMTYPE_MLC       = 1,
+
+       /* Device capabilities */
+       NVM_ID_DCAP_BBLKMGMT    = 0x1,
+       NVM_UD_DCAP_ECC         = 0x2,
+};
+
+struct nvm_id_lp_mlc {
+       u16     num_pairs;
+       u8      pairs[886];
+};
+
+struct nvm_id_lp_tbl {
+       __u8    id[8];
+       struct nvm_id_lp_mlc mlc;
 };
 
 struct nvm_id_group {
@@ -82,6 +171,8 @@ struct nvm_id_group {
        u32     mpos;
        u32     mccap;
        u16     cpar;
+
+       struct nvm_id_lp_tbl lptbl;
 };
 
 struct nvm_addr_format {
@@ -125,28 +216,8 @@ struct nvm_tgt_instance {
 #define NVM_VERSION_MINOR 0
 #define NVM_VERSION_PATCH 0
 
-#define NVM_BLK_BITS (16)
-#define NVM_PG_BITS  (16)
-#define NVM_SEC_BITS (8)
-#define NVM_PL_BITS  (8)
-#define NVM_LUN_BITS (8)
-#define NVM_CH_BITS  (8)
-
-struct ppa_addr {
-       /* Generic structure for all addresses */
-       union {
-               struct {
-                       u64 blk         : NVM_BLK_BITS;
-                       u64 pg          : NVM_PG_BITS;
-                       u64 sec         : NVM_SEC_BITS;
-                       u64 pl          : NVM_PL_BITS;
-                       u64 lun         : NVM_LUN_BITS;
-                       u64 ch          : NVM_CH_BITS;
-               } g;
-
-               u64 ppa;
-       };
-};
+struct nvm_rq;
+typedef void (nvm_end_io_fn)(struct nvm_rq *);
 
 struct nvm_rq {
        struct nvm_tgt_instance *ins;
@@ -164,9 +235,14 @@ struct nvm_rq {
        void *metadata;
        dma_addr_t dma_metadata;
 
+       struct completion *wait;
+       nvm_end_io_fn *end_io;
+
        uint8_t opcode;
        uint16_t nr_pages;
        uint16_t flags;
+
+       int error;
 };
 
 static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
@@ -181,51 +257,31 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
 
 struct nvm_block;
 
-typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
-typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
-typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
-typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
-                               nvm_l2p_update_fn *, void *);
-typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
-                               nvm_bb_update_fn *, void *);
-typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
-typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
-typedef void (nvm_destroy_dma_pool_fn)(void *);
-typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
-                                                               dma_addr_t *);
-typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
-
-struct nvm_dev_ops {
-       nvm_id_fn               *identity;
-       nvm_get_l2p_tbl_fn      *get_l2p_tbl;
-       nvm_op_bb_tbl_fn        *get_bb_tbl;
-       nvm_op_set_bb_fn        *set_bb_tbl;
-
-       nvm_submit_io_fn        *submit_io;
-       nvm_erase_blk_fn        *erase_block;
-
-       nvm_create_dma_pool_fn  *create_dma_pool;
-       nvm_destroy_dma_pool_fn *destroy_dma_pool;
-       nvm_dev_dma_alloc_fn    *dev_dma_alloc;
-       nvm_dev_dma_free_fn     *dev_dma_free;
-
-       unsigned int            max_phys_sect;
-};
-
 struct nvm_lun {
        int id;
 
        int lun_id;
        int chnl_id;
 
-       unsigned int nr_inuse_blocks;   /* Number of used blocks */
+       /* It is up to the target to mark blocks as closed. If the target does
+        * not do it, all blocks are marked as open, and nr_open_blocks
+        * represents the number of blocks in use
+        */
+       unsigned int nr_open_blocks;    /* Number of used, writable blocks */
+       unsigned int nr_closed_blocks;  /* Number of used, read-only blocks */
        unsigned int nr_free_blocks;    /* Number of unused blocks */
        unsigned int nr_bad_blocks;     /* Number of bad blocks */
-       struct nvm_block *blocks;
 
        spinlock_t lock;
+
+       struct nvm_block *blocks;
+};
+
+enum {
+       NVM_BLK_ST_FREE =       0x1,    /* Free block */
+       NVM_BLK_ST_OPEN =       0x2,    /* Open block - read-write */
+       NVM_BLK_ST_CLOSED =     0x4,    /* Closed block - read-only */
+       NVM_BLK_ST_BAD =        0x8,    /* Bad block */
 };
 
 struct nvm_block {
@@ -234,7 +290,16 @@ struct nvm_block {
        unsigned long id;
 
        void *priv;
-       int type;
+       int state;
+};
+
+/* system block cpu representation */
+struct nvm_sb_info {
+       unsigned long           seqnr;
+       unsigned long           erase_cnt;
+       unsigned int            version;
+       char                    mmtype[NVM_MMTYPE_LEN];
+       struct ppa_addr         fs_ppa;
 };
 
 struct nvm_dev {
@@ -247,6 +312,9 @@ struct nvm_dev {
        struct nvmm_type *mt;
        void *mp;
 
+       /* System blocks */
+       struct nvm_sb_info sb;
+
        /* Device information */
        int nr_chnls;
        int nr_planes;
@@ -256,6 +324,7 @@ struct nvm_dev {
        int blks_per_lun;
        int sec_size;
        int oob_size;
+       int mccap;
        struct nvm_addr_format ppaf;
 
        /* Calculated/Cached values. These do not reflect the actual usable
@@ -268,6 +337,10 @@ struct nvm_dev {
        int sec_per_blk;
        int sec_per_lun;
 
+       /* lower page table */
+       int lps_per_blk;
+       int *lptbl;
+
        unsigned long total_pages;
        unsigned long total_blocks;
        int nr_luns;
@@ -280,6 +353,8 @@ struct nvm_dev {
        /* Backend device */
        struct request_queue *q;
        char name[DISK_NAME_LEN];
+
+       struct mutex mlock;
 };
 
 static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
@@ -345,9 +420,13 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
        return ppa;
 }
 
+static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
+{
+       return dev->lptbl[slc_pg];
+}
+
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int);
 typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
 typedef void (nvm_tgt_exit_fn)(void *);
 
@@ -358,7 +437,7 @@ struct nvm_tgt_type {
        /* target entry points */
        nvm_tgt_make_rq_fn *make_rq;
        nvm_tgt_capacity_fn *capacity;
-       nvm_tgt_end_io_fn *end_io;
+       nvm_end_io_fn *end_io;
 
        /* module-specific init/teardown */
        nvm_tgt_init_fn *init;
@@ -383,7 +462,6 @@ typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *);
 typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *);
 typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *);
 typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
                                                                unsigned long);
 typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
@@ -397,6 +475,8 @@ struct nvmm_type {
        nvmm_unregister_fn *unregister_mgr;
 
        /* Block administration callbacks */
+       nvmm_get_blk_fn *get_blk_unlocked;
+       nvmm_put_blk_fn *put_blk_unlocked;
        nvmm_get_blk_fn *get_blk;
        nvmm_put_blk_fn *put_blk;
        nvmm_open_blk_fn *open_blk;
@@ -404,7 +484,6 @@ struct nvmm_type {
        nvmm_flush_blk_fn *flush_blk;
 
        nvmm_submit_io_fn *submit_io;
-       nvmm_end_io_fn *end_io;
        nvmm_erase_blk_fn *erase_blk;
 
        /* Configuration management */
@@ -418,6 +497,10 @@ struct nvmm_type {
 extern int nvm_register_mgr(struct nvmm_type *);
 extern void nvm_unregister_mgr(struct nvmm_type *);
 
+extern struct nvm_block *nvm_get_blk_unlocked(struct nvm_dev *,
+                                       struct nvm_lun *, unsigned long);
+extern void nvm_put_blk_unlocked(struct nvm_dev *, struct nvm_block *);
+
 extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *,
                                                                unsigned long);
 extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *);
@@ -427,7 +510,36 @@ extern int nvm_register(struct request_queue *, char *,
 extern void nvm_unregister(char *);
 
 extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *);
+extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
+extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
+extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
+                                                       struct ppa_addr *, int);
+extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
+extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int);
 extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *);
+extern void nvm_end_io(struct nvm_rq *, int);
+extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
+                                                               void *, int);
+
+/* sysblk.c */
+#define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */
+
+/* system block on disk representation */
+struct nvm_system_block {
+       __be32                  magic;          /* magic signature */
+       __be32                  seqnr;          /* sequence number */
+       __be32                  erase_cnt;      /* erase count */
+       __be16                  version;        /* version number */
+       u8                      mmtype[NVM_MMTYPE_LEN]; /* media manager name */
+       __be64                  fs_ppa;         /* PPA for media manager
+                                                * superblock */
+};
+
+extern int nvm_get_sysblock(struct nvm_dev *, struct nvm_sb_info *);
+extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *);
+extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *);
+
+extern int nvm_dev_factory(struct nvm_dev *, int flags);
 #else /* CONFIG_NVM */
 struct nvm_dev_ops;
 
index c57e424d914b70fc5032f9b6d1ec918e8e195c64..4dca42fd32f52d17326e436c4d9fcbd86a0e8d24 100644 (file)
@@ -66,7 +66,7 @@ struct lock_class {
        /*
         * class-hash:
         */
-       struct list_head                hash_entry;
+       struct hlist_node               hash_entry;
 
        /*
         * global list of all lock-classes:
@@ -199,7 +199,7 @@ struct lock_chain {
        u8                              irq_context;
        u8                              depth;
        u16                             base;
-       struct list_head                entry;
+       struct hlist_node               entry;
        u64                             chain_key;
 };
 
index 46262284de478196e4e907674de9e5e04f364d60..04fc6e6c7ff0a48c6d3a9f39e5fd090f8479e449 100644 (file)
@@ -264,7 +264,7 @@ extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
 extern void lc_committed(struct lru_cache *lc);
 
 struct seq_file;
-extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
+extern void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
 
 extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
                                void (*detail) (struct seq_file *, struct lc_element *));
index 9ae48d4aeb5ec7d6fafa9528c79f65f62a091ed3..792c8981e63365b06a03e7feda7e647eafb533c0 100644 (file)
@@ -51,7 +51,7 @@ enum mem_cgroup_stat_index {
        MEM_CGROUP_STAT_SWAP,           /* # of pages, swapped out */
        MEM_CGROUP_STAT_NSTATS,
        /* default hierarchy stats */
-       MEMCG_SOCK,
+       MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS,
        MEMCG_NR_STAT,
 };
 
index 58391f2e0414e0e2001fab4c002771798fad3d33..116b284bc4ce81b77c5cd6f14266eb62c7ad4c03 100644 (file)
@@ -206,7 +206,8 @@ enum {
        MLX4_SET_PORT_GID_TABLE = 0x5,
        MLX4_SET_PORT_PRIO2TC   = 0x8,
        MLX4_SET_PORT_SCHEDULER = 0x9,
-       MLX4_SET_PORT_VXLAN     = 0xB
+       MLX4_SET_PORT_VXLAN     = 0xB,
+       MLX4_SET_PORT_ROCE_ADDR = 0xD
 };
 
 enum {
index d3133be12d922521e24528480edbdb31659a7007..a0e8cc8dcc67dcc88e7d32ab47a8ef22af52c946 100644 (file)
@@ -44,6 +44,8 @@
 
 #include <linux/timecounter.h>
 
+#define DEFAULT_UAR_PAGE_SHIFT  12
+
 #define MAX_MSIX_P_PORT                17
 #define MAX_MSIX               64
 #define MIN_MSIX_P_PORT                5
@@ -216,6 +218,7 @@ enum {
        MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN      = 1LL <<  30,
        MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
        MLX4_DEV_CAP_FLAG2_LB_SRC_CHK           = 1ULL << 32,
+       MLX4_DEV_CAP_FLAG2_ROCE_V1_V2           = 1ULL <<  33,
 };
 
 enum {
@@ -267,12 +270,14 @@ enum {
        MLX4_BMME_FLAG_TYPE_2_WIN       = 1 <<  9,
        MLX4_BMME_FLAG_RESERVED_LKEY    = 1 << 10,
        MLX4_BMME_FLAG_FAST_REG_WR      = 1 << 11,
+       MLX4_BMME_FLAG_ROCE_V1_V2       = 1 << 19,
        MLX4_BMME_FLAG_PORT_REMAP       = 1 << 24,
        MLX4_BMME_FLAG_VSD_INIT2RTR     = 1 << 28,
 };
 
 enum {
-       MLX4_FLAG_PORT_REMAP            = MLX4_BMME_FLAG_PORT_REMAP
+       MLX4_FLAG_PORT_REMAP            = MLX4_BMME_FLAG_PORT_REMAP,
+       MLX4_FLAG_ROCE_V1_V2            = MLX4_BMME_FLAG_ROCE_V1_V2
 };
 
 enum mlx4_event {
@@ -853,6 +858,7 @@ struct mlx4_dev {
        u64                     regid_promisc_array[MLX4_MAX_PORTS + 1];
        u64                     regid_allmulti_array[MLX4_MAX_PORTS + 1];
        struct mlx4_vf_dev     *dev_vfs;
+       u8  uar_page_shift;
 };
 
 struct mlx4_clock_params {
@@ -979,14 +985,11 @@ struct mlx4_mad_ifc {
        for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)     \
                if ((type) == (dev)->caps.port_mask[(port)])
 
-#define mlx4_foreach_non_ib_transport_port(port, dev)                     \
-       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
-               if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
-
 #define mlx4_foreach_ib_transport_port(port, dev)                         \
-       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
+       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
                if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
-                       ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+                       ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \
+                       ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
 
 #define MLX4_INVALID_SLAVE_ID  0xFF
 #define MLX4_SINK_COUNTER_INDEX(dev)   (dev->caps.max_counters - 1)
@@ -1457,6 +1460,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
 int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port);
 int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
 int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
 int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
@@ -1527,4 +1531,14 @@ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev,
 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
                                   struct mlx4_clock_params *params);
 
+static inline int mlx4_to_hw_uar_index(struct mlx4_dev *dev, int index)
+{
+       return (index << (PAGE_SHIFT - dev->uar_page_shift));
+}
+
+static inline int mlx4_get_num_reserved_uar(struct mlx4_dev *dev)
+{
+       /* The first 128 UARs are used for EQ doorbells */
+       return (128 >> (PAGE_SHIFT - dev->uar_page_shift));
+}
 #endif /* MLX4_DEVICE_H */
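Worked example for the two new helpers (illustrative): on a kernel with 64 KB pages (PAGE_SHIFT = 16) driving hardware with the default 4 KB UAR pages (uar_page_shift = 12), each kernel page covers 16 hardware UARs:

    /* mlx4_to_hw_uar_index(dev, 3)   == 3 << (16 - 12)   == 48 */
    /* mlx4_get_num_reserved_uar(dev) == 128 >> (16 - 12) == 8  */

With 4 KB kernel pages both shift amounts are zero, and the helpers degenerate to the identity mapping and a constant 128.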
index fe052e234906da97e88206d0b05db2442bced61f..587cdf943b524abd88fa945844b1e5b209b7fe2e 100644 (file)
@@ -194,7 +194,7 @@ struct mlx4_qp_context {
        u8                      mtu_msgmax;
        u8                      rq_size_stride;
        u8                      sq_size_stride;
-       u8                      rlkey;
+       u8                      rlkey_roce_mode;
        __be32                  usr_page;
        __be32                  local_qpn;
        __be32                  remote_qpn;
@@ -204,7 +204,8 @@ struct mlx4_qp_context {
        u32                     reserved1;
        __be32                  next_send_psn;
        __be32                  cqn_send;
-       u32                     reserved2[2];
+       __be16                  roce_entropy;
+       __be16                  reserved2[3];
        __be32                  last_acked_psn;
        __be32                  ssn;
        __be32                  params2;
@@ -487,4 +488,14 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 
 void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
+static inline u16 folded_qp(u32 q)
+{
+       u16 res;
+
+       res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00);
+       return res;
+}
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn);
+
 #endif /* MLX4_QP_H */
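folded_qp() compresses a 24-bit QP number into 16 bits by XOR-folding the top byte into the bottom one while keeping the middle byte intact. A worked example (illustrative input):

    /* q = 0x00ABCDEF:
     *   low byte:          q & 0xff             = 0xEF
     *   top byte:         (q & 0xff0000) >> 16  = 0xAB
     *   folded low byte:   0xEF ^ 0xAB          = 0x44
     *   middle byte kept:  q & 0xff00           = 0xCD00
     *   result:            0xCD00 | 0x44        = 0xCD44
     */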
index 7be845e306897ccd9d598ace7341976733164e5a..987764afa65c2344d5a85328f9f35a419e21db64 100644 (file)
@@ -223,6 +223,14 @@ enum {
 #define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
 #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
 
+#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
+
+enum {
+       MLX5_EVENT_QUEUE_TYPE_QP = 0,
+       MLX5_EVENT_QUEUE_TYPE_RQ = 1,
+       MLX5_EVENT_QUEUE_TYPE_SQ = 2,
+};
+
 enum mlx5_event {
        MLX5_EVENT_TYPE_COMP               = 0x0,
 
@@ -279,6 +287,26 @@ enum {
        MLX5_DEV_CAP_FLAG_CMDIF_CSUM    = 3LL << 46,
 };
 
+enum {
+       MLX5_ROCE_VERSION_1             = 0,
+       MLX5_ROCE_VERSION_2             = 2,
+};
+
+enum {
+       MLX5_ROCE_VERSION_1_CAP         = 1 << MLX5_ROCE_VERSION_1,
+       MLX5_ROCE_VERSION_2_CAP         = 1 << MLX5_ROCE_VERSION_2,
+};
+
+enum {
+       MLX5_ROCE_L3_TYPE_IPV4          = 0,
+       MLX5_ROCE_L3_TYPE_IPV6          = 1,
+};
+
+enum {
+       MLX5_ROCE_L3_TYPE_IPV4_CAP      = 1 << 1,
+       MLX5_ROCE_L3_TYPE_IPV6_CAP      = 1 << 2,
+};
+
 enum {
        MLX5_OPCODE_NOP                 = 0x00,
        MLX5_OPCODE_SEND_INVAL          = 0x01,
@@ -446,7 +474,7 @@ struct mlx5_init_seg {
        __be32                  rsvd2[880];
        __be32                  internal_timer_h;
        __be32                  internal_timer_l;
-       __be32                  rsrv3[2];
+       __be32                  rsvd3[2];
        __be32                  health_counter;
        __be32                  rsvd4[1019];
        __be64                  ieee1588_clk;
@@ -460,7 +488,9 @@ struct mlx5_eqe_comp {
 };
 
 struct mlx5_eqe_qp_srq {
-       __be32  reserved[6];
+       __be32  reserved1[5];
+       u8      type;
+       u8      reserved2[3];
        __be32  qp_srq_n;
 };
 
@@ -650,6 +680,12 @@ enum {
        CQE_RSS_HTYPE_L4        = 0x3 << 2,
 };
 
+enum {
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH        = 0x0,
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6       = 0x1,
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4       = 0x2,
+};
+
 enum {
        CQE_L2_OK       = 1 << 0,
        CQE_L3_OK       = 1 << 1,
index 5162f3533042825d03ea5c6244d1735bd55a8663..1e3006dcf35d735175ebe054d8867f89b994231c 100644 (file)
@@ -115,6 +115,11 @@ enum {
        MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum {
+       MLX5_ATOMIC_OPS_CMP_SWAP        = 1 << 0,
+       MLX5_ATOMIC_OPS_FETCH_ADD       = 1 << 1,
+};
+
 enum mlx5_page_fault_resume_flags {
        MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
        MLX5_PAGE_FAULT_RESUME_WRITE     = 1 << 1,
@@ -341,9 +346,11 @@ struct mlx5_core_mr {
 };
 
 enum mlx5_res_type {
-       MLX5_RES_QP,
-       MLX5_RES_SRQ,
-       MLX5_RES_XSRQ,
+       MLX5_RES_QP     = MLX5_EVENT_QUEUE_TYPE_QP,
+       MLX5_RES_RQ     = MLX5_EVENT_QUEUE_TYPE_RQ,
+       MLX5_RES_SQ     = MLX5_EVENT_QUEUE_TYPE_SQ,
+       MLX5_RES_SRQ    = 3,
+       MLX5_RES_XSRQ   = 4,
 };
 
 struct mlx5_core_rsc_common {
@@ -651,13 +658,6 @@ extern struct workqueue_struct *mlx5_core_wq;
        .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field),      \
        .struct_size_bytes   = sizeof((struct ib_unpacked_ ## header *)0)->field
 
-struct ib_field {
-       size_t struct_offset_bytes;
-       size_t struct_size_bytes;
-       int    offset_bits;
-       int    size_bits;
-};
-
 static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
 {
        return pci_get_drvdata(pdev);
index 68d73f82e009e60654952f237c0f9fed01bd67e5..51f1e540fc2b83bf9dd143637bad8acdbc44d0fd 100644 (file)
@@ -66,6 +66,11 @@ enum {
        MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN   = 0x3
 };
 
+enum {
+       MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE        = 0x0,
+       MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
+};
+
 enum {
        MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
        MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
@@ -202,15 +207,15 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         outer_dmac[0x1];
        u8         outer_smac[0x1];
        u8         outer_ether_type[0x1];
-       u8         reserved_0[0x1];
+       u8         reserved_at_3[0x1];
        u8         outer_first_prio[0x1];
        u8         outer_first_cfi[0x1];
        u8         outer_first_vid[0x1];
-       u8         reserved_1[0x1];
+       u8         reserved_at_7[0x1];
        u8         outer_second_prio[0x1];
        u8         outer_second_cfi[0x1];
        u8         outer_second_vid[0x1];
-       u8         reserved_2[0x1];
+       u8         reserved_at_b[0x1];
        u8         outer_sip[0x1];
        u8         outer_dip[0x1];
        u8         outer_frag[0x1];
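The reserved-field renames running through the rest of this file are mechanical: each placeholder moves from a running counter (reserved_N) to reserved_at_<offset>, where <offset> is the field's starting bit within its structure, in hex. Reading the hunk above as an example:

    u8         outer_ether_type[0x1];   /* occupies bit 0x2                  */
    u8         reserved_at_3[0x1];      /* starts at bit 0x3; was reserved_0 */
    u8         outer_first_prio[0x1];   /* bit 0x4                           */

The offset-based names make it possible to audit gaps in the layout without counting every preceding field.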
@@ -225,21 +230,21 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         outer_gre_protocol[0x1];
        u8         outer_gre_key[0x1];
        u8         outer_vxlan_vni[0x1];
-       u8         reserved_3[0x5];
+       u8         reserved_at_1a[0x5];
        u8         source_eswitch_port[0x1];
 
        u8         inner_dmac[0x1];
        u8         inner_smac[0x1];
        u8         inner_ether_type[0x1];
-       u8         reserved_4[0x1];
+       u8         reserved_at_23[0x1];
        u8         inner_first_prio[0x1];
        u8         inner_first_cfi[0x1];
        u8         inner_first_vid[0x1];
-       u8         reserved_5[0x1];
+       u8         reserved_at_27[0x1];
        u8         inner_second_prio[0x1];
        u8         inner_second_cfi[0x1];
        u8         inner_second_vid[0x1];
-       u8         reserved_6[0x1];
+       u8         reserved_at_2b[0x1];
        u8         inner_sip[0x1];
        u8         inner_dip[0x1];
        u8         inner_frag[0x1];
@@ -251,37 +256,37 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         inner_tcp_sport[0x1];
        u8         inner_tcp_dport[0x1];
        u8         inner_tcp_flags[0x1];
-       u8         reserved_7[0x9];
+       u8         reserved_at_37[0x9];
 
-       u8         reserved_8[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_flow_table_prop_layout_bits {
        u8         ft_support[0x1];
-       u8         reserved_0[0x2];
+       u8         reserved_at_1[0x2];
        u8         flow_modify_en[0x1];
        u8         modify_root[0x1];
        u8         identified_miss_table_mode[0x1];
        u8         flow_table_modify[0x1];
-       u8         reserved_1[0x19];
+       u8         reserved_at_7[0x19];
 
-       u8         reserved_2[0x2];
+       u8         reserved_at_20[0x2];
        u8         log_max_ft_size[0x6];
-       u8         reserved_3[0x10];
+       u8         reserved_at_28[0x10];
        u8         max_ft_level[0x8];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_40[0x20];
 
-       u8         reserved_5[0x18];
+       u8         reserved_at_60[0x18];
        u8         log_max_ft_num[0x8];
 
-       u8         reserved_6[0x18];
+       u8         reserved_at_80[0x18];
        u8         log_max_destination[0x8];
 
-       u8         reserved_7[0x18];
+       u8         reserved_at_a0[0x18];
        u8         log_max_flow[0x8];
 
-       u8         reserved_8[0x40];
+       u8         reserved_at_c0[0x40];
 
        struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support;
 
@@ -293,13 +298,13 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
        u8         receive[0x1];
        u8         write[0x1];
        u8         read[0x1];
-       u8         reserved_0[0x1];
+       u8         reserved_at_4[0x1];
        u8         srq_receive[0x1];
-       u8         reserved_1[0x1a];
+       u8         reserved_at_6[0x1a];
 };
 
 struct mlx5_ifc_ipv4_layout_bits {
-       u8         reserved_0[0x60];
+       u8         reserved_at_0[0x60];
 
        u8         ipv4[0x20];
 };
@@ -311,7 +316,7 @@ struct mlx5_ifc_ipv6_layout_bits {
 union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
        struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
        struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
-       u8         reserved_0[0x80];
+       u8         reserved_at_0[0x80];
 };
 
 struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
@@ -331,15 +336,15 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
        u8         ip_dscp[0x6];
        u8         ip_ecn[0x2];
        u8         vlan_tag[0x1];
-       u8         reserved_0[0x1];
+       u8         reserved_at_91[0x1];
        u8         frag[0x1];
-       u8         reserved_1[0x4];
+       u8         reserved_at_93[0x4];
        u8         tcp_flags[0x9];
 
        u8         tcp_sport[0x10];
        u8         tcp_dport[0x10];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_c0[0x20];
 
        u8         udp_sport[0x10];
        u8         udp_dport[0x10];
@@ -350,9 +355,9 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
 };
 
 struct mlx5_ifc_fte_match_set_misc_bits {
-       u8         reserved_0[0x20];
+       u8         reserved_at_0[0x20];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         source_port[0x10];
 
        u8         outer_second_prio[0x3];
@@ -364,31 +369,31 @@ struct mlx5_ifc_fte_match_set_misc_bits {
 
        u8         outer_second_vlan_tag[0x1];
        u8         inner_second_vlan_tag[0x1];
-       u8         reserved_2[0xe];
+       u8         reserved_at_62[0xe];
        u8         gre_protocol[0x10];
 
        u8         gre_key_h[0x18];
        u8         gre_key_l[0x8];
 
        u8         vxlan_vni[0x18];
-       u8         reserved_3[0x8];
+       u8         reserved_at_b8[0x8];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_c0[0x20];
 
-       u8         reserved_5[0xc];
+       u8         reserved_at_e0[0xc];
        u8         outer_ipv6_flow_label[0x14];
 
-       u8         reserved_6[0xc];
+       u8         reserved_at_100[0xc];
        u8         inner_ipv6_flow_label[0x14];
 
-       u8         reserved_7[0xe0];
+       u8         reserved_at_120[0xe0];
 };
 
 struct mlx5_ifc_cmd_pas_bits {
        u8         pa_h[0x20];
 
        u8         pa_l[0x14];
-       u8         reserved_0[0xc];
+       u8         reserved_at_34[0xc];
 };
 
 struct mlx5_ifc_uint64_bits {
@@ -413,31 +418,31 @@ enum {
 struct mlx5_ifc_ads_bits {
        u8         fl[0x1];
        u8         free_ar[0x1];
-       u8         reserved_0[0xe];
+       u8         reserved_at_2[0xe];
        u8         pkey_index[0x10];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_20[0x8];
        u8         grh[0x1];
        u8         mlid[0x7];
        u8         rlid[0x10];
 
        u8         ack_timeout[0x5];
-       u8         reserved_2[0x3];
+       u8         reserved_at_45[0x3];
        u8         src_addr_index[0x8];
-       u8         reserved_3[0x4];
+       u8         reserved_at_50[0x4];
        u8         stat_rate[0x4];
        u8         hop_limit[0x8];
 
-       u8         reserved_4[0x4];
+       u8         reserved_at_60[0x4];
        u8         tclass[0x8];
        u8         flow_label[0x14];
 
        u8         rgid_rip[16][0x8];
 
-       u8         reserved_5[0x4];
+       u8         reserved_at_100[0x4];
        u8         f_dscp[0x1];
        u8         f_ecn[0x1];
-       u8         reserved_6[0x1];
+       u8         reserved_at_106[0x1];
        u8         f_eth_prio[0x1];
        u8         ecn[0x2];
        u8         dscp[0x6];
@@ -453,25 +458,25 @@ struct mlx5_ifc_ads_bits {
 };
 
 struct mlx5_ifc_flow_table_nic_cap_bits {
-       u8         reserved_0[0x200];
+       u8         reserved_at_0[0x200];
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
 
-       u8         reserved_1[0x200];
+       u8         reserved_at_400[0x200];
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer;
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit;
 
-       u8         reserved_2[0x200];
+       u8         reserved_at_a00[0x200];
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
 
-       u8         reserved_3[0x7200];
+       u8         reserved_at_e00[0x7200];
 };
 
 struct mlx5_ifc_flow_table_eswitch_cap_bits {
-       u8     reserved_0[0x200];
+       u8     reserved_at_0[0x200];
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
 
@@ -479,7 +484,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress;
 
-       u8      reserved_1[0x7800];
+       u8      reserved_at_800[0x7800];
 };
 
 struct mlx5_ifc_e_switch_cap_bits {
@@ -488,9 +493,9 @@ struct mlx5_ifc_e_switch_cap_bits {
        u8         vport_svlan_insert[0x1];
        u8         vport_cvlan_insert_if_not_exist[0x1];
        u8         vport_cvlan_insert_overwrite[0x1];
-       u8         reserved_0[0x1b];
+       u8         reserved_at_5[0x1b];
 
-       u8         reserved_1[0x7e0];
+       u8         reserved_at_20[0x7e0];
 };
 
 struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
@@ -499,51 +504,51 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
        u8         lro_cap[0x1];
        u8         lro_psh_flag[0x1];
        u8         lro_time_stamp[0x1];
-       u8         reserved_0[0x3];
+       u8         reserved_at_5[0x3];
        u8         self_lb_en_modifiable[0x1];
-       u8         reserved_1[0x2];
+       u8         reserved_at_9[0x2];
        u8         max_lso_cap[0x5];
-       u8         reserved_2[0x4];
+       u8         reserved_at_10[0x4];
        u8         rss_ind_tbl_cap[0x4];
-       u8         reserved_3[0x3];
+       u8         reserved_at_18[0x3];
        u8         tunnel_lso_const_out_ip_id[0x1];
-       u8         reserved_4[0x2];
+       u8         reserved_at_1c[0x2];
        u8         tunnel_statless_gre[0x1];
        u8         tunnel_stateless_vxlan[0x1];
 
-       u8         reserved_5[0x20];
+       u8         reserved_at_20[0x20];
 
-       u8         reserved_6[0x10];
+       u8         reserved_at_40[0x10];
        u8         lro_min_mss_size[0x10];
 
-       u8         reserved_7[0x120];
+       u8         reserved_at_60[0x120];
 
        u8         lro_timer_supported_periods[4][0x20];
 
-       u8         reserved_8[0x600];
+       u8         reserved_at_200[0x600];
 };
 
 struct mlx5_ifc_roce_cap_bits {
        u8         roce_apm[0x1];
-       u8         reserved_0[0x1f];
+       u8         reserved_at_1[0x1f];
 
-       u8         reserved_1[0x60];
+       u8         reserved_at_20[0x60];
 
-       u8         reserved_2[0xc];
+       u8         reserved_at_80[0xc];
        u8         l3_type[0x4];
-       u8         reserved_3[0x8];
+       u8         reserved_at_90[0x8];
        u8         roce_version[0x8];
 
-       u8         reserved_4[0x10];
+       u8         reserved_at_a0[0x10];
        u8         r_roce_dest_udp_port[0x10];
 
        u8         r_roce_max_src_udp_port[0x10];
        u8         r_roce_min_src_udp_port[0x10];
 
-       u8         reserved_5[0x10];
+       u8         reserved_at_e0[0x10];
        u8         roce_address_table_size[0x10];
 
-       u8         reserved_6[0x700];
+       u8         reserved_at_100[0x700];
 };
 
 enum {
@@ -571,32 +576,35 @@ enum {
 };
 
 struct mlx5_ifc_atomic_caps_bits {
-       u8         reserved_0[0x40];
+       u8         reserved_at_0[0x40];
+
+       u8         atomic_req_8B_endianess_mode[0x2];
+       u8         reserved_at_42[0x4];
+       u8         supported_atomic_req_8B_endianess_mode_1[0x1];
 
-       u8         atomic_req_endianness[0x1];
-       u8         reserved_1[0x1f];
+       u8         reserved_at_47[0x19];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_80[0x10];
        u8         atomic_operations[0x10];
 
-       u8         reserved_4[0x10];
+       u8         reserved_at_a0[0x10];
        u8         atomic_size_qp[0x10];
 
-       u8         reserved_5[0x10];
+       u8         reserved_at_c0[0x10];
        u8         atomic_size_dc[0x10];
 
-       u8         reserved_6[0x720];
+       u8         reserved_at_e0[0x720];
 };
 
 struct mlx5_ifc_odp_cap_bits {
-       u8         reserved_0[0x40];
+       u8         reserved_at_0[0x40];
 
        u8         sig[0x1];
-       u8         reserved_1[0x1f];
+       u8         reserved_at_41[0x1f];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 
        struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps;
 
@@ -604,7 +612,7 @@ struct mlx5_ifc_odp_cap_bits {
 
        struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps;
 
-       u8         reserved_3[0x720];
+       u8         reserved_at_e0[0x720];
 };
 
 enum {
@@ -652,55 +660,55 @@ enum {
 };
 
 struct mlx5_ifc_cmd_hca_cap_bits {
-       u8         reserved_0[0x80];
+       u8         reserved_at_0[0x80];
 
        u8         log_max_srq_sz[0x8];
        u8         log_max_qp_sz[0x8];
-       u8         reserved_1[0xb];
+       u8         reserved_at_90[0xb];
        u8         log_max_qp[0x5];
 
-       u8         reserved_2[0xb];
+       u8         reserved_at_a0[0xb];
        u8         log_max_srq[0x5];
-       u8         reserved_3[0x10];
+       u8         reserved_at_b0[0x10];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_c0[0x8];
        u8         log_max_cq_sz[0x8];
-       u8         reserved_5[0xb];
+       u8         reserved_at_d0[0xb];
        u8         log_max_cq[0x5];
 
        u8         log_max_eq_sz[0x8];
-       u8         reserved_6[0x2];
+       u8         reserved_at_e8[0x2];
        u8         log_max_mkey[0x6];
-       u8         reserved_7[0xc];
+       u8         reserved_at_f0[0xc];
        u8         log_max_eq[0x4];
 
        u8         max_indirection[0x8];
-       u8         reserved_8[0x1];
+       u8         reserved_at_108[0x1];
        u8         log_max_mrw_sz[0x7];
-       u8         reserved_9[0x2];
+       u8         reserved_at_110[0x2];
        u8         log_max_bsf_list_size[0x6];
-       u8         reserved_10[0x2];
+       u8         reserved_at_118[0x2];
        u8         log_max_klm_list_size[0x6];
 
-       u8         reserved_11[0xa];
+       u8         reserved_at_120[0xa];
        u8         log_max_ra_req_dc[0x6];
-       u8         reserved_12[0xa];
+       u8         reserved_at_130[0xa];
        u8         log_max_ra_res_dc[0x6];
 
-       u8         reserved_13[0xa];
+       u8         reserved_at_140[0xa];
        u8         log_max_ra_req_qp[0x6];
-       u8         reserved_14[0xa];
+       u8         reserved_at_150[0xa];
        u8         log_max_ra_res_qp[0x6];
 
        u8         pad_cap[0x1];
        u8         cc_query_allowed[0x1];
        u8         cc_modify_allowed[0x1];
-       u8         reserved_15[0xd];
+       u8         reserved_at_163[0xd];
        u8         gid_table_size[0x10];
 
        u8         out_of_seq_cnt[0x1];
        u8         vport_counters[0x1];
-       u8         reserved_16[0x4];
+       u8         reserved_at_182[0x4];
        u8         max_qp_cnt[0xa];
        u8         pkey_table_size[0x10];
 
@@ -708,157 +716,158 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         vhca_group_manager[0x1];
        u8         ib_virt[0x1];
        u8         eth_virt[0x1];
-       u8         reserved_17[0x1];
+       u8         reserved_at_1a4[0x1];
        u8         ets[0x1];
        u8         nic_flow_table[0x1];
        u8         eswitch_flow_table[0x1];
        u8         early_vf_enable;
-       u8         reserved_18[0x2];
+       u8         reserved_at_1a8[0x2];
        u8         local_ca_ack_delay[0x5];
-       u8         reserved_19[0x6];
+       u8         reserved_at_1af[0x6];
        u8         port_type[0x2];
        u8         num_ports[0x8];
 
-       u8         reserved_20[0x3];
+       u8         reserved_at_1bf[0x3];
        u8         log_max_msg[0x5];
-       u8         reserved_21[0x18];
+       u8         reserved_at_1c7[0x18];
 
        u8         stat_rate_support[0x10];
-       u8         reserved_22[0xc];
+       u8         reserved_at_1ef[0xc];
        u8         cqe_version[0x4];
 
        u8         compact_address_vector[0x1];
-       u8         reserved_23[0xe];
+       u8         reserved_at_200[0xe];
        u8         drain_sigerr[0x1];
        u8         cmdif_checksum[0x2];
        u8         sigerr_cqe[0x1];
-       u8         reserved_24[0x1];
+       u8         reserved_at_212[0x1];
        u8         wq_signature[0x1];
        u8         sctr_data_cqe[0x1];
-       u8         reserved_25[0x1];
+       u8         reserved_at_215[0x1];
        u8         sho[0x1];
        u8         tph[0x1];
        u8         rf[0x1];
        u8         dct[0x1];
-       u8         reserved_26[0x1];
+       u8         reserved_at_21a[0x1];
        u8         eth_net_offloads[0x1];
        u8         roce[0x1];
        u8         atomic[0x1];
-       u8         reserved_27[0x1];
+       u8         reserved_at_21e[0x1];
 
        u8         cq_oi[0x1];
        u8         cq_resize[0x1];
        u8         cq_moderation[0x1];
-       u8         reserved_28[0x3];
+       u8         reserved_at_222[0x3];
        u8         cq_eq_remap[0x1];
        u8         pg[0x1];
        u8         block_lb_mc[0x1];
-       u8         reserved_29[0x1];
+       u8         reserved_at_228[0x1];
        u8         scqe_break_moderation[0x1];
-       u8         reserved_30[0x1];
+       u8         reserved_at_22a[0x1];
        u8         cd[0x1];
-       u8         reserved_31[0x1];
+       u8         reserved_at_22c[0x1];
        u8         apm[0x1];
-       u8         reserved_32[0x7];
+       u8         reserved_at_22e[0x7];
        u8         qkv[0x1];
        u8         pkv[0x1];
-       u8         reserved_33[0x4];
+       u8         reserved_at_237[0x4];
        u8         xrc[0x1];
        u8         ud[0x1];
        u8         uc[0x1];
        u8         rc[0x1];
 
-       u8         reserved_34[0xa];
+       u8         reserved_at_23f[0xa];
        u8         uar_sz[0x6];
-       u8         reserved_35[0x8];
+       u8         reserved_at_24f[0x8];
        u8         log_pg_sz[0x8];
 
        u8         bf[0x1];
-       u8         reserved_36[0x1];
+       u8         reserved_at_260[0x1];
        u8         pad_tx_eth_packet[0x1];
-       u8         reserved_37[0x8];
+       u8         reserved_at_262[0x8];
        u8         log_bf_reg_size[0x5];
-       u8         reserved_38[0x10];
+       u8         reserved_at_26f[0x10];
 
-       u8         reserved_39[0x10];
+       u8         reserved_at_27f[0x10];
        u8         max_wqe_sz_sq[0x10];
 
-       u8         reserved_40[0x10];
+       u8         reserved_at_29f[0x10];
        u8         max_wqe_sz_rq[0x10];
 
-       u8         reserved_41[0x10];
+       u8         reserved_at_2bf[0x10];
        u8         max_wqe_sz_sq_dc[0x10];
 
-       u8         reserved_42[0x7];
+       u8         reserved_at_2df[0x7];
        u8         max_qp_mcg[0x19];
 
-       u8         reserved_43[0x18];
+       u8         reserved_at_2ff[0x18];
        u8         log_max_mcg[0x8];
 
-       u8         reserved_44[0x3];
+       u8         reserved_at_31f[0x3];
        u8         log_max_transport_domain[0x5];
-       u8         reserved_45[0x3];
+       u8         reserved_at_327[0x3];
        u8         log_max_pd[0x5];
-       u8         reserved_46[0xb];
+       u8         reserved_at_32f[0xb];
        u8         log_max_xrcd[0x5];
 
-       u8         reserved_47[0x20];
+       u8         reserved_at_33f[0x20];
 
-       u8         reserved_48[0x3];
+       u8         reserved_at_35f[0x3];
        u8         log_max_rq[0x5];
-       u8         reserved_49[0x3];
+       u8         reserved_at_367[0x3];
        u8         log_max_sq[0x5];
-       u8         reserved_50[0x3];
+       u8         reserved_at_36f[0x3];
        u8         log_max_tir[0x5];
-       u8         reserved_51[0x3];
+       u8         reserved_at_377[0x3];
        u8         log_max_tis[0x5];
 
        u8         basic_cyclic_rcv_wqe[0x1];
-       u8         reserved_52[0x2];
+       u8         reserved_at_380[0x2];
        u8         log_max_rmp[0x5];
-       u8         reserved_53[0x3];
+       u8         reserved_at_387[0x3];
        u8         log_max_rqt[0x5];
-       u8         reserved_54[0x3];
+       u8         reserved_at_38f[0x3];
        u8         log_max_rqt_size[0x5];
-       u8         reserved_55[0x3];
+       u8         reserved_at_397[0x3];
        u8         log_max_tis_per_sq[0x5];
 
-       u8         reserved_56[0x3];
+       u8         reserved_at_39f[0x3];
        u8         log_max_stride_sz_rq[0x5];
-       u8         reserved_57[0x3];
+       u8         reserved_at_3a7[0x3];
        u8         log_min_stride_sz_rq[0x5];
-       u8         reserved_58[0x3];
+       u8         reserved_at_3af[0x3];
        u8         log_max_stride_sz_sq[0x5];
-       u8         reserved_59[0x3];
+       u8         reserved_at_3b7[0x3];
        u8         log_min_stride_sz_sq[0x5];
 
-       u8         reserved_60[0x1b];
+       u8         reserved_at_3bf[0x1b];
        u8         log_max_wq_sz[0x5];
 
        u8         nic_vport_change_event[0x1];
-       u8         reserved_61[0xa];
+       u8         reserved_at_3e0[0xa];
        u8         log_max_vlan_list[0x5];
-       u8         reserved_62[0x3];
+       u8         reserved_at_3ef[0x3];
        u8         log_max_current_mc_list[0x5];
-       u8         reserved_63[0x3];
+       u8         reserved_at_3f7[0x3];
        u8         log_max_current_uc_list[0x5];
 
-       u8         reserved_64[0x80];
+       u8         reserved_at_3ff[0x80];
 
-       u8         reserved_65[0x3];
+       u8         reserved_at_47f[0x3];
        u8         log_max_l2_table[0x5];
-       u8         reserved_66[0x8];
+       u8         reserved_at_487[0x8];
        u8         log_uar_page_sz[0x10];
 
-       u8         reserved_67[0x40];
+       u8         reserved_at_49f[0x20];
+       u8         device_frequency_mhz[0x20];
        u8         device_frequency_khz[0x20];
-       u8         reserved_68[0x5f];
+       u8         reserved_at_4ff[0x5f];
        u8         cqe_zip[0x1];
 
        u8         cqe_zip_timeout[0x10];
        u8         cqe_zip_max_num[0x10];
 
-       u8         reserved_69[0x220];
+       u8         reserved_at_57f[0x220];
 };
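
Every member of these *_bits structs follows the mlx5_ifc convention: "u8 name[0xN]" declares an N-bit field at the running bit offset from the start of the structure, and the fields are accessed only through the MLX5_GET/MLX5_SET accessor macros, never as real byte arrays. The rename running through this hunk swaps sequence numbers (reserved_0, reserved_1, ...) for the hex bit offset at which each gap starts, so claiming bits out of one gap no longer renumbers every later reserved field, and future diffs shrink to the lines that actually change. A minimal sketch of the scheme, with purely illustrative field names:

        /* hypothetical layout, not taken from mlx5_ifc.h */
        struct example_ifc_bits {
                u8         field_a[0x10];        /* bits 0x00..0x0f */
                u8         reserved_at_10[0x8];  /* 8-bit gap starting at bit 0x10 */
                u8         field_b[0x8];         /* bits 0x18..0x1f */
        };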
 
 enum mlx5_flow_destination_type {
@@ -871,7 +880,7 @@ struct mlx5_ifc_dest_format_struct_bits {
        u8         destination_type[0x8];
        u8         destination_id[0x18];
 
-       u8         reserved_0[0x20];
+       u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_fte_match_param_bits {
@@ -881,7 +890,7 @@ struct mlx5_ifc_fte_match_param_bits {
 
        struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers;
 
-       u8         reserved_0[0xa00];
+       u8         reserved_at_600[0xa00];
 };
 
 enum {
@@ -913,18 +922,18 @@ struct mlx5_ifc_wq_bits {
        u8         wq_signature[0x1];
        u8         end_padding_mode[0x2];
        u8         cd_slave[0x1];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         hds_skip_first_sge[0x1];
        u8         log2_hds_buf_size[0x3];
-       u8         reserved_1[0x7];
+       u8         reserved_at_24[0x7];
        u8         page_offset[0x5];
        u8         lwm[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         pd[0x18];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_60[0x8];
        u8         uar_page[0x18];
 
        u8         dbr_addr[0x40];
@@ -933,60 +942,60 @@ struct mlx5_ifc_wq_bits {
 
        u8         sw_counter[0x20];
 
-       u8         reserved_4[0xc];
+       u8         reserved_at_100[0xc];
        u8         log_wq_stride[0x4];
-       u8         reserved_5[0x3];
+       u8         reserved_at_110[0x3];
        u8         log_wq_pg_sz[0x5];
-       u8         reserved_6[0x3];
+       u8         reserved_at_118[0x3];
        u8         log_wq_sz[0x5];
 
-       u8         reserved_7[0x4e0];
+       u8         reserved_at_120[0x4e0];
 
        struct mlx5_ifc_cmd_pas_bits pas[0];
 };
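
The "pas[0]" tail is the zero-length-array idiom for a variable-length trailer: a command that carries a work queue appends one 64-bit physical-address entry per page backing the queue, after the fixed-size context. A hedged sketch of sizing such a command buffer (variable names illustrative; MLX5_ST_SZ_BYTES is the existing helper from linux/mlx5/device.h, and kernels of this era spell the allocation helper mlx5_vzalloc rather than kvzalloc):

        int npas  = 4;  /* pages backing the queue, example value */
        int inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
                    npas * MLX5_ST_SZ_BYTES(cmd_pas);
        void *in  = kvzalloc(inlen, GFP_KERNEL);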
 
 struct mlx5_ifc_rq_num_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         rq_num[0x18];
 };
 
 struct mlx5_ifc_mac_address_layout_bits {
-       u8         reserved_0[0x10];
+       u8         reserved_at_0[0x10];
        u8         mac_addr_47_32[0x10];
 
        u8         mac_addr_31_0[0x20];
 };
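
mac_addr_47_32 and mac_addr_31_0 split the 48-bit MAC across two big-endian dwords, which is why drivers pack it with two accessor calls rather than a memcpy. A sketch using only names visible in this file (ctx is assumed to point at a surrounding nic_vport_context buffer):

        u8 mac[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        void *layout = MLX5_ADDR_OF(nic_vport_context, ctx, permanent_address);

        MLX5_SET(mac_address_layout, layout, mac_addr_47_32,
                 (u32)mac[0] << 8 | mac[1]);
        MLX5_SET(mac_address_layout, layout, mac_addr_31_0,
                 (u32)mac[2] << 24 | (u32)mac[3] << 16 |
                 (u32)mac[4] << 8  | mac[5]);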
 
 struct mlx5_ifc_vlan_layout_bits {
-       u8         reserved_0[0x14];
+       u8         reserved_at_0[0x14];
        u8         vlan[0x0c];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_cong_control_r_roce_ecn_np_bits {
-       u8         reserved_0[0xa0];
+       u8         reserved_at_0[0xa0];
 
        u8         min_time_between_cnps[0x20];
 
-       u8         reserved_1[0x12];
+       u8         reserved_at_c0[0x12];
        u8         cnp_dscp[0x6];
-       u8         reserved_2[0x5];
+       u8         reserved_at_d8[0x5];
        u8         cnp_802p_prio[0x3];
 
-       u8         reserved_3[0x720];
+       u8         reserved_at_e0[0x720];
 };
 
 struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits {
-       u8         reserved_0[0x60];
+       u8         reserved_at_0[0x60];
 
-       u8         reserved_1[0x4];
+       u8         reserved_at_60[0x4];
        u8         clamp_tgt_rate[0x1];
-       u8         reserved_2[0x3];
+       u8         reserved_at_65[0x3];
        u8         clamp_tgt_rate_after_time_inc[0x1];
-       u8         reserved_3[0x17];
+       u8         reserved_at_69[0x17];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_80[0x20];
 
        u8         rpg_time_reset[0x20];
 
@@ -1006,7 +1015,7 @@ struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits {
 
        u8         rpg_min_rate[0x20];
 
-       u8         reserved_5[0xe0];
+       u8         reserved_at_1c0[0xe0];
 
        u8         rate_to_set_on_first_cnp[0x20];
 
@@ -1016,15 +1025,15 @@ struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits {
 
        u8         rate_reduce_monitor_period[0x20];
 
-       u8         reserved_6[0x20];
+       u8         reserved_at_320[0x20];
 
        u8         initial_alpha_value[0x20];
 
-       u8         reserved_7[0x4a0];
+       u8         reserved_at_360[0x4a0];
 };
 
 struct mlx5_ifc_cong_control_802_1qau_rp_bits {
-       u8         reserved_0[0x80];
+       u8         reserved_at_0[0x80];
 
        u8         rppp_max_rps[0x20];
 
@@ -1046,7 +1055,7 @@ struct mlx5_ifc_cong_control_802_1qau_rp_bits {
 
        u8         rpg_min_rate[0x20];
 
-       u8         reserved_1[0x640];
+       u8         reserved_at_1c0[0x640];
 };
 
 enum {
@@ -1196,7 +1205,7 @@ struct mlx5_ifc_phys_layer_cntrs_bits {
 
        u8         successful_recovery_events[0x20];
 
-       u8         reserved_0[0x180];
+       u8         reserved_at_640[0x180];
 };
 
 struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits {
@@ -1204,7 +1213,7 @@ struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits {
 
        u8         transmit_queue_low[0x20];
 
-       u8         reserved_0[0x780];
+       u8         reserved_at_40[0x780];
 };
 
 struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
@@ -1212,7 +1221,7 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
 
        u8         rx_octets_low[0x20];
 
-       u8         reserved_0[0xc0];
+       u8         reserved_at_40[0xc0];
 
        u8         rx_frames_high[0x20];
 
@@ -1222,7 +1231,7 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
 
        u8         tx_octets_low[0x20];
 
-       u8         reserved_1[0xc0];
+       u8         reserved_at_180[0xc0];
 
        u8         tx_frames_high[0x20];
 
@@ -1248,7 +1257,7 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
 
        u8         rx_pause_transition_low[0x20];
 
-       u8         reserved_2[0x400];
+       u8         reserved_at_3c0[0x400];
 };
 
 struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
@@ -1256,7 +1265,7 @@ struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
 
        u8         port_transmit_wait_low[0x20];
 
-       u8         reserved_0[0x780];
+       u8         reserved_at_40[0x780];
 };
 
 struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
@@ -1324,7 +1333,7 @@ struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
 
        u8         dot3out_pause_frames_low[0x20];
 
-       u8         reserved_0[0x3c0];
+       u8         reserved_at_400[0x3c0];
 };
 
 struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits {
@@ -1412,7 +1421,7 @@ struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits {
 
        u8         ether_stats_pkts8192to10239octets_low[0x20];
 
-       u8         reserved_0[0x280];
+       u8         reserved_at_540[0x280];
 };
 
 struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits {
@@ -1468,7 +1477,7 @@ struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits {
 
        u8         if_out_broadcast_pkts_low[0x20];
 
-       u8         reserved_0[0x480];
+       u8         reserved_at_340[0x480];
 };
 
 struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
@@ -1548,54 +1557,54 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
 
        u8         a_pause_mac_ctrl_frames_transmitted_low[0x20];
 
-       u8         reserved_0[0x300];
+       u8         reserved_at_4c0[0x300];
 };
 
 struct mlx5_ifc_cmd_inter_comp_event_bits {
        u8         command_completion_vector[0x20];
 
-       u8         reserved_0[0xc0];
+       u8         reserved_at_20[0xc0];
 };
 
 struct mlx5_ifc_stall_vl_event_bits {
-       u8         reserved_0[0x18];
+       u8         reserved_at_0[0x18];
        u8         port_num[0x1];
-       u8         reserved_1[0x3];
+       u8         reserved_at_19[0x3];
        u8         vl[0x4];
 
-       u8         reserved_2[0xa0];
+       u8         reserved_at_20[0xa0];
 };
 
 struct mlx5_ifc_db_bf_congestion_event_bits {
        u8         event_subtype[0x8];
-       u8         reserved_0[0x8];
+       u8         reserved_at_8[0x8];
        u8         congestion_level[0x8];
-       u8         reserved_1[0x8];
+       u8         reserved_at_18[0x8];
 
-       u8         reserved_2[0xa0];
+       u8         reserved_at_20[0xa0];
 };
 
 struct mlx5_ifc_gpio_event_bits {
-       u8         reserved_0[0x60];
+       u8         reserved_at_0[0x60];
 
        u8         gpio_event_hi[0x20];
 
        u8         gpio_event_lo[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_a0[0x40];
 };
 
 struct mlx5_ifc_port_state_change_event_bits {
-       u8         reserved_0[0x40];
+       u8         reserved_at_0[0x40];
 
        u8         port_num[0x4];
-       u8         reserved_1[0x1c];
+       u8         reserved_at_44[0x1c];
 
-       u8         reserved_2[0x80];
+       u8         reserved_at_60[0x80];
 };
 
 struct mlx5_ifc_dropped_packet_logged_bits {
-       u8         reserved_0[0xe0];
+       u8         reserved_at_0[0xe0];
 };
 
 enum {
@@ -1604,15 +1613,15 @@ enum {
 };
 
 struct mlx5_ifc_cq_error_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         cqn[0x18];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_20[0x20];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_40[0x18];
        u8         syndrome[0x8];
 
-       u8         reserved_3[0x80];
+       u8         reserved_at_60[0x80];
 };
 
 struct mlx5_ifc_rdma_page_fault_event_bits {
@@ -1620,14 +1629,14 @@ struct mlx5_ifc_rdma_page_fault_event_bits {
 
        u8         r_key[0x20];
 
-       u8         reserved_0[0x10];
+       u8         reserved_at_40[0x10];
        u8         packet_len[0x10];
 
        u8         rdma_op_len[0x20];
 
        u8         rdma_va[0x40];
 
-       u8         reserved_1[0x5];
+       u8         reserved_at_c0[0x5];
        u8         rdma[0x1];
        u8         write[0x1];
        u8         requestor[0x1];
@@ -1637,15 +1646,15 @@ struct mlx5_ifc_rdma_page_fault_event_bits {
 struct mlx5_ifc_wqe_associated_page_fault_event_bits {
        u8         bytes_committed[0x20];
 
-       u8         reserved_0[0x10];
+       u8         reserved_at_20[0x10];
        u8         wqe_index[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_40[0x10];
        u8         len[0x10];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_60[0x60];
 
-       u8         reserved_3[0x5];
+       u8         reserved_at_c0[0x5];
        u8         rdma[0x1];
        u8         write_read[0x1];
        u8         requestor[0x1];
@@ -1653,26 +1662,26 @@ struct mlx5_ifc_wqe_associated_page_fault_event_bits {
 };
 
 struct mlx5_ifc_qp_events_bits {
-       u8         reserved_0[0xa0];
+       u8         reserved_at_0[0xa0];
 
        u8         type[0x8];
-       u8         reserved_1[0x18];
+       u8         reserved_at_a8[0x18];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_c0[0x8];
        u8         qpn_rqn_sqn[0x18];
 };
 
 struct mlx5_ifc_dct_events_bits {
-       u8         reserved_0[0xc0];
+       u8         reserved_at_0[0xc0];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_c0[0x8];
        u8         dct_number[0x18];
 };
 
 struct mlx5_ifc_comp_event_bits {
-       u8         reserved_0[0xc0];
+       u8         reserved_at_0[0xc0];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_c0[0x8];
        u8         cq_number[0x18];
 };
 
@@ -1745,41 +1754,41 @@ enum {
 
 struct mlx5_ifc_qpc_bits {
        u8         state[0x4];
-       u8         reserved_0[0x4];
+       u8         reserved_at_4[0x4];
        u8         st[0x8];
-       u8         reserved_1[0x3];
+       u8         reserved_at_10[0x3];
        u8         pm_state[0x2];
-       u8         reserved_2[0x7];
+       u8         reserved_at_15[0x7];
        u8         end_padding_mode[0x2];
-       u8         reserved_3[0x2];
+       u8         reserved_at_1e[0x2];
 
        u8         wq_signature[0x1];
        u8         block_lb_mc[0x1];
        u8         atomic_like_write_en[0x1];
        u8         latency_sensitive[0x1];
-       u8         reserved_4[0x1];
+       u8         reserved_at_24[0x1];
        u8         drain_sigerr[0x1];
-       u8         reserved_5[0x2];
+       u8         reserved_at_26[0x2];
        u8         pd[0x18];
 
        u8         mtu[0x3];
        u8         log_msg_max[0x5];
-       u8         reserved_6[0x1];
+       u8         reserved_at_48[0x1];
        u8         log_rq_size[0x4];
        u8         log_rq_stride[0x3];
        u8         no_sq[0x1];
        u8         log_sq_size[0x4];
-       u8         reserved_7[0x6];
+       u8         reserved_at_55[0x6];
        u8         rlky[0x1];
-       u8         reserved_8[0x4];
+       u8         reserved_at_5c[0x4];
 
        u8         counter_set_id[0x8];
        u8         uar_page[0x18];
 
-       u8         reserved_9[0x8];
+       u8         reserved_at_80[0x8];
        u8         user_index[0x18];
 
-       u8         reserved_10[0x3];
+       u8         reserved_at_a0[0x3];
        u8         log_page_size[0x5];
        u8         remote_qpn[0x18];
 
@@ -1788,66 +1797,66 @@ struct mlx5_ifc_qpc_bits {
        struct mlx5_ifc_ads_bits secondary_address_path;
 
        u8         log_ack_req_freq[0x4];
-       u8         reserved_11[0x4];
+       u8         reserved_at_384[0x4];
        u8         log_sra_max[0x3];
-       u8         reserved_12[0x2];
+       u8         reserved_at_38b[0x2];
        u8         retry_count[0x3];
        u8         rnr_retry[0x3];
-       u8         reserved_13[0x1];
+       u8         reserved_at_393[0x1];
        u8         fre[0x1];
        u8         cur_rnr_retry[0x3];
        u8         cur_retry_count[0x3];
-       u8         reserved_14[0x5];
+       u8         reserved_at_39b[0x5];
 
-       u8         reserved_15[0x20];
+       u8         reserved_at_3a0[0x20];
 
-       u8         reserved_16[0x8];
+       u8         reserved_at_3c0[0x8];
        u8         next_send_psn[0x18];
 
-       u8         reserved_17[0x8];
+       u8         reserved_at_3e0[0x8];
        u8         cqn_snd[0x18];
 
-       u8         reserved_18[0x40];
+       u8         reserved_at_400[0x40];
 
-       u8         reserved_19[0x8];
+       u8         reserved_at_440[0x8];
        u8         last_acked_psn[0x18];
 
-       u8         reserved_20[0x8];
+       u8         reserved_at_460[0x8];
        u8         ssn[0x18];
 
-       u8         reserved_21[0x8];
+       u8         reserved_at_480[0x8];
        u8         log_rra_max[0x3];
-       u8         reserved_22[0x1];
+       u8         reserved_at_48b[0x1];
        u8         atomic_mode[0x4];
        u8         rre[0x1];
        u8         rwe[0x1];
        u8         rae[0x1];
-       u8         reserved_23[0x1];
+       u8         reserved_at_493[0x1];
        u8         page_offset[0x6];
-       u8         reserved_24[0x3];
+       u8         reserved_at_49a[0x3];
        u8         cd_slave_receive[0x1];
        u8         cd_slave_send[0x1];
        u8         cd_master[0x1];
 
-       u8         reserved_25[0x3];
+       u8         reserved_at_4a0[0x3];
        u8         min_rnr_nak[0x5];
        u8         next_rcv_psn[0x18];
 
-       u8         reserved_26[0x8];
+       u8         reserved_at_4c0[0x8];
        u8         xrcd[0x18];
 
-       u8         reserved_27[0x8];
+       u8         reserved_at_4e0[0x8];
        u8         cqn_rcv[0x18];
 
        u8         dbr_addr[0x40];
 
        u8         q_key[0x20];
 
-       u8         reserved_28[0x5];
+       u8         reserved_at_560[0x5];
        u8         rq_type[0x3];
        u8         srqn_rmpn[0x18];
 
-       u8         reserved_29[0x8];
+       u8         reserved_at_580[0x8];
        u8         rmsn[0x18];
 
        u8         hw_sq_wqebb_counter[0x10];
@@ -1857,33 +1866,33 @@ struct mlx5_ifc_qpc_bits {
 
        u8         sw_rq_counter[0x20];
 
-       u8         reserved_30[0x20];
+       u8         reserved_at_600[0x20];
 
-       u8         reserved_31[0xf];
+       u8         reserved_at_620[0xf];
        u8         cgs[0x1];
        u8         cs_req[0x8];
        u8         cs_res[0x8];
 
        u8         dc_access_key[0x40];
 
-       u8         reserved_32[0xc0];
+       u8         reserved_at_680[0xc0];
 };
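
qpc is the payload of the modify-QP commands later in this file (rst2init_qp, rtr2rts_qp, rts2rts_qp, sqerr2rts_qp, sqd2rts_qp): software fills the fields valid for the transition and flags them in the command's opt_param_mask. A hedged sketch of touching a few fields on the reset-to-init path (example values; MLX5_QP_PM_MIGRATED comes from linux/mlx5/qp.h):

        u32 pdn = 5;           /* protection domain number, example value */
        int rq_wqe_cnt = 256;  /* RQ depth, example value */
        void *qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, pdn);
        MLX5_SET(qpc, qpc, log_rq_size, ilog2(rq_wqe_cnt));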
 
 struct mlx5_ifc_roce_addr_layout_bits {
        u8         source_l3_address[16][0x8];
 
-       u8         reserved_0[0x3];
+       u8         reserved_at_80[0x3];
        u8         vlan_valid[0x1];
        u8         vlan_id[0xc];
        u8         source_mac_47_32[0x10];
 
        u8         source_mac_31_0[0x20];
 
-       u8         reserved_1[0x14];
+       u8         reserved_at_c0[0x14];
        u8         roce_l3_type[0x4];
        u8         roce_version[0x8];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_e0[0x20];
 };
 
 union mlx5_ifc_hca_cap_union_bits {
@@ -1895,7 +1904,7 @@ union mlx5_ifc_hca_cap_union_bits {
        struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
        struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
        struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
-       u8         reserved_0[0x8000];
+       u8         reserved_at_0[0x8000];
 };
 
 enum {
@@ -1905,24 +1914,24 @@ enum {
 };
 
 struct mlx5_ifc_flow_context_bits {
-       u8         reserved_0[0x20];
+       u8         reserved_at_0[0x20];
 
        u8         group_id[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         flow_tag[0x18];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_60[0x10];
        u8         action[0x10];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_80[0x8];
        u8         destination_list_size[0x18];
 
-       u8         reserved_4[0x160];
+       u8         reserved_at_a0[0x160];
 
        struct mlx5_ifc_fte_match_param_bits match_value;
 
-       u8         reserved_5[0x600];
+       u8         reserved_at_1200[0x600];
 
        struct mlx5_ifc_dest_format_struct_bits destination[0];
 };
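
A flow_context couples a match_value (tested against the packet fields selected by the owning flow group) with an action and a trailing list of destination_list_size destinations; SET_FLOW_TABLE_ENTRY carries one flow_context per rule. A sketch of filling the fields visible above (tirn is an illustrative TIR number; buffer allocation elided):

        u32 tirn = 0x12;  /* example destination TIR */
        void *fc   = MLX5_ADDR_OF(set_fte_in, in, flow_context);
        void *dest = MLX5_ADDR_OF(flow_context, fc, destination);

        MLX5_SET(flow_context, fc, action, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
        MLX5_SET(flow_context, fc, destination_list_size, 1);
        MLX5_SET(dest_format_struct, dest, destination_type,
                 MLX5_FLOW_DESTINATION_TYPE_TIR);
        MLX5_SET(dest_format_struct, dest, destination_id, tirn);
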
@@ -1935,43 +1944,43 @@ enum {
 struct mlx5_ifc_xrc_srqc_bits {
        u8         state[0x4];
        u8         log_xrc_srq_size[0x4];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         wq_signature[0x1];
        u8         cont_srq[0x1];
-       u8         reserved_1[0x1];
+       u8         reserved_at_22[0x1];
        u8         rlky[0x1];
        u8         basic_cyclic_rcv_wqe[0x1];
        u8         log_rq_stride[0x3];
        u8         xrcd[0x18];
 
        u8         page_offset[0x6];
-       u8         reserved_2[0x2];
+       u8         reserved_at_46[0x2];
        u8         cqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         user_index_equal_xrc_srqn[0x1];
-       u8         reserved_4[0x1];
+       u8         reserved_at_81[0x1];
        u8         log_page_size[0x6];
        u8         user_index[0x18];
 
-       u8         reserved_5[0x20];
+       u8         reserved_at_a0[0x20];
 
-       u8         reserved_6[0x8];
+       u8         reserved_at_c0[0x8];
        u8         pd[0x18];
 
        u8         lwm[0x10];
        u8         wqe_cnt[0x10];
 
-       u8         reserved_7[0x40];
+       u8         reserved_at_100[0x40];
 
        u8         db_record_addr_h[0x20];
 
        u8         db_record_addr_l[0x1e];
-       u8         reserved_8[0x2];
+       u8         reserved_at_17e[0x2];
 
-       u8         reserved_9[0x80];
+       u8         reserved_at_180[0x80];
 };
 
 struct mlx5_ifc_traffic_counter_bits {
@@ -1981,16 +1990,16 @@ struct mlx5_ifc_traffic_counter_bits {
 };
 
 struct mlx5_ifc_tisc_bits {
-       u8         reserved_0[0xc];
+       u8         reserved_at_0[0xc];
        u8         prio[0x4];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0x100];
+       u8         reserved_at_20[0x100];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_120[0x8];
        u8         transport_domain[0x18];
 
-       u8         reserved_4[0x3c0];
+       u8         reserved_at_140[0x3c0];
 };
 
 enum {
@@ -2015,31 +2024,31 @@ enum {
 };
 
 struct mlx5_ifc_tirc_bits {
-       u8         reserved_0[0x20];
+       u8         reserved_at_0[0x20];
 
        u8         disp_type[0x4];
-       u8         reserved_1[0x1c];
+       u8         reserved_at_24[0x1c];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
-       u8         reserved_3[0x4];
+       u8         reserved_at_80[0x4];
        u8         lro_timeout_period_usecs[0x10];
        u8         lro_enable_mask[0x4];
        u8         lro_max_ip_payload_size[0x8];
 
-       u8         reserved_4[0x40];
+       u8         reserved_at_a0[0x40];
 
-       u8         reserved_5[0x8];
+       u8         reserved_at_e0[0x8];
        u8         inline_rqn[0x18];
 
        u8         rx_hash_symmetric[0x1];
-       u8         reserved_6[0x1];
+       u8         reserved_at_101[0x1];
        u8         tunneled_offload_en[0x1];
-       u8         reserved_7[0x5];
+       u8         reserved_at_103[0x5];
        u8         indirect_table[0x18];
 
        u8         rx_hash_fn[0x4];
-       u8         reserved_8[0x2];
+       u8         reserved_at_124[0x2];
        u8         self_lb_block[0x2];
        u8         transport_domain[0x18];
 
@@ -2049,7 +2058,7 @@ struct mlx5_ifc_tirc_bits {
 
        struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner;
 
-       u8         reserved_9[0x4c0];
+       u8         reserved_at_2c0[0x4c0];
 };
 
 enum {
@@ -2060,39 +2069,39 @@ enum {
 struct mlx5_ifc_srqc_bits {
        u8         state[0x4];
        u8         log_srq_size[0x4];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         wq_signature[0x1];
        u8         cont_srq[0x1];
-       u8         reserved_1[0x1];
+       u8         reserved_at_22[0x1];
        u8         rlky[0x1];
-       u8         reserved_2[0x1];
+       u8         reserved_at_24[0x1];
        u8         log_rq_stride[0x3];
        u8         xrcd[0x18];
 
        u8         page_offset[0x6];
-       u8         reserved_3[0x2];
+       u8         reserved_at_46[0x2];
        u8         cqn[0x18];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_60[0x20];
 
-       u8         reserved_5[0x2];
+       u8         reserved_at_80[0x2];
        u8         log_page_size[0x6];
-       u8         reserved_6[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_7[0x20];
+       u8         reserved_at_a0[0x20];
 
-       u8         reserved_8[0x8];
+       u8         reserved_at_c0[0x8];
        u8         pd[0x18];
 
        u8         lwm[0x10];
        u8         wqe_cnt[0x10];
 
-       u8         reserved_9[0x40];
+       u8         reserved_at_100[0x40];
 
        u8         dbr_addr[0x40];
 
-       u8         reserved_10[0x80];
+       u8         reserved_at_180[0x80];
 };
 
 enum {
@@ -2106,39 +2115,39 @@ struct mlx5_ifc_sqc_bits {
        u8         cd_master[0x1];
        u8         fre[0x1];
        u8         flush_in_error_en[0x1];
-       u8         reserved_0[0x4];
+       u8         reserved_at_4[0x4];
        u8         state[0x4];
-       u8         reserved_1[0x14];
+       u8         reserved_at_c[0x14];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_20[0x8];
        u8         user_index[0x18];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_40[0x8];
        u8         cqn[0x18];
 
-       u8         reserved_4[0xa0];
+       u8         reserved_at_60[0xa0];
 
        u8         tis_lst_sz[0x10];
-       u8         reserved_5[0x10];
+       u8         reserved_at_110[0x10];
 
-       u8         reserved_6[0x40];
+       u8         reserved_at_120[0x40];
 
-       u8         reserved_7[0x8];
+       u8         reserved_at_160[0x8];
        u8         tis_num_0[0x18];
 
        struct mlx5_ifc_wq_bits wq;
 };
 
 struct mlx5_ifc_rqtc_bits {
-       u8         reserved_0[0xa0];
+       u8         reserved_at_0[0xa0];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_a0[0x10];
        u8         rqt_max_size[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_c0[0x10];
        u8         rqt_actual_size[0x10];
 
-       u8         reserved_3[0x6a0];
+       u8         reserved_at_e0[0x6a0];
 
        struct mlx5_ifc_rq_num_bits rq_num[0];
 };
@@ -2156,27 +2165,27 @@ enum {
 
 struct mlx5_ifc_rqc_bits {
        u8         rlky[0x1];
-       u8         reserved_0[0x2];
+       u8         reserved_at_1[0x2];
        u8         vsd[0x1];
        u8         mem_rq_type[0x4];
        u8         state[0x4];
-       u8         reserved_1[0x1];
+       u8         reserved_at_c[0x1];
        u8         flush_in_error_en[0x1];
-       u8         reserved_2[0x12];
+       u8         reserved_at_e[0x12];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_20[0x8];
        u8         user_index[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_40[0x8];
        u8         cqn[0x18];
 
        u8         counter_set_id[0x8];
-       u8         reserved_5[0x18];
+       u8         reserved_at_68[0x18];
 
-       u8         reserved_6[0x8];
+       u8         reserved_at_80[0x8];
        u8         rmpn[0x18];
 
-       u8         reserved_7[0xe0];
+       u8         reserved_at_a0[0xe0];
 
        struct mlx5_ifc_wq_bits wq;
 };
@@ -2187,47 +2196,53 @@ enum {
 };
 
 struct mlx5_ifc_rmpc_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         state[0x4];
-       u8         reserved_1[0x14];
+       u8         reserved_at_c[0x14];
 
        u8         basic_cyclic_rcv_wqe[0x1];
-       u8         reserved_2[0x1f];
+       u8         reserved_at_21[0x1f];
 
-       u8         reserved_3[0x140];
+       u8         reserved_at_40[0x140];
 
        struct mlx5_ifc_wq_bits wq;
 };
 
 struct mlx5_ifc_nic_vport_context_bits {
-       u8         reserved_0[0x1f];
+       u8         reserved_at_0[0x1f];
        u8         roce_en[0x1];
 
        u8         arm_change_event[0x1];
-       u8         reserved_1[0x1a];
+       u8         reserved_at_21[0x1a];
        u8         event_on_mtu[0x1];
        u8         event_on_promisc_change[0x1];
        u8         event_on_vlan_change[0x1];
        u8         event_on_mc_address_change[0x1];
        u8         event_on_uc_address_change[0x1];
 
-       u8         reserved_2[0xf0];
+       u8         reserved_at_40[0xf0];
 
        u8         mtu[0x10];
 
-       u8         reserved_3[0x640];
+       u8         system_image_guid[0x40];
+       u8         port_guid[0x40];
+       u8         node_guid[0x40];
+
+       u8         reserved_at_200[0x140];
+       u8         qkey_violation_counter[0x10];
+       u8         reserved_at_350[0x430];
 
        u8         promisc_uc[0x1];
        u8         promisc_mc[0x1];
        u8         promisc_all[0x1];
-       u8         reserved_4[0x2];
+       u8         reserved_at_783[0x2];
        u8         allowed_list_type[0x3];
-       u8         reserved_5[0xc];
+       u8         reserved_at_788[0xc];
        u8         allowed_list_size[0xc];
 
        struct mlx5_ifc_mac_address_layout_bits permanent_address;
 
-       u8         reserved_6[0x20];
+       u8         reserved_at_7e0[0x20];
 
        u8         current_uc_mac_address[0][0x40];
 };
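
Beyond the mechanical rename, this hunk also grows nic_vport_context: per-vport system_image_guid, port_guid and node_guid plus a qkey_violation_counter are carved out of what used to be reserved_3. Reading one of them follows the usual query pattern; a hedged sketch (mdev is the mlx5_core_dev, err an int):

        u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
        u64 node_guid;

        MLX5_SET(query_nic_vport_context_in, in, opcode,
                 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
        err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
        if (!err)
                node_guid = MLX5_GET64(query_nic_vport_context_out, out,
                                       nic_vport_context.node_guid);
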
@@ -2239,9 +2254,9 @@ enum {
 };
 
 struct mlx5_ifc_mkc_bits {
-       u8         reserved_0[0x1];
+       u8         reserved_at_0[0x1];
        u8         free[0x1];
-       u8         reserved_1[0xd];
+       u8         reserved_at_2[0xd];
        u8         small_fence_on_rdma_read_response[0x1];
        u8         umr_en[0x1];
        u8         a[0x1];
@@ -2250,19 +2265,19 @@ struct mlx5_ifc_mkc_bits {
        u8         lw[0x1];
        u8         lr[0x1];
        u8         access_mode[0x2];
-       u8         reserved_2[0x8];
+       u8         reserved_at_18[0x8];
 
        u8         qpn[0x18];
        u8         mkey_7_0[0x8];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_40[0x20];
 
        u8         length64[0x1];
        u8         bsf_en[0x1];
        u8         sync_umr[0x1];
-       u8         reserved_4[0x2];
+       u8         reserved_at_63[0x2];
        u8         expected_sigerr_count[0x1];
-       u8         reserved_5[0x1];
+       u8         reserved_at_66[0x1];
        u8         en_rinval[0x1];
        u8         pd[0x18];
 
@@ -2272,18 +2287,18 @@ struct mlx5_ifc_mkc_bits {
 
        u8         bsf_octword_size[0x20];
 
-       u8         reserved_6[0x80];
+       u8         reserved_at_120[0x80];
 
        u8         translations_octword_size[0x20];
 
-       u8         reserved_7[0x1b];
+       u8         reserved_at_1c0[0x1b];
        u8         log_page_size[0x5];
 
-       u8         reserved_8[0x20];
+       u8         reserved_at_1e0[0x20];
 };
 
 struct mlx5_ifc_pkey_bits {
-       u8         reserved_0[0x10];
+       u8         reserved_at_0[0x10];
        u8         pkey[0x10];
 };
 
@@ -2294,19 +2309,19 @@ struct mlx5_ifc_array128_auto_bits {
 struct mlx5_ifc_hca_vport_context_bits {
        u8         field_select[0x20];
 
-       u8         reserved_0[0xe0];
+       u8         reserved_at_20[0xe0];
 
        u8         sm_virt_aware[0x1];
        u8         has_smi[0x1];
        u8         has_raw[0x1];
        u8         grh_required[0x1];
-       u8         reserved_1[0xc];
+       u8         reserved_at_104[0xc];
        u8         port_physical_state[0x4];
        u8         vport_state_policy[0x4];
        u8         port_state[0x4];
        u8         vport_state[0x4];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_120[0x20];
 
        u8         system_image_guid[0x40];
 
@@ -2322,33 +2337,33 @@ struct mlx5_ifc_hca_vport_context_bits {
 
        u8         cap_mask2_field_select[0x20];
 
-       u8         reserved_3[0x80];
+       u8         reserved_at_280[0x80];
 
        u8         lid[0x10];
-       u8         reserved_4[0x4];
+       u8         reserved_at_310[0x4];
        u8         init_type_reply[0x4];
        u8         lmc[0x3];
        u8         subnet_timeout[0x5];
 
        u8         sm_lid[0x10];
        u8         sm_sl[0x4];
-       u8         reserved_5[0xc];
+       u8         reserved_at_334[0xc];
 
        u8         qkey_violation_counter[0x10];
        u8         pkey_violation_counter[0x10];
 
-       u8         reserved_6[0xca0];
+       u8         reserved_at_360[0xca0];
 };
 
 struct mlx5_ifc_esw_vport_context_bits {
-       u8         reserved_0[0x3];
+       u8         reserved_at_0[0x3];
        u8         vport_svlan_strip[0x1];
        u8         vport_cvlan_strip[0x1];
        u8         vport_svlan_insert[0x1];
        u8         vport_cvlan_insert[0x2];
-       u8         reserved_1[0x18];
+       u8         reserved_at_8[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_20[0x20];
 
        u8         svlan_cfi[0x1];
        u8         svlan_pcp[0x3];
@@ -2357,7 +2372,7 @@ struct mlx5_ifc_esw_vport_context_bits {
        u8         cvlan_pcp[0x3];
        u8         cvlan_id[0xc];
 
-       u8         reserved_3[0x7a0];
+       u8         reserved_at_60[0x7a0];
 };
 
 enum {
@@ -2372,41 +2387,41 @@ enum {
 
 struct mlx5_ifc_eqc_bits {
        u8         status[0x4];
-       u8         reserved_0[0x9];
+       u8         reserved_at_4[0x9];
        u8         ec[0x1];
        u8         oi[0x1];
-       u8         reserved_1[0x5];
+       u8         reserved_at_f[0x5];
        u8         st[0x4];
-       u8         reserved_2[0x8];
+       u8         reserved_at_18[0x8];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_20[0x20];
 
-       u8         reserved_4[0x14];
+       u8         reserved_at_40[0x14];
        u8         page_offset[0x6];
-       u8         reserved_5[0x6];
+       u8         reserved_at_5a[0x6];
 
-       u8         reserved_6[0x3];
+       u8         reserved_at_60[0x3];
        u8         log_eq_size[0x5];
        u8         uar_page[0x18];
 
-       u8         reserved_7[0x20];
+       u8         reserved_at_80[0x20];
 
-       u8         reserved_8[0x18];
+       u8         reserved_at_a0[0x18];
        u8         intr[0x8];
 
-       u8         reserved_9[0x3];
+       u8         reserved_at_c0[0x3];
        u8         log_page_size[0x5];
-       u8         reserved_10[0x18];
+       u8         reserved_at_c8[0x18];
 
-       u8         reserved_11[0x60];
+       u8         reserved_at_e0[0x60];
 
-       u8         reserved_12[0x8];
+       u8         reserved_at_140[0x8];
        u8         consumer_counter[0x18];
 
-       u8         reserved_13[0x8];
+       u8         reserved_at_160[0x8];
        u8         producer_counter[0x18];
 
-       u8         reserved_14[0x80];
+       u8         reserved_at_180[0x80];
 };
 
 enum {
@@ -2430,14 +2445,14 @@ enum {
 };
 
 struct mlx5_ifc_dctc_bits {
-       u8         reserved_0[0x4];
+       u8         reserved_at_0[0x4];
        u8         state[0x4];
-       u8         reserved_1[0x18];
+       u8         reserved_at_8[0x18];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_20[0x8];
        u8         user_index[0x18];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_40[0x8];
        u8         cqn[0x18];
 
        u8         counter_set_id[0x8];
@@ -2449,45 +2464,45 @@ struct mlx5_ifc_dctc_bits {
        u8         latency_sensitive[0x1];
        u8         rlky[0x1];
        u8         free_ar[0x1];
-       u8         reserved_4[0xd];
+       u8         reserved_at_73[0xd];
 
-       u8         reserved_5[0x8];
+       u8         reserved_at_80[0x8];
        u8         cs_res[0x8];
-       u8         reserved_6[0x3];
+       u8         reserved_at_90[0x3];
        u8         min_rnr_nak[0x5];
-       u8         reserved_7[0x8];
+       u8         reserved_at_98[0x8];
 
-       u8         reserved_8[0x8];
+       u8         reserved_at_a0[0x8];
        u8         srqn[0x18];
 
-       u8         reserved_9[0x8];
+       u8         reserved_at_c0[0x8];
        u8         pd[0x18];
 
        u8         tclass[0x8];
-       u8         reserved_10[0x4];
+       u8         reserved_at_e8[0x4];
        u8         flow_label[0x14];
 
        u8         dc_access_key[0x40];
 
-       u8         reserved_11[0x5];
+       u8         reserved_at_140[0x5];
        u8         mtu[0x3];
        u8         port[0x8];
        u8         pkey_index[0x10];
 
-       u8         reserved_12[0x8];
+       u8         reserved_at_160[0x8];
        u8         my_addr_index[0x8];
-       u8         reserved_13[0x8];
+       u8         reserved_at_170[0x8];
        u8         hop_limit[0x8];
 
        u8         dc_access_key_violation_count[0x20];
 
-       u8         reserved_14[0x14];
+       u8         reserved_at_1a0[0x14];
        u8         dei_cfi[0x1];
        u8         eth_prio[0x3];
        u8         ecn[0x2];
        u8         dscp[0x6];
 
-       u8         reserved_15[0x40];
+       u8         reserved_at_1c0[0x40];
 };
 
 enum {
@@ -2509,54 +2524,54 @@ enum {
 
 struct mlx5_ifc_cqc_bits {
        u8         status[0x4];
-       u8         reserved_0[0x4];
+       u8         reserved_at_4[0x4];
        u8         cqe_sz[0x3];
        u8         cc[0x1];
-       u8         reserved_1[0x1];
+       u8         reserved_at_c[0x1];
        u8         scqe_break_moderation_en[0x1];
        u8         oi[0x1];
-       u8         reserved_2[0x2];
+       u8         reserved_at_f[0x2];
        u8         cqe_zip_en[0x1];
        u8         mini_cqe_res_format[0x2];
        u8         st[0x4];
-       u8         reserved_3[0x8];
+       u8         reserved_at_18[0x8];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_20[0x20];
 
-       u8         reserved_5[0x14];
+       u8         reserved_at_40[0x14];
        u8         page_offset[0x6];
-       u8         reserved_6[0x6];
+       u8         reserved_at_5a[0x6];
 
-       u8         reserved_7[0x3];
+       u8         reserved_at_60[0x3];
        u8         log_cq_size[0x5];
        u8         uar_page[0x18];
 
-       u8         reserved_8[0x4];
+       u8         reserved_at_80[0x4];
        u8         cq_period[0xc];
        u8         cq_max_count[0x10];
 
-       u8         reserved_9[0x18];
+       u8         reserved_at_a0[0x18];
        u8         c_eqn[0x8];
 
-       u8         reserved_10[0x3];
+       u8         reserved_at_c0[0x3];
        u8         log_page_size[0x5];
-       u8         reserved_11[0x18];
+       u8         reserved_at_c8[0x18];
 
-       u8         reserved_12[0x20];
+       u8         reserved_at_e0[0x20];
 
-       u8         reserved_13[0x8];
+       u8         reserved_at_100[0x8];
        u8         last_notified_index[0x18];
 
-       u8         reserved_14[0x8];
+       u8         reserved_at_120[0x8];
        u8         last_solicit_index[0x18];
 
-       u8         reserved_15[0x8];
+       u8         reserved_at_140[0x8];
        u8         consumer_counter[0x18];
 
-       u8         reserved_16[0x8];
+       u8         reserved_at_160[0x8];
        u8         producer_counter[0x18];
 
-       u8         reserved_17[0x40];
+       u8         reserved_at_180[0x40];
 
        u8         dbr_addr[0x40];
 };
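
cq_period and cq_max_count are the CQ's interrupt-moderation knobs: roughly, the device raises a completion event once cq_max_count CQEs have accumulated or cq_period microseconds have elapsed since the first unreported one, whichever comes first. A sketch of programming them at create time (values illustrative):

        void *cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);

        MLX5_SET(cqc, cqc, cq_period, 16);     /* usec */
        MLX5_SET(cqc, cqc, cq_max_count, 64);  /* completions */
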
@@ -2565,16 +2580,16 @@ union mlx5_ifc_cong_control_roce_ecn_auto_bits {
        struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp;
        struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp;
        struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np;
-       u8         reserved_0[0x800];
+       u8         reserved_at_0[0x800];
 };
 
 struct mlx5_ifc_query_adapter_param_block_bits {
-       u8         reserved_0[0xc0];
+       u8         reserved_at_0[0xc0];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_c0[0x8];
        u8         ieee_vendor_id[0x18];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_e0[0x10];
        u8         vsd_vendor_id[0x10];
 
        u8         vsd[208][0x8];
@@ -2585,14 +2600,14 @@ struct mlx5_ifc_query_adapter_param_block_bits {
 union mlx5_ifc_modify_field_select_resize_field_select_auto_bits {
        struct mlx5_ifc_modify_field_select_bits modify_field_select;
        struct mlx5_ifc_resize_field_select_bits resize_field_select;
-       u8         reserved_0[0x20];
+       u8         reserved_at_0[0x20];
 };
 
 union mlx5_ifc_field_select_802_1_r_roce_auto_bits {
        struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp;
        struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp;
        struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np;
-       u8         reserved_0[0x20];
+       u8         reserved_at_0[0x20];
 };
 
 union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
@@ -2604,7 +2619,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
        struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
        struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
        struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
-       u8         reserved_0[0x7c0];
+       u8         reserved_at_0[0x7c0];
 };
 
 union mlx5_ifc_event_auto_bits {
@@ -2620,23 +2635,23 @@ union mlx5_ifc_event_auto_bits {
        struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event;
        struct mlx5_ifc_stall_vl_event_bits stall_vl_event;
        struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event;
-       u8         reserved_0[0xe0];
+       u8         reserved_at_0[0xe0];
 };
 
 struct mlx5_ifc_health_buffer_bits {
-       u8         reserved_0[0x100];
+       u8         reserved_at_0[0x100];
 
        u8         assert_existptr[0x20];
 
        u8         assert_callra[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_140[0x40];
 
        u8         fw_version[0x20];
 
        u8         hw_id[0x20];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_1c0[0x20];
 
        u8         irisc_index[0x8];
        u8         synd[0x8];
@@ -2645,20 +2660,20 @@ struct mlx5_ifc_health_buffer_bits {
 
 struct mlx5_ifc_register_loopback_control_bits {
        u8         no_lb[0x1];
-       u8         reserved_0[0x7];
+       u8         reserved_at_1[0x7];
        u8         port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_teardown_hca_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -2668,108 +2683,108 @@ enum {
 
 struct mlx5_ifc_teardown_hca_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         profile[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
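
Every command in this file shares the same envelope: the _in struct opens with opcode/op_mod and the _out struct opens with status plus a 32-bit syndrome that identifies the firmware error when status is non-zero. teardown_hca above is about the smallest instance; a hedged sketch of issuing it (dev is the mlx5_core_dev, err an int):

        u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};

        MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
        MLX5_SET(teardown_hca_in, in, profile,
                 MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE);
        err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));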
 
 struct mlx5_ifc_sqerr2rts_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_sqerr2rts_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_5[0x80];
+       u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_sqd2rts_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_sqd2rts_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_5[0x80];
+       u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_set_roce_address_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_roce_address_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         roce_address_index[0x10];
-       u8         reserved_2[0x10];
+       u8         reserved_at_50[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        struct mlx5_ifc_roce_addr_layout_bits roce_address;
 };
 
 struct mlx5_ifc_set_mad_demux_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -2779,89 +2794,89 @@ enum {
 
 struct mlx5_ifc_set_mad_demux_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_40[0x20];
 
-       u8         reserved_3[0x6];
+       u8         reserved_at_60[0x6];
        u8         demux_mode[0x2];
-       u8         reserved_4[0x18];
+       u8         reserved_at_68[0x18];
 };
 
 struct mlx5_ifc_set_l2_table_entry_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_l2_table_entry_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_40[0x60];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_index[0x18];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_c0[0x20];
 
-       u8         reserved_5[0x13];
+       u8         reserved_at_e0[0x13];
        u8         vlan_valid[0x1];
        u8         vlan[0xc];
 
        struct mlx5_ifc_mac_address_layout_bits mac_address;
 
-       u8         reserved_6[0xc0];
+       u8         reserved_at_140[0xc0];
 };
 
 struct mlx5_ifc_set_issi_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_issi_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         current_issi[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_set_hca_cap_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_hca_cap_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        union mlx5_ifc_hca_cap_union_bits capability;
 };
@@ -2875,156 +2890,156 @@ enum {
 
 struct mlx5_ifc_set_fte_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_fte_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_5[0x18];
+       u8         reserved_at_c0[0x18];
        u8         modify_enable_mask[0x8];
 
-       u8         reserved_6[0x20];
+       u8         reserved_at_e0[0x20];
 
        u8         flow_index[0x20];
 
-       u8         reserved_7[0xe0];
+       u8         reserved_at_120[0xe0];
 
        struct mlx5_ifc_flow_context_bits flow_context;
 };
 
 struct mlx5_ifc_rts2rts_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rts2rts_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_5[0x80];
+       u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_rtr2rts_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rtr2rts_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_5[0x80];
+       u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_rst2init_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rst2init_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_5[0x80];
+       u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_query_xrc_srq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-       u8         reserved_2[0x600];
+       u8         reserved_at_280[0x600];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_xrc_srq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         xrc_srqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -3034,13 +3049,13 @@ enum {
 
 struct mlx5_ifc_query_vport_state_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_40[0x20];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_60[0x18];
        u8         admin_state[0x4];
        u8         state[0x4];
 };
@@ -3052,25 +3067,25 @@ enum {
 
 struct mlx5_ifc_query_vport_state_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xf];
+       u8         reserved_at_41[0xf];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_vport_counter_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_traffic_counter_bits received_errors;
 
@@ -3096,7 +3111,7 @@ struct mlx5_ifc_query_vport_counter_out_bits {
 
        struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast;
 
-       u8         reserved_2[0xa00];
+       u8         reserved_at_680[0xa00];
 };
 
 enum {
@@ -3105,328 +3120,328 @@ enum {
 
 struct mlx5_ifc_query_vport_counter_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xf];
+       u8         reserved_at_41[0xf];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x60];
+       u8         reserved_at_60[0x60];
 
        u8         clear[0x1];
-       u8         reserved_4[0x1f];
+       u8         reserved_at_c1[0x1f];
 
-       u8         reserved_5[0x20];
+       u8         reserved_at_e0[0x20];
 };
 
 struct mlx5_ifc_query_tis_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_tisc_bits tis_context;
 };
 
 struct mlx5_ifc_query_tis_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         tisn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_tir_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_tirc_bits tir_context;
 };
 
 struct mlx5_ifc_query_tir_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         tirn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_srq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_srqc_bits srq_context_entry;
 
-       u8         reserved_2[0x600];
+       u8         reserved_at_280[0x600];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_srq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         srqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_sq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_sqc_bits sq_context;
 };
 
 struct mlx5_ifc_query_sq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         sqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_special_contexts_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_40[0x20];
 
        u8         resd_lkey[0x20];
 };
 
 struct mlx5_ifc_query_special_contexts_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_rqt_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_rqtc_bits rqt_context;
 };
 
 struct mlx5_ifc_query_rqt_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         rqtn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_rq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_rqc_bits rq_context;
 };
 
 struct mlx5_ifc_query_rq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         rqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_roce_address_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_roce_addr_layout_bits roce_address;
 };
 
 struct mlx5_ifc_query_roce_address_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         roce_address_index[0x10];
-       u8         reserved_2[0x10];
+       u8         reserved_at_50[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_rmp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_rmpc_bits rmp_context;
 };
 
 struct mlx5_ifc_query_rmp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         rmpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_3[0x80];
+       u8         reserved_at_800[0x80];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_q_counter_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         rx_write_requests[0x20];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_a0[0x20];
 
        u8         rx_read_requests[0x20];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_e0[0x20];
 
        u8         rx_atomic_requests[0x20];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_120[0x20];
 
        u8         rx_dct_connect[0x20];
 
-       u8         reserved_5[0x20];
+       u8         reserved_at_160[0x20];
 
        u8         out_of_buffer[0x20];
 
-       u8         reserved_6[0x20];
+       u8         reserved_at_1a0[0x20];
 
        u8         out_of_sequence[0x20];
 
-       u8         reserved_7[0x620];
+       u8         reserved_at_1e0[0x620];
 };
 
 struct mlx5_ifc_query_q_counter_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x80];
+       u8         reserved_at_40[0x80];
 
        u8         clear[0x1];
-       u8         reserved_3[0x1f];
+       u8         reserved_at_c1[0x1f];
 
-       u8         reserved_4[0x18];
+       u8         reserved_at_e0[0x18];
        u8         counter_set_id[0x8];
 };
 
 struct mlx5_ifc_query_pages_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_40[0x10];
        u8         function_id[0x10];
 
        u8         num_pages[0x20];
@@ -3440,55 +3455,55 @@ enum {
 
 struct mlx5_ifc_query_pages_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         function_id[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_nic_vport_context_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
 };
 
 struct mlx5_ifc_query_nic_vport_context_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xf];
+       u8         reserved_at_41[0xf];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x5];
+       u8         reserved_at_60[0x5];
        u8         allowed_list_type[0x3];
-       u8         reserved_4[0x18];
+       u8         reserved_at_68[0x18];
 };
 
 struct mlx5_ifc_query_mkey_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
 
-       u8         reserved_2[0x600];
+       u8         reserved_at_280[0x600];
 
        u8         bsf0_klm0_pas_mtt0_1[16][0x8];
 
@@ -3497,265 +3512,265 @@ struct mlx5_ifc_query_mkey_out_bits {
 
 struct mlx5_ifc_query_mkey_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         mkey_index[0x18];
 
        u8         pg_access[0x1];
-       u8         reserved_3[0x1f];
+       u8         reserved_at_61[0x1f];
 };
 
 struct mlx5_ifc_query_mad_demux_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         mad_dumux_parameters_block[0x20];
 };
 
 struct mlx5_ifc_query_mad_demux_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_l2_table_entry_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0xa0];
+       u8         reserved_at_40[0xa0];
 
-       u8         reserved_2[0x13];
+       u8         reserved_at_e0[0x13];
        u8         vlan_valid[0x1];
        u8         vlan[0xc];
 
        struct mlx5_ifc_mac_address_layout_bits mac_address;
 
-       u8         reserved_3[0xc0];
+       u8         reserved_at_140[0xc0];
 };
 
 struct mlx5_ifc_query_l2_table_entry_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_40[0x60];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_index[0x18];
 
-       u8         reserved_4[0x140];
+       u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_query_issi_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_40[0x10];
        u8         current_issi[0x10];
 
-       u8         reserved_2[0xa0];
+       u8         reserved_at_60[0xa0];
 
-       u8         supported_issi_reserved[76][0x8];
+       u8         reserved_at_100[76][0x8];
        u8         supported_issi_dw0[0x20];
 };
 
 struct mlx5_ifc_query_issi_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_hca_vport_pkey_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_pkey_bits pkey[0];
 };
 
 struct mlx5_ifc_query_hca_vport_pkey_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xb];
+       u8         reserved_at_41[0xb];
        u8         port_num[0x4];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_60[0x10];
        u8         pkey_index[0x10];
 };
 
 struct mlx5_ifc_query_hca_vport_gid_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_40[0x20];
 
        u8         gids_num[0x10];
-       u8         reserved_2[0x10];
+       u8         reserved_at_70[0x10];
 
        struct mlx5_ifc_array128_auto_bits gid[0];
 };
 
 struct mlx5_ifc_query_hca_vport_gid_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xb];
+       u8         reserved_at_41[0xb];
        u8         port_num[0x4];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_60[0x10];
        u8         gid_index[0x10];
 };
 
 struct mlx5_ifc_query_hca_vport_context_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_hca_vport_context_bits hca_vport_context;
 };
 
 struct mlx5_ifc_query_hca_vport_context_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xb];
+       u8         reserved_at_41[0xb];
        u8         port_num[0x4];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_hca_cap_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        union mlx5_ifc_hca_cap_union_bits capability;
 };
 
 struct mlx5_ifc_query_hca_cap_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_flow_table_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x80];
+       u8         reserved_at_40[0x80];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_c0[0x8];
        u8         level[0x8];
-       u8         reserved_3[0x8];
+       u8         reserved_at_d0[0x8];
        u8         log_size[0x8];
 
-       u8         reserved_4[0x120];
+       u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_query_flow_table_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_5[0x140];
+       u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_query_fte_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x1c0];
+       u8         reserved_at_40[0x1c0];
 
        struct mlx5_ifc_flow_context_bits flow_context;
 };
 
 struct mlx5_ifc_query_fte_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_5[0x40];
+       u8         reserved_at_c0[0x40];
 
        u8         flow_index[0x20];
 
-       u8         reserved_6[0xe0];
+       u8         reserved_at_120[0xe0];
 };
 
 enum {
@@ -3766,84 +3781,84 @@ enum {
 
 struct mlx5_ifc_query_flow_group_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0xa0];
+       u8         reserved_at_40[0xa0];
 
        u8         start_flow_index[0x20];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_100[0x20];
 
        u8         end_flow_index[0x20];
 
-       u8         reserved_3[0xa0];
+       u8         reserved_at_140[0xa0];
 
-       u8         reserved_4[0x18];
+       u8         reserved_at_1e0[0x18];
        u8         match_criteria_enable[0x8];
 
        struct mlx5_ifc_fte_match_param_bits match_criteria;
 
-       u8         reserved_5[0xe00];
+       u8         reserved_at_1200[0xe00];
 };
 
 struct mlx5_ifc_query_flow_group_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
        u8         group_id[0x20];
 
-       u8         reserved_5[0x120];
+       u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_query_esw_vport_context_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_esw_vport_context_bits esw_vport_context;
 };
 
 struct mlx5_ifc_query_esw_vport_context_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xf];
+       u8         reserved_at_41[0xf];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_modify_esw_vport_context_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_esw_vport_context_fields_select_bits {
-       u8         reserved[0x1c];
+       u8         reserved_at_0[0x1c];
        u8         vport_cvlan_insert[0x1];
        u8         vport_svlan_insert[0x1];
        u8         vport_cvlan_strip[0x1];
@@ -3852,13 +3867,13 @@ struct mlx5_ifc_esw_vport_context_fields_select_bits {
 
 struct mlx5_ifc_modify_esw_vport_context_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xf];
+       u8         reserved_at_41[0xf];
        u8         vport_number[0x10];
 
        struct mlx5_ifc_esw_vport_context_fields_select_bits field_select;
@@ -3868,124 +3883,124 @@ struct mlx5_ifc_modify_esw_vport_context_in_bits {
 
 struct mlx5_ifc_query_eq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_eqc_bits eq_context_entry;
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_280[0x40];
 
        u8         event_bitmask[0x40];
 
-       u8         reserved_3[0x580];
+       u8         reserved_at_300[0x580];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_eq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_40[0x18];
        u8         eq_number[0x8];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_dct_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_dctc_bits dct_context_entry;
 
-       u8         reserved_2[0x180];
+       u8         reserved_at_280[0x180];
 };
 
 struct mlx5_ifc_query_dct_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         dctn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_cqc_bits cq_context;
 
-       u8         reserved_2[0x600];
+       u8         reserved_at_280[0x600];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_cq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         cqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_status_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_40[0x20];
 
        u8         enable[0x1];
        u8         tag_enable[0x1];
-       u8         reserved_2[0x1e];
+       u8         reserved_at_62[0x1e];
 };
 
 struct mlx5_ifc_query_cong_status_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_40[0x18];
        u8         priority[0x4];
        u8         cong_protocol[0x4];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_statistics_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         cur_flows[0x20];
 
@@ -3999,7 +4014,7 @@ struct mlx5_ifc_query_cong_statistics_out_bits {
 
        u8         cnp_handled_low[0x20];
 
-       u8         reserved_2[0x100];
+       u8         reserved_at_140[0x100];
 
        u8         time_stamp_high[0x20];
 
@@ -4015,446 +4030,453 @@ struct mlx5_ifc_query_cong_statistics_out_bits {
 
        u8         cnps_sent_low[0x20];
 
-       u8         reserved_3[0x560];
+       u8         reserved_at_320[0x560];
 };
 
 struct mlx5_ifc_query_cong_statistics_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         clear[0x1];
-       u8         reserved_2[0x1f];
+       u8         reserved_at_41[0x1f];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_params_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters;
 };
 
 struct mlx5_ifc_query_cong_params_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x1c];
+       u8         reserved_at_40[0x1c];
        u8         cong_protocol[0x4];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_adapter_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct;
 };
 
 struct mlx5_ifc_query_adapter_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2rst_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2rst_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_qp_2err_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2err_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_page_fault_resume_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_page_fault_resume_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         error[0x1];
-       u8         reserved_2[0x4];
+       u8         reserved_at_41[0x4];
        u8         rdma[0x1];
        u8         read_write[0x1];
        u8         req_res[0x1];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_nop_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_nop_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_vport_state_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_vport_state_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xf];
+       u8         reserved_at_41[0xf];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x18];
+       u8         reserved_at_60[0x18];
        u8         admin_state[0x4];
-       u8         reserved_4[0x4];
+       u8         reserved_at_7c[0x4];
 };
 
 struct mlx5_ifc_modify_tis_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_modify_tis_bitmask_bits {
+       u8         reserved_at_0[0x20];
+
+       u8         reserved_at_20[0x1f];
+       u8         prio[0x1];
 };
 
 struct mlx5_ifc_modify_tis_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         tisn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
-       u8         modify_bitmask[0x40];
+       struct mlx5_ifc_modify_tis_bitmask_bits bitmask;
 
-       u8         reserved_4[0x40];
+       u8         reserved_at_c0[0x40];
 
        struct mlx5_ifc_tisc_bits ctx;
 };
 
 struct mlx5_ifc_modify_tir_bitmask_bits {
-       u8         reserved_0[0x20];
+       u8         reserved_at_0[0x20];
 
-       u8         reserved_1[0x1b];
+       u8         reserved_at_20[0x1b];
        u8         self_lb_en[0x1];
-       u8         reserved_2[0x3];
+       u8         reserved_at_3c[0x3];
        u8         lro[0x1];
 };
 
 struct mlx5_ifc_modify_tir_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_tir_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         tirn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        struct mlx5_ifc_modify_tir_bitmask_bits bitmask;
 
-       u8         reserved_4[0x40];
+       u8         reserved_at_c0[0x40];
 
        struct mlx5_ifc_tirc_bits ctx;
 };
 
 struct mlx5_ifc_modify_sq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_sq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         sq_state[0x4];
-       u8         reserved_2[0x4];
+       u8         reserved_at_44[0x4];
        u8         sqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         modify_bitmask[0x40];
 
-       u8         reserved_4[0x40];
+       u8         reserved_at_c0[0x40];
 
        struct mlx5_ifc_sqc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rqt_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rqt_bitmask_bits {
-       u8         reserved[0x20];
+       u8         reserved_at_0[0x20];
 
-       u8         reserved1[0x1f];
+       u8         reserved_at_20[0x1f];
        u8         rqn_list[0x1];
 };
 
 struct mlx5_ifc_modify_rqt_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         rqtn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        struct mlx5_ifc_rqt_bitmask_bits bitmask;
 
-       u8         reserved_4[0x40];
+       u8         reserved_at_c0[0x40];
 
        struct mlx5_ifc_rqtc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_rq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         rq_state[0x4];
-       u8         reserved_2[0x4];
+       u8         reserved_at_44[0x4];
        u8         rqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         modify_bitmask[0x40];
 
-       u8         reserved_4[0x40];
+       u8         reserved_at_c0[0x40];
 
        struct mlx5_ifc_rqc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rmp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rmp_bitmask_bits {
-       u8         reserved[0x20];
+       u8         reserved_at_0[0x20];
 
-       u8         reserved1[0x1f];
+       u8         reserved_at_20[0x1f];
        u8         lwm[0x1];
 };
 
 struct mlx5_ifc_modify_rmp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         rmp_state[0x4];
-       u8         reserved_2[0x4];
+       u8         reserved_at_44[0x4];
        u8         rmpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        struct mlx5_ifc_rmp_bitmask_bits bitmask;
 
-       u8         reserved_4[0x40];
+       u8         reserved_at_c0[0x40];
 
        struct mlx5_ifc_rmpc_bits ctx;
 };
 
 struct mlx5_ifc_modify_nic_vport_context_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_nic_vport_field_select_bits {
-       u8         reserved_0[0x19];
+       u8         reserved_at_0[0x19];
        u8         mtu[0x1];
        u8         change_event[0x1];
        u8         promisc[0x1];
        u8         permanent_address[0x1];
        u8         addresses_list[0x1];
        u8         roce_en[0x1];
-       u8         reserved_1[0x1];
+       u8         reserved_at_1f[0x1];
 };
 
 struct mlx5_ifc_modify_nic_vport_context_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xf];
+       u8         reserved_at_41[0xf];
        u8         vport_number[0x10];
 
        struct mlx5_ifc_modify_nic_vport_field_select_bits field_select;
 
-       u8         reserved_3[0x780];
+       u8         reserved_at_80[0x780];
 
        struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
 };
 
 struct mlx5_ifc_modify_hca_vport_context_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_hca_vport_context_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         other_vport[0x1];
-       u8         reserved_2[0xb];
+       u8         reserved_at_41[0xb];
        u8         port_num[0x4];
        u8         vport_number[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        struct mlx5_ifc_hca_vport_context_bits hca_vport_context;
 };
 
 struct mlx5_ifc_modify_cq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -4464,83 +4486,83 @@ enum {
 
 struct mlx5_ifc_modify_cq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         cqn[0x18];
 
        union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select;
 
        struct mlx5_ifc_cqc_bits cq_context;
 
-       u8         reserved_3[0x600];
+       u8         reserved_at_280[0x600];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_modify_cong_status_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_cong_status_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_40[0x18];
        u8         priority[0x4];
        u8         cong_protocol[0x4];
 
        u8         enable[0x1];
        u8         tag_enable[0x1];
-       u8         reserved_3[0x1e];
+       u8         reserved_at_62[0x1e];
 };
 
 struct mlx5_ifc_modify_cong_params_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_cong_params_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x1c];
+       u8         reserved_at_40[0x1c];
        u8         cong_protocol[0x4];
 
        union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select;
 
-       u8         reserved_3[0x80];
+       u8         reserved_at_80[0x80];
 
        union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters;
 };
 
 struct mlx5_ifc_manage_pages_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
        u8         output_num_entries[0x20];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         pas[0][0x40];
 };
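
manage_pages is also one of several commands in this range whose layout ends in pas[0][0x40]: a zero-length trailing array whose entries are 0x40 bits (8 bytes) each, i.e. one 64-bit physical address per page, so the real mailbox is the fixed header plus npages entries. A minimal sketch of how such a variable-length command buffer is sized (npages assumed in scope; mlx5_vzalloc() is the allocation helper kernels of this vintage provide):

        int inlen = MLX5_ST_SZ_BYTES(manage_pages_in) +
                    npages * sizeof(u64);   /* one pas[] entry per page */
        u32 *in = mlx5_vzalloc(inlen);

        if (!in)
                return -ENOMEM;
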
@@ -4553,12 +4575,12 @@ enum {
 
 struct mlx5_ifc_manage_pages_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         function_id[0x10];
 
        u8         input_num_entries[0x20];
@@ -4568,117 +4590,117 @@ struct mlx5_ifc_manage_pages_in_bits {
 
 struct mlx5_ifc_mad_ifc_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         response_mad_packet[256][0x8];
 };
 
 struct mlx5_ifc_mad_ifc_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         remote_lid[0x10];
-       u8         reserved_2[0x8];
+       u8         reserved_at_50[0x8];
        u8         port[0x8];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         mad[256][0x8];
 };
 
 struct mlx5_ifc_init_hca_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init_hca_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2rtr_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2rtr_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_5[0x80];
+       u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_init2init_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2init_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_5[0x80];
+       u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_get_dropped_packet_log_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         packet_headers_log[128][0x8];
 
@@ -4687,1029 +4709,1029 @@ struct mlx5_ifc_get_dropped_packet_log_out_bits {
 
 struct mlx5_ifc_get_dropped_packet_log_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_gen_eqe_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_40[0x18];
        u8         eq_number[0x8];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         eqe[64][0x8];
 };
 
 struct mlx5_ifc_gen_eq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_enable_hca_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_40[0x20];
 };
 
 struct mlx5_ifc_enable_hca_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         function_id[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_drain_dct_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_drain_dct_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         dctn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_disable_hca_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_40[0x20];
 };
 
 struct mlx5_ifc_disable_hca_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         function_id[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_detach_from_mcg_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_detach_from_mcg_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         multicast_gid[16][0x8];
 };
 
 struct mlx5_ifc_destroy_xrc_srq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_xrc_srq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         xrc_srqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_tis_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_tis_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         tisn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_tir_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_tir_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         tirn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_srq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_srq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         srqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_sq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_sq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         sqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rqt_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rqt_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         rqtn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         rqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rmp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rmp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         rmpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_psv_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_psv_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         psvn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_mkey_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_mkey_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         mkey_index[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_flow_table_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_flow_table_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_5[0x140];
+       u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_destroy_flow_group_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_flow_group_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
        u8         group_id[0x20];
 
-       u8         reserved_5[0x120];
+       u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_destroy_eq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_eq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_40[0x18];
        u8         eq_number[0x8];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_dct_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_dct_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         dctn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_cq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_cq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         cqn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_delete_vxlan_udp_dport_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_vxlan_udp_dport_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_40[0x20];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_60[0x10];
        u8         vxlan_udp_port[0x10];
 };
 
 struct mlx5_ifc_delete_l2_table_entry_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_l2_table_entry_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_40[0x60];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_index[0x18];
 
-       u8         reserved_4[0x140];
+       u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_delete_fte_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_fte_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_5[0x40];
+       u8         reserved_at_c0[0x40];
 
        u8         flow_index[0x20];
 
-       u8         reserved_6[0xe0];
+       u8         reserved_at_120[0xe0];
 };
 
 struct mlx5_ifc_dealloc_xrcd_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_xrcd_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         xrcd[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_uar_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_uar_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         uar[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_transport_domain_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_transport_domain_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         transport_domain[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_q_counter_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_q_counter_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_40[0x18];
        u8         counter_set_id[0x8];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_pd_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_pd_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         pd[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_xrc_srq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         xrc_srqn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_xrc_srq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-       u8         reserved_3[0x600];
+       u8         reserved_at_280[0x600];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_tis_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         tisn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_tis_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_tisc_bits ctx;
 };
 
 struct mlx5_ifc_create_tir_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         tirn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_tir_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_tirc_bits ctx;
 };
 
 struct mlx5_ifc_create_srq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         srqn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_srq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_srqc_bits srq_context_entry;
 
-       u8         reserved_3[0x600];
+       u8         reserved_at_280[0x600];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_sq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         sqn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_sq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_sqc_bits ctx;
 };
 
 struct mlx5_ifc_create_rqt_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         rqtn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rqt_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_rqtc_bits rqt_context;
 };
 
 struct mlx5_ifc_create_rq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         rqn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_rqc_bits ctx;
 };
 
 struct mlx5_ifc_create_rmp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         rmpn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rmp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0xc0];
+       u8         reserved_at_40[0xc0];
 
        struct mlx5_ifc_rmpc_bits ctx;
 };
 
 struct mlx5_ifc_create_qp_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_qp_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         opt_param_mask[0x20];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_a0[0x20];
 
        struct mlx5_ifc_qpc_bits qpc;
 
-       u8         reserved_4[0x80];
+       u8         reserved_at_800[0x80];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_psv_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_80[0x8];
        u8         psv0_index[0x18];
 
-       u8         reserved_3[0x8];
+       u8         reserved_at_a0[0x8];
        u8         psv1_index[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_c0[0x8];
        u8         psv2_index[0x18];
 
-       u8         reserved_5[0x8];
+       u8         reserved_at_e0[0x8];
        u8         psv3_index[0x18];
 };
 
 struct mlx5_ifc_create_psv_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         num_psv[0x4];
-       u8         reserved_2[0x4];
+       u8         reserved_at_44[0x4];
        u8         pd[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_mkey_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         mkey_index[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_mkey_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_40[0x20];
 
        u8         pg_access[0x1];
-       u8         reserved_3[0x1f];
+       u8         reserved_at_61[0x1f];
 
        struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
 
-       u8         reserved_4[0x80];
+       u8         reserved_at_280[0x80];
 
        u8         translations_octword_actual_size[0x20];
 
-       u8         reserved_5[0x560];
+       u8         reserved_at_320[0x560];
 
        u8         klm_pas_mtt[0][0x20];
 };
 
 struct mlx5_ifc_create_flow_table_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_flow_table_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_a0[0x20];
 
-       u8         reserved_5[0x4];
+       u8         reserved_at_c0[0x4];
        u8         table_miss_mode[0x4];
        u8         level[0x8];
-       u8         reserved_6[0x8];
+       u8         reserved_at_d0[0x8];
        u8         log_size[0x8];
 
-       u8         reserved_7[0x8];
+       u8         reserved_at_e0[0x8];
        u8         table_miss_id[0x18];
 
-       u8         reserved_8[0x100];
+       u8         reserved_at_100[0x100];
 };
 
 struct mlx5_ifc_create_flow_group_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         group_id[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -5720,134 +5742,134 @@ enum {
 
 struct mlx5_ifc_create_flow_group_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_5[0x20];
+       u8         reserved_at_c0[0x20];
 
        u8         start_flow_index[0x20];
 
-       u8         reserved_6[0x20];
+       u8         reserved_at_100[0x20];
 
        u8         end_flow_index[0x20];
 
-       u8         reserved_7[0xa0];
+       u8         reserved_at_140[0xa0];
 
-       u8         reserved_8[0x18];
+       u8         reserved_at_1e0[0x18];
        u8         match_criteria_enable[0x8];
 
        struct mlx5_ifc_fte_match_param_bits match_criteria;
 
-       u8         reserved_9[0xe00];
+       u8         reserved_at_1200[0xe00];
 };
 
 struct mlx5_ifc_create_eq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x18];
+       u8         reserved_at_40[0x18];
        u8         eq_number[0x8];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_eq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_eqc_bits eq_context_entry;
 
-       u8         reserved_3[0x40];
+       u8         reserved_at_280[0x40];
 
        u8         event_bitmask[0x40];
 
-       u8         reserved_4[0x580];
+       u8         reserved_at_300[0x580];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_dct_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         dctn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_dct_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_dctc_bits dct_context_entry;
 
-       u8         reserved_3[0x180];
+       u8         reserved_at_280[0x180];
 };
 
 struct mlx5_ifc_create_cq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         cqn[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_cq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        struct mlx5_ifc_cqc_bits cq_context;
 
-       u8         reserved_3[0x600];
+       u8         reserved_at_280[0x600];
 
        u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_config_int_moderation_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x4];
+       u8         reserved_at_40[0x4];
        u8         min_delay[0xc];
        u8         int_vector[0x10];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -5857,49 +5879,49 @@ enum {
 
 struct mlx5_ifc_config_int_moderation_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x4];
+       u8         reserved_at_40[0x4];
        u8         min_delay[0xc];
        u8         int_vector[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_attach_to_mcg_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_attach_to_mcg_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         qpn[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 
        u8         multicast_gid[16][0x8];
 };
 
 struct mlx5_ifc_arm_xrc_srq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -5908,25 +5930,25 @@ enum {
 
 struct mlx5_ifc_arm_xrc_srq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         xrc_srqn[0x18];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_60[0x10];
        u8         lwm[0x10];
 };
 
 struct mlx5_ifc_arm_rq_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -5935,179 +5957,179 @@ enum {
 
 struct mlx5_ifc_arm_rq_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         srq_number[0x18];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_60[0x10];
        u8         lwm[0x10];
 };
 
 struct mlx5_ifc_arm_dct_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_arm_dct_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_40[0x8];
        u8         dct_number[0x18];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_xrcd_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         xrcd[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_xrcd_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_uar_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         uar[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_uar_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_transport_domain_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         transport_domain[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_transport_domain_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_q_counter_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x18];
+       u8         reserved_at_40[0x18];
        u8         counter_set_id[0x8];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_q_counter_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_pd_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x8];
+       u8         reserved_at_40[0x8];
        u8         pd[0x18];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_pd_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_40[0x20];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_60[0x10];
        u8         vxlan_udp_port[0x10];
 };
 
 struct mlx5_ifc_access_register_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         register_data[0][0x20];
 };
@@ -6119,12 +6141,12 @@ enum {
 
 struct mlx5_ifc_access_register_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x10];
+       u8         reserved_at_40[0x10];
        u8         register_id[0x10];
 
        u8         argument[0x20];
@@ -6137,24 +6159,24 @@ struct mlx5_ifc_sltp_reg_bits {
        u8         version[0x4];
        u8         local_port[0x8];
        u8         pnat[0x2];
-       u8         reserved_0[0x2];
+       u8         reserved_at_12[0x2];
        u8         lane[0x4];
-       u8         reserved_1[0x8];
+       u8         reserved_at_18[0x8];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_20[0x20];
 
-       u8         reserved_3[0x7];
+       u8         reserved_at_40[0x7];
        u8         polarity[0x1];
        u8         ob_tap0[0x8];
        u8         ob_tap1[0x8];
        u8         ob_tap2[0x8];
 
-       u8         reserved_4[0xc];
+       u8         reserved_at_60[0xc];
        u8         ob_preemp_mode[0x4];
        u8         ob_reg[0x8];
        u8         ob_bias[0x8];
 
-       u8         reserved_5[0x20];
+       u8         reserved_at_80[0x20];
 };
 
 struct mlx5_ifc_slrg_reg_bits {
@@ -6162,36 +6184,36 @@ struct mlx5_ifc_slrg_reg_bits {
        u8         version[0x4];
        u8         local_port[0x8];
        u8         pnat[0x2];
-       u8         reserved_0[0x2];
+       u8         reserved_at_12[0x2];
        u8         lane[0x4];
-       u8         reserved_1[0x8];
+       u8         reserved_at_18[0x8];
 
        u8         time_to_link_up[0x10];
-       u8         reserved_2[0xc];
+       u8         reserved_at_30[0xc];
        u8         grade_lane_speed[0x4];
 
        u8         grade_version[0x8];
        u8         grade[0x18];
 
-       u8         reserved_3[0x4];
+       u8         reserved_at_60[0x4];
        u8         height_grade_type[0x4];
        u8         height_grade[0x18];
 
        u8         height_dz[0x10];
        u8         height_dv[0x10];
 
-       u8         reserved_4[0x10];
+       u8         reserved_at_a0[0x10];
        u8         height_sigma[0x10];
 
-       u8         reserved_5[0x20];
+       u8         reserved_at_c0[0x20];
 
-       u8         reserved_6[0x4];
+       u8         reserved_at_e0[0x4];
        u8         phase_grade_type[0x4];
        u8         phase_grade[0x18];
 
-       u8         reserved_7[0x8];
+       u8         reserved_at_100[0x8];
        u8         phase_eo_pos[0x8];
-       u8         reserved_8[0x8];
+       u8         reserved_at_110[0x8];
        u8         phase_eo_neg[0x8];
 
        u8         ffe_set_tested[0x10];
@@ -6199,70 +6221,70 @@ struct mlx5_ifc_slrg_reg_bits {
 };
 
 struct mlx5_ifc_pvlc_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0x1c];
+       u8         reserved_at_20[0x1c];
        u8         vl_hw_cap[0x4];
 
-       u8         reserved_3[0x1c];
+       u8         reserved_at_40[0x1c];
        u8         vl_admin[0x4];
 
-       u8         reserved_4[0x1c];
+       u8         reserved_at_60[0x1c];
        u8         vl_operational[0x4];
 };
 
 struct mlx5_ifc_pude_reg_bits {
        u8         swid[0x8];
        u8         local_port[0x8];
-       u8         reserved_0[0x4];
+       u8         reserved_at_10[0x4];
        u8         admin_status[0x4];
-       u8         reserved_1[0x4];
+       u8         reserved_at_18[0x4];
        u8         oper_status[0x4];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_ptys_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0xd];
+       u8         reserved_at_10[0xd];
        u8         proto_mask[0x3];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_20[0x40];
 
        u8         eth_proto_capability[0x20];
 
        u8         ib_link_width_capability[0x10];
        u8         ib_proto_capability[0x10];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_a0[0x20];
 
        u8         eth_proto_admin[0x20];
 
        u8         ib_link_width_admin[0x10];
        u8         ib_proto_admin[0x10];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_100[0x20];
 
        u8         eth_proto_oper[0x20];
 
        u8         ib_link_width_oper[0x10];
        u8         ib_proto_oper[0x10];
 
-       u8         reserved_5[0x20];
+       u8         reserved_at_160[0x20];
 
        u8         eth_proto_lp_advertise[0x20];
 
-       u8         reserved_6[0x60];
+       u8         reserved_at_1a0[0x60];
 };
 
 struct mlx5_ifc_ptas_reg_bits {
-       u8         reserved_0[0x20];
+       u8         reserved_at_0[0x20];
 
        u8         algorithm_options[0x10];
-       u8         reserved_1[0x4];
+       u8         reserved_at_30[0x4];
        u8         repetitions_mode[0x4];
        u8         num_of_repetitions[0x8];
 
@@ -6288,13 +6310,13 @@ struct mlx5_ifc_ptas_reg_bits {
        u8         ndeo_error_threshold[0x10];
 
        u8         mixer_offset_step_size[0x10];
-       u8         reserved_2[0x8];
+       u8         reserved_at_110[0x8];
        u8         mix90_phase_for_voltage_bath[0x8];
 
        u8         mixer_offset_start[0x10];
        u8         mixer_offset_end[0x10];
 
-       u8         reserved_3[0x15];
+       u8         reserved_at_140[0x15];
        u8         ber_test_time[0xb];
 };
 
@@ -6302,154 +6324,154 @@ struct mlx5_ifc_pspa_reg_bits {
        u8         swid[0x8];
        u8         local_port[0x8];
        u8         sub_port[0x8];
-       u8         reserved_0[0x8];
+       u8         reserved_at_18[0x8];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_pqdr_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x5];
+       u8         reserved_at_10[0x5];
        u8         prio[0x3];
-       u8         reserved_2[0x6];
+       u8         reserved_at_18[0x6];
        u8         mode[0x2];
 
-       u8         reserved_3[0x20];
+       u8         reserved_at_20[0x20];
 
-       u8         reserved_4[0x10];
+       u8         reserved_at_40[0x10];
        u8         min_threshold[0x10];
 
-       u8         reserved_5[0x10];
+       u8         reserved_at_60[0x10];
        u8         max_threshold[0x10];
 
-       u8         reserved_6[0x10];
+       u8         reserved_at_80[0x10];
        u8         mark_probability_denominator[0x10];
 
-       u8         reserved_7[0x60];
+       u8         reserved_at_a0[0x60];
 };
 
 struct mlx5_ifc_ppsc_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_20[0x60];
 
-       u8         reserved_3[0x1c];
+       u8         reserved_at_80[0x1c];
        u8         wrps_admin[0x4];
 
-       u8         reserved_4[0x1c];
+       u8         reserved_at_a0[0x1c];
        u8         wrps_status[0x4];
 
-       u8         reserved_5[0x8];
+       u8         reserved_at_c0[0x8];
        u8         up_threshold[0x8];
-       u8         reserved_6[0x8];
+       u8         reserved_at_d0[0x8];
        u8         down_threshold[0x8];
 
-       u8         reserved_7[0x20];
+       u8         reserved_at_e0[0x20];
 
-       u8         reserved_8[0x1c];
+       u8         reserved_at_100[0x1c];
        u8         srps_admin[0x4];
 
-       u8         reserved_9[0x1c];
+       u8         reserved_at_120[0x1c];
        u8         srps_status[0x4];
 
-       u8         reserved_10[0x40];
+       u8         reserved_at_140[0x40];
 };
 
 struct mlx5_ifc_pplr_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_20[0x8];
        u8         lb_cap[0x8];
-       u8         reserved_3[0x8];
+       u8         reserved_at_30[0x8];
        u8         lb_en[0x8];
 };
 
 struct mlx5_ifc_pplm_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_20[0x20];
 
        u8         port_profile_mode[0x8];
        u8         static_port_profile[0x8];
        u8         active_port_profile[0x8];
-       u8         reserved_3[0x8];
+       u8         reserved_at_58[0x8];
 
        u8         retransmission_active[0x8];
        u8         fec_mode_active[0x18];
 
-       u8         reserved_4[0x20];
+       u8         reserved_at_80[0x20];
 };
 
 struct mlx5_ifc_ppcnt_reg_bits {
        u8         swid[0x8];
        u8         local_port[0x8];
        u8         pnat[0x2];
-       u8         reserved_0[0x8];
+       u8         reserved_at_12[0x8];
        u8         grp[0x6];
 
        u8         clr[0x1];
-       u8         reserved_1[0x1c];
+       u8         reserved_at_21[0x1c];
        u8         prio_tc[0x3];
 
        union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
 struct mlx5_ifc_ppad_reg_bits {
-       u8         reserved_0[0x3];
+       u8         reserved_at_0[0x3];
        u8         single_mac[0x1];
-       u8         reserved_1[0x4];
+       u8         reserved_at_4[0x4];
        u8         local_port[0x8];
        u8         mac_47_32[0x10];
 
        u8         mac_31_0[0x20];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_pmtu_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
        u8         max_mtu[0x10];
-       u8         reserved_2[0x10];
+       u8         reserved_at_30[0x10];
 
        u8         admin_mtu[0x10];
-       u8         reserved_3[0x10];
+       u8         reserved_at_50[0x10];
 
        u8         oper_mtu[0x10];
-       u8         reserved_4[0x10];
+       u8         reserved_at_70[0x10];
 };
 
 struct mlx5_ifc_pmpr_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         module[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0x18];
+       u8         reserved_at_20[0x18];
        u8         attenuation_5g[0x8];
 
-       u8         reserved_3[0x18];
+       u8         reserved_at_40[0x18];
        u8         attenuation_7g[0x8];
 
-       u8         reserved_4[0x18];
+       u8         reserved_at_60[0x18];
        u8         attenuation_12g[0x8];
 };
 
 struct mlx5_ifc_pmpe_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         module[0x8];
-       u8         reserved_1[0xc];
+       u8         reserved_at_10[0xc];
        u8         module_status[0x4];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_pmpc_reg_bits {
@@ -6457,20 +6479,20 @@ struct mlx5_ifc_pmpc_reg_bits {
 };
 
 struct mlx5_ifc_pmlpn_reg_bits {
-       u8         reserved_0[0x4];
+       u8         reserved_at_0[0x4];
        u8         mlpn_status[0x4];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
        u8         e[0x1];
-       u8         reserved_2[0x1f];
+       u8         reserved_at_21[0x1f];
 };
 
 struct mlx5_ifc_pmlp_reg_bits {
        u8         rxtx[0x1];
-       u8         reserved_0[0x7];
+       u8         reserved_at_1[0x7];
        u8         local_port[0x8];
-       u8         reserved_1[0x8];
+       u8         reserved_at_10[0x8];
        u8         width[0x8];
 
        u8         lane0_module_mapping[0x20];
@@ -6481,36 +6503,36 @@ struct mlx5_ifc_pmlp_reg_bits {
 
        u8         lane3_module_mapping[0x20];
 
-       u8         reserved_2[0x160];
+       u8         reserved_at_a0[0x160];
 };
 
 struct mlx5_ifc_pmaos_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         module[0x8];
-       u8         reserved_1[0x4];
+       u8         reserved_at_10[0x4];
        u8         admin_status[0x4];
-       u8         reserved_2[0x4];
+       u8         reserved_at_18[0x4];
        u8         oper_status[0x4];
 
        u8         ase[0x1];
        u8         ee[0x1];
-       u8         reserved_3[0x1c];
+       u8         reserved_at_22[0x1c];
        u8         e[0x2];
 
-       u8         reserved_4[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_plpc_reg_bits {
-       u8         reserved_0[0x4];
+       u8         reserved_at_0[0x4];
        u8         profile_id[0xc];
-       u8         reserved_1[0x4];
+       u8         reserved_at_10[0x4];
        u8         proto_mask[0x4];
-       u8         reserved_2[0x8];
+       u8         reserved_at_18[0x8];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_20[0x10];
        u8         lane_speed[0x10];
 
-       u8         reserved_4[0x17];
+       u8         reserved_at_40[0x17];
        u8         lpbf[0x1];
        u8         fec_mode_policy[0x8];
 
@@ -6523,44 +6545,44 @@ struct mlx5_ifc_plpc_reg_bits {
        u8         retransmission_request_admin[0x8];
        u8         fec_mode_request_admin[0x18];
 
-       u8         reserved_5[0x80];
+       u8         reserved_at_c0[0x80];
 };
 
 struct mlx5_ifc_plib_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x8];
+       u8         reserved_at_10[0x8];
        u8         ib_port[0x8];
 
-       u8         reserved_2[0x60];
+       u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_plbf_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0xd];
+       u8         reserved_at_10[0xd];
        u8         lbf_mode[0x3];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_pipg_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
        u8         dic[0x1];
-       u8         reserved_2[0x19];
+       u8         reserved_at_21[0x19];
        u8         ipg[0x4];
-       u8         reserved_3[0x2];
+       u8         reserved_at_3e[0x2];
 };
 
 struct mlx5_ifc_pifr_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0xe0];
+       u8         reserved_at_20[0xe0];
 
        u8         port_filter[8][0x20];
 
@@ -6568,36 +6590,36 @@ struct mlx5_ifc_pifr_reg_bits {
 };
 
 struct mlx5_ifc_pfcc_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
        u8         ppan[0x4];
-       u8         reserved_2[0x4];
+       u8         reserved_at_24[0x4];
        u8         prio_mask_tx[0x8];
-       u8         reserved_3[0x8];
+       u8         reserved_at_30[0x8];
        u8         prio_mask_rx[0x8];
 
        u8         pptx[0x1];
        u8         aptx[0x1];
-       u8         reserved_4[0x6];
+       u8         reserved_at_42[0x6];
        u8         pfctx[0x8];
-       u8         reserved_5[0x10];
+       u8         reserved_at_50[0x10];
 
        u8         pprx[0x1];
        u8         aprx[0x1];
-       u8         reserved_6[0x6];
+       u8         reserved_at_62[0x6];
        u8         pfcrx[0x8];
-       u8         reserved_7[0x10];
+       u8         reserved_at_70[0x10];
 
-       u8         reserved_8[0x80];
+       u8         reserved_at_80[0x80];
 };
 
 struct mlx5_ifc_pelc_reg_bits {
        u8         op[0x4];
-       u8         reserved_0[0x4];
+       u8         reserved_at_4[0x4];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
        u8         op_admin[0x8];
        u8         op_capability[0x8];
@@ -6612,28 +6634,28 @@ struct mlx5_ifc_pelc_reg_bits {
 
        u8         active[0x40];
 
-       u8         reserved_2[0x80];
+       u8         reserved_at_140[0x80];
 };
 
 struct mlx5_ifc_peir_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_2[0xc];
+       u8         reserved_at_20[0xc];
        u8         error_count[0x4];
-       u8         reserved_3[0x10];
+       u8         reserved_at_30[0x10];
 
-       u8         reserved_4[0xc];
+       u8         reserved_at_40[0xc];
        u8         lane[0x4];
-       u8         reserved_5[0x8];
+       u8         reserved_at_50[0x8];
        u8         error_type[0x8];
 };
 
 struct mlx5_ifc_pcap_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         local_port[0x8];
-       u8         reserved_1[0x10];
+       u8         reserved_at_10[0x10];
 
        u8         port_capability_mask[4][0x20];
 };
@@ -6641,46 +6663,46 @@ struct mlx5_ifc_pcap_reg_bits {
 struct mlx5_ifc_paos_reg_bits {
        u8         swid[0x8];
        u8         local_port[0x8];
-       u8         reserved_0[0x4];
+       u8         reserved_at_10[0x4];
        u8         admin_status[0x4];
-       u8         reserved_1[0x4];
+       u8         reserved_at_18[0x4];
        u8         oper_status[0x4];
 
        u8         ase[0x1];
        u8         ee[0x1];
-       u8         reserved_2[0x1c];
+       u8         reserved_at_22[0x1c];
        u8         e[0x2];
 
-       u8         reserved_3[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_pamp_reg_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         opamp_group[0x8];
-       u8         reserved_1[0xc];
+       u8         reserved_at_10[0xc];
        u8         opamp_group_type[0x4];
 
        u8         start_index[0x10];
-       u8         reserved_2[0x4];
+       u8         reserved_at_30[0x4];
        u8         num_of_indices[0xc];
 
        u8         index_data[18][0x10];
 };
 
 struct mlx5_ifc_lane_2_module_mapping_bits {
-       u8         reserved_0[0x6];
+       u8         reserved_at_0[0x6];
        u8         rx_lane[0x2];
-       u8         reserved_1[0x6];
+       u8         reserved_at_8[0x6];
        u8         tx_lane[0x2];
-       u8         reserved_2[0x8];
+       u8         reserved_at_10[0x8];
        u8         module[0x8];
 };
 
 struct mlx5_ifc_bufferx_reg_bits {
-       u8         reserved_0[0x6];
+       u8         reserved_at_0[0x6];
        u8         lossy[0x1];
        u8         epsb[0x1];
-       u8         reserved_1[0xc];
+       u8         reserved_at_8[0xc];
        u8         size[0xc];
 
        u8         xoff_threshold[0x10];
@@ -6692,21 +6714,21 @@ struct mlx5_ifc_set_node_in_bits {
 };
 
 struct mlx5_ifc_register_power_settings_bits {
-       u8         reserved_0[0x18];
+       u8         reserved_at_0[0x18];
        u8         power_settings_level[0x8];
 
-       u8         reserved_1[0x60];
+       u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_register_host_endianness_bits {
        u8         he[0x1];
-       u8         reserved_0[0x1f];
+       u8         reserved_at_1[0x1f];
 
-       u8         reserved_1[0x60];
+       u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_umr_pointer_desc_argument_bits {
-       u8         reserved_0[0x20];
+       u8         reserved_at_0[0x20];
 
        u8         mkey[0x20];
 
@@ -6719,7 +6741,7 @@ struct mlx5_ifc_ud_adrs_vector_bits {
        u8         dc_key[0x40];
 
        u8         ext[0x1];
-       u8         reserved_0[0x7];
+       u8         reserved_at_41[0x7];
        u8         destination_qp_dct[0x18];
 
        u8         static_rate[0x4];
@@ -6728,7 +6750,7 @@ struct mlx5_ifc_ud_adrs_vector_bits {
        u8         mlid[0x7];
        u8         rlid_udp_sport[0x10];
 
-       u8         reserved_1[0x20];
+       u8         reserved_at_80[0x20];
 
        u8         rmac_47_16[0x20];
 
@@ -6736,9 +6758,9 @@ struct mlx5_ifc_ud_adrs_vector_bits {
        u8         tclass[0x8];
        u8         hop_limit[0x8];
 
-       u8         reserved_2[0x1];
+       u8         reserved_at_e0[0x1];
        u8         grh[0x1];
-       u8         reserved_3[0x2];
+       u8         reserved_at_e2[0x2];
        u8         src_addr_index[0x8];
        u8         flow_label[0x14];
 
@@ -6746,27 +6768,27 @@ struct mlx5_ifc_ud_adrs_vector_bits {
 };
 
 struct mlx5_ifc_pages_req_event_bits {
-       u8         reserved_0[0x10];
+       u8         reserved_at_0[0x10];
        u8         function_id[0x10];
 
        u8         num_pages[0x20];
 
-       u8         reserved_1[0xa0];
+       u8         reserved_at_40[0xa0];
 };
 
 struct mlx5_ifc_eqe_bits {
-       u8         reserved_0[0x8];
+       u8         reserved_at_0[0x8];
        u8         event_type[0x8];
-       u8         reserved_1[0x8];
+       u8         reserved_at_10[0x8];
        u8         event_sub_type[0x8];
 
-       u8         reserved_2[0xe0];
+       u8         reserved_at_20[0xe0];
 
        union mlx5_ifc_event_auto_bits event_data;
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_1e0[0x10];
        u8         signature[0x8];
-       u8         reserved_4[0x7];
+       u8         reserved_at_1f8[0x7];
        u8         owner[0x1];
 };
 
@@ -6776,14 +6798,14 @@ enum {
 
 struct mlx5_ifc_cmd_queue_entry_bits {
        u8         type[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         input_length[0x20];
 
        u8         input_mailbox_pointer_63_32[0x20];
 
        u8         input_mailbox_pointer_31_9[0x17];
-       u8         reserved_1[0x9];
+       u8         reserved_at_77[0x9];
 
        u8         command_input_inline_data[16][0x8];
 
@@ -6792,20 +6814,20 @@ struct mlx5_ifc_cmd_queue_entry_bits {
        u8         output_mailbox_pointer_63_32[0x20];
 
        u8         output_mailbox_pointer_31_9[0x17];
-       u8         reserved_2[0x9];
+       u8         reserved_at_1b7[0x9];
 
        u8         output_length[0x20];
 
        u8         token[0x8];
        u8         signature[0x8];
-       u8         reserved_3[0x8];
+       u8         reserved_at_1f0[0x8];
        u8         status[0x7];
        u8         ownership[0x1];
 };
 
 struct mlx5_ifc_cmd_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
@@ -6814,9 +6836,9 @@ struct mlx5_ifc_cmd_out_bits {
 
 struct mlx5_ifc_cmd_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
        u8         command[0][0x20];
@@ -6825,16 +6847,16 @@ struct mlx5_ifc_cmd_in_bits {
 struct mlx5_ifc_cmd_if_box_bits {
        u8         mailbox_data[512][0x8];
 
-       u8         reserved_0[0x180];
+       u8         reserved_at_1000[0x180];
 
        u8         next_pointer_63_32[0x20];
 
        u8         next_pointer_31_10[0x16];
-       u8         reserved_1[0xa];
+       u8         reserved_at_11b6[0xa];
 
        u8         block_number[0x20];
 
-       u8         reserved_2[0x8];
+       u8         reserved_at_11e0[0x8];
        u8         token[0x8];
        u8         ctrl_signature[0x8];
        u8         signature[0x8];
@@ -6844,7 +6866,7 @@ struct mlx5_ifc_mtt_bits {
        u8         ptag_63_32[0x20];
 
        u8         ptag_31_8[0x18];
-       u8         reserved_0[0x6];
+       u8         reserved_at_38[0x6];
        u8         wr_en[0x1];
        u8         rd_en[0x1];
 };
@@ -6882,38 +6904,38 @@ struct mlx5_ifc_initial_seg_bits {
        u8         cmd_interface_rev[0x10];
        u8         fw_rev_subminor[0x10];
 
-       u8         reserved_0[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         cmdq_phy_addr_63_32[0x20];
 
        u8         cmdq_phy_addr_31_12[0x14];
-       u8         reserved_1[0x2];
+       u8         reserved_at_b4[0x2];
        u8         nic_interface[0x2];
        u8         log_cmdq_size[0x4];
        u8         log_cmdq_stride[0x4];
 
        u8         command_doorbell_vector[0x20];
 
-       u8         reserved_2[0xf00];
+       u8         reserved_at_e0[0xf00];
 
        u8         initializing[0x1];
-       u8         reserved_3[0x4];
+       u8         reserved_at_fe1[0x4];
        u8         nic_interface_supported[0x3];
-       u8         reserved_4[0x18];
+       u8         reserved_at_fe8[0x18];
 
        struct mlx5_ifc_health_buffer_bits health_buffer;
 
        u8         no_dram_nic_offset[0x20];
 
-       u8         reserved_5[0x6e40];
+       u8         reserved_at_1220[0x6e40];
 
-       u8         reserved_6[0x1f];
+       u8         reserved_at_8060[0x1f];
        u8         clear_int[0x1];
 
        u8         health_syndrome[0x8];
        u8         health_counter[0x18];
 
-       u8         reserved_7[0x17fc0];
+       u8         reserved_at_80a0[0x17fc0];
 };
 
 union mlx5_ifc_ports_control_registers_document_bits {
@@ -6958,44 +6980,44 @@ union mlx5_ifc_ports_control_registers_document_bits {
        struct mlx5_ifc_pvlc_reg_bits pvlc_reg;
        struct mlx5_ifc_slrg_reg_bits slrg_reg;
        struct mlx5_ifc_sltp_reg_bits sltp_reg;
-       u8         reserved_0[0x60e0];
+       u8         reserved_at_0[0x60e0];
 };
 
 union mlx5_ifc_debug_enhancements_document_bits {
        struct mlx5_ifc_health_buffer_bits health_buffer;
-       u8         reserved_0[0x200];
+       u8         reserved_at_0[0x200];
 };
 
 union mlx5_ifc_uplink_pci_interface_document_bits {
        struct mlx5_ifc_initial_seg_bits initial_seg;
-       u8         reserved_0[0x20060];
+       u8         reserved_at_0[0x20060];
 };
 
 struct mlx5_ifc_set_flow_table_root_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_flow_table_root_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x40];
+       u8         reserved_at_40[0x40];
 
        u8         table_type[0x8];
-       u8         reserved_3[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_4[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_5[0x140];
+       u8         reserved_at_c0[0x140];
 };
 
 enum {
@@ -7004,39 +7026,39 @@ enum {
 
 struct mlx5_ifc_modify_flow_table_out_bits {
        u8         status[0x8];
-       u8         reserved_0[0x18];
+       u8         reserved_at_8[0x18];
 
        u8         syndrome[0x20];
 
-       u8         reserved_1[0x40];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_flow_table_in_bits {
        u8         opcode[0x10];
-       u8         reserved_0[0x10];
+       u8         reserved_at_10[0x10];
 
-       u8         reserved_1[0x10];
+       u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_2[0x20];
+       u8         reserved_at_40[0x20];
 
-       u8         reserved_3[0x10];
+       u8         reserved_at_60[0x10];
        u8         modify_field_select[0x10];
 
        u8         table_type[0x8];
-       u8         reserved_4[0x18];
+       u8         reserved_at_88[0x18];
 
-       u8         reserved_5[0x8];
+       u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_6[0x4];
+       u8         reserved_at_c0[0x4];
        u8         table_miss_mode[0x4];
-       u8         reserved_7[0x18];
+       u8         reserved_at_c8[0x18];
 
-       u8         reserved_8[0x8];
+       u8         reserved_at_e0[0x8];
        u8         table_miss_id[0x18];
 
-       u8         reserved_9[0x100];
+       u8         reserved_at_100[0x100];
 };
 
 #endif /* MLX5_IFC_H */
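The bulk of the mlx5_ifc.h hunk above is a mechanical rename of reserved fields from sequence numbers (reserved_0, reserved_1, ...) to their bit offsets (reserved_at_0, reserved_at_10, ...). A minimal sketch of the convention on a hypothetical structure, not one from the patch:

	struct mlx5_ifc_example_bits {             /* hypothetical */
		u8         field_a[0x10];          /* bits 0x00-0x0f */
		u8         reserved_at_10[0x10];   /* named for its starting bit offset */

		u8         field_b[0x20];          /* bits 0x20-0x3f */

		u8         reserved_at_40[0x40];   /* pads the layout to 0x80 bits */
	};

Because each reserved field carries its own offset in its name, carving a new field out of reserved space only touches that one spot; it no longer renumbers every later reserved_N field, which is what made earlier revisions of this header churn so heavily.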
index f079fb1a31f7f7953f0bb28f6f3a145279598dc7..5b8c89ffaa5830fdf05fbc40a1fce0b5d2ffa7f3 100644 (file)
@@ -85,7 +85,16 @@ enum mlx5_qp_state {
        MLX5_QP_STATE_ERR                       = 6,
        MLX5_QP_STATE_SQ_DRAINING               = 7,
        MLX5_QP_STATE_SUSPENDED                 = 9,
-       MLX5_QP_NUM_STATE
+       MLX5_QP_NUM_STATE,
+       MLX5_QP_STATE,
+       MLX5_QP_STATE_BAD,
+};
+
+enum {
+       MLX5_SQ_STATE_NA        = MLX5_SQC_STATE_ERR + 1,
+       MLX5_SQ_NUM_STATE       = MLX5_SQ_STATE_NA + 1,
+       MLX5_RQ_STATE_NA        = MLX5_RQC_STATE_ERR + 1,
+       MLX5_RQ_NUM_STATE       = MLX5_RQ_STATE_NA + 1,
 };
 
 enum {
@@ -130,6 +139,9 @@ enum {
        MLX5_QP_BIT_RWE                         = 1 << 14,
        MLX5_QP_BIT_RAE                         = 1 << 13,
        MLX5_QP_BIT_RIC                         = 1 <<  4,
+       MLX5_QP_BIT_CC_SLAVE_RECV               = 1 <<  2,
+       MLX5_QP_BIT_CC_SLAVE_SEND               = 1 <<  1,
+       MLX5_QP_BIT_CC_MASTER                   = 1 <<  0
 };
 
 enum {
@@ -248,8 +260,12 @@ struct mlx5_av {
        __be32  dqp_dct;
        u8      stat_rate_sl;
        u8      fl_mlid;
-       __be16  rlid;
-       u8      reserved0[10];
+       union {
+               __be16  rlid;
+               __be16  udp_sport;
+       };
+       u8      reserved0[4];
+       u8      rmac[6];
        u8      tclass;
        u8      hop_limit;
        __be32  grh_gid_fl;
@@ -456,11 +472,16 @@ struct mlx5_qp_path {
        u8                      static_rate;
        u8                      hop_limit;
        __be32                  tclass_flowlabel;
-       u8                      rgid[16];
-       u8                      rsvd1[4];
-       u8                      sl;
+       union {
+               u8              rgid[16];
+               u8              rip[16];
+       };
+       u8                      f_dscp_ecn_prio;
+       u8                      ecn_dscp;
+       __be16                  udp_sport;
+       u8                      dci_cfi_prio_sl;
        u8                      port;
-       u8                      rsvd2[6];
+       u8                      rmac[6];
 };
 
 struct mlx5_qp_context {
@@ -620,8 +641,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
                        struct mlx5_create_qp_mbox_in *in,
                        int inlen);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
-                       enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
                        struct mlx5_modify_qp_mbox_in *in, int sqd_event,
                        struct mlx5_core_qp *qp);
 int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
@@ -639,6 +659,14 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
                                u8 context, int error);
 #endif
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *rq);
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *rq);
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *sq);
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *sq);
 
 static inline const char *mlx5_qp_type_str(int type)
 {
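The mlx5_core_qp_modify() signature change above drops the (cur_state, new_state) pair in favor of a single firmware transition opcode. A hedged sketch of the assumed calling convention, where dev, in and qp are set up by the caller:

	/* was: mlx5_core_qp_modify(dev, MLX5_QP_STATE_RST, MLX5_QP_STATE_INIT, ...) */
	err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_RST2INIT_QP, in, 0, qp);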
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
new file mode 100644 (file)
index 0000000..88441f5
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __TRANSOBJ_H__
+#define __TRANSOBJ_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
+int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                       u32 *rqn);
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
+void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out);
+int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                       u32 *sqn);
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
+void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *tirn);
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *tisn);
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
+int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *rmpn);
+int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
+int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
+int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
+int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
+int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                         u32 *rmpn);
+int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
+int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
+int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
+
+int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *rqtn);
+int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
+
+#endif /* __TRANSOBJ_H__ */
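The new transobj.h header collects create/modify/destroy entry points for the mlx5 transport objects (RQ, SQ, TIR, TIS, RMP, RQT). A minimal sketch of creating a TIS through this API, modeled on the MLX5_ST_SZ_DW/MLX5_SET accessor macros used elsewhere in the driver; the tdn value (from mlx5_core_alloc_transport_domain()) and the surrounding error handling are assumptions:

	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	u32 tisn;
	int err;

	MLX5_SET(tisc, tisc, prio, 0);                 /* traffic class 0 */
	MLX5_SET(tisc, tisc, transport_domain, tdn);   /* assumed allocated earlier */
	err = mlx5_core_create_tis(dev, in, sizeof(in), &tisn);

Teardown goes through the matching destroy call (here mlx5_core_destroy_tis()) before the transport domain itself is released.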
index 638f2ca7a527c01f63fe2c3b3cd8013fbaf70c86..123771003e68586571690f9a5211e5afaa0143e3 100644 (file)
@@ -45,6 +45,11 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                     u16 vport, u8 *addr);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
                                      u16 vport, u8 *addr);
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+                                          u64 *system_image_guid);
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+                                       u16 *qkey_viol_cntr);
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
                             u8 port_num, u16  vf_num, u16 gid_index,
                             union ib_gid *gid);
@@ -85,4 +90,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
                                u16 vlans[],
                                int list_size);
 
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
+
 #endif /* __MLX5_VPORT_H__ */
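The RoCE toggles added above come as a pair; a hedged sketch of the assumed usage:

	if (!mlx5_nic_vport_enable_roce(mdev)) {
		/* ... RoCE is now enabled on the NIC vport ... */
		mlx5_nic_vport_disable_roce(mdev);
	}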
index f1cd22f2df1ac50438e7d70bb0df85c44580b8d3..516e149443397dcf712e4e62aca6fb3c78e542b6 100644 (file)
@@ -201,11 +201,13 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 
 #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK       VM_GROWSUP
 #else
-#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK       VM_GROWSDOWN
 #endif
 
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
@@ -1341,8 +1343,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
                !vma_growsup(vma->vm_next, addr);
 }
 
-extern struct task_struct *task_of_stack(struct task_struct *task,
-                               struct vm_area_struct *vma, bool in_group);
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
                unsigned long old_addr, struct vm_area_struct *new_vma,
index d3ebb9d21a5334d26e85bc865d318535f5864569..624b78b848b89fae506faa1f4f48ce86297ceace 100644 (file)
@@ -424,9 +424,9 @@ struct mm_struct {
        unsigned long total_vm;         /* Total pages mapped */
        unsigned long locked_vm;        /* Pages that have PG_mlocked set */
        unsigned long pinned_vm;        /* Refcount permanently increased */
-       unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED/GROWSDOWN */
-       unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE */
-       unsigned long stack_vm;         /* VM_GROWSUP/DOWN */
+       unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+       unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+       unsigned long stack_vm;         /* VM_STACK */
        unsigned long def_flags;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long start_brk, brk, start_stack;
index 33bb1b19273e3ad165382f4c7f0f3cd8f8c0b407..7b6c2cfee390e939e13b0f14c92224bcb3aafcf4 100644 (file)
@@ -682,6 +682,12 @@ typedef struct pglist_data {
         */
        unsigned long first_deferred_pfn;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spinlock_t split_queue_lock;
+       struct list_head split_queue;
+       unsigned long split_queue_len;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)        (NODE_DATA(nid)->node_present_pages)
index 4560d8f1545d2cec7e6da19da460fff26151c105..2bb0c308570672e7105b14f85a4215b0f9500207 100644 (file)
@@ -324,6 +324,12 @@ struct module_layout {
 #define __module_layout_align
 #endif
 
+struct mod_kallsyms {
+       Elf_Sym *symtab;
+       unsigned int num_symtab;
+       char *strtab;
+};
+
 struct module {
        enum module_state state;
 
@@ -405,15 +411,10 @@ struct module {
 #endif
 
 #ifdef CONFIG_KALLSYMS
-       /*
-        * We keep the symbol and string tables for kallsyms.
-        * The core_* fields below are temporary, loader-only (they
-        * could really be discarded after module init).
-        */
-       Elf_Sym *symtab, *core_symtab;
-       unsigned int num_symtab, core_num_syms;
-       char *strtab, *core_strtab;
-
+       /* Protected by RCU and/or module_mutex: use rcu_dereference() */
+       struct mod_kallsyms *kallsyms;
+       struct mod_kallsyms core_kallsyms;
+
        /* Section attributes */
        struct module_sect_attrs *sect_attrs;
 
index 5ac140dcb789499e51b951fa45fb34378d636407..5440b7b705eb1756f4d095e6eab3518893245568 100644 (file)
@@ -512,7 +512,6 @@ static inline void napi_enable(struct napi_struct *n)
        clear_bit(NAPI_STATE_NPSVC, &n->state);
 }
 
-#ifdef CONFIG_SMP
 /**
  *     napi_synchronize - wait until NAPI is not running
  *     @n: napi context
@@ -523,12 +522,12 @@ static inline void napi_enable(struct napi_struct *n)
  */
 static inline void napi_synchronize(const struct napi_struct *n)
 {
-       while (test_bit(NAPI_STATE_SCHED, &n->state))
-               msleep(1);
+       if (IS_ENABLED(CONFIG_SMP))
+               while (test_bit(NAPI_STATE_SCHED, &n->state))
+                       msleep(1);
+       else
+               barrier();
 }
-#else
-# define napi_synchronize(n)   barrier()
-#endif
 
 enum netdev_queue_state_t {
        __QUEUE_STATE_DRV_XOFF,
@@ -3719,7 +3718,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 void *netdev_lower_get_next(struct net_device *dev,
                                struct list_head **iter);
 #define netdev_for_each_lower_dev(dev, ldev, iter) \
-       for (iter = &(dev)->adj_list.lower, \
+       for (iter = (dev)->adj_list.lower.next, \
             ldev = netdev_lower_get_next(dev, &(iter)); \
             ldev; \
             ldev = netdev_lower_get_next(dev, &(iter)))
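The napi_synchronize() rework above trades a preprocessor split for IS_ENABLED(), which keeps both branches visible to the compiler (so the UP path still gets type-checked) while the constant condition lets the dead branch be discarded. The same pattern in isolation, with a hypothetical helper:

	if (IS_ENABLED(CONFIG_SMP))
		wait_for_remote_cpu();   /* hypothetical SMP-only path */
	else
		barrier();               /* UP: nothing runs concurrently */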
index 48e0320cd6432463b86cc3d7684d5ecd4ab48651..67300f8e5f2f0248e8651cbe17e5c14e1110ca2c 100644 (file)
@@ -550,9 +550,7 @@ extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 
 static inline loff_t nfs_size_to_loff_t(__u64 size)
 {
-       if (size > (__u64) OFFSET_MAX - 1)
-               return OFFSET_MAX - 1;
-       return (loff_t) size;
+       return min_t(u64, size, OFFSET_MAX);
 }
 
 static inline ino_t
index 791098a08a8765bea64b4137baaf12ef4af62ac8..d320906cf13e5d7d3d443a6249992202eb171f59 100644 (file)
@@ -275,6 +275,7 @@ struct nfs4_layoutcommit_args {
        size_t layoutupdate_len;
        struct page *layoutupdate_page;
        struct page **layoutupdate_pages;
+       __be32 *start_p;
 };
 
 struct nfs4_layoutcommit_res {
index 3af5f454c04ac17f3e45d2578519d9fe2f919f47..a55986f6fe38e087e9996415f6469db64d5a3238 100644 (file)
 
 #include <linux/types.h>
 
-struct nvme_bar {
-       __u64                   cap;    /* Controller Capabilities */
-       __u32                   vs;     /* Version */
-       __u32                   intms;  /* Interrupt Mask Set */
-       __u32                   intmc;  /* Interrupt Mask Clear */
-       __u32                   cc;     /* Controller Configuration */
-       __u32                   rsvd1;  /* Reserved */
-       __u32                   csts;   /* Controller Status */
-       __u32                   nssr;   /* Subsystem Reset */
-       __u32                   aqa;    /* Admin Queue Attributes */
-       __u64                   asq;    /* Admin SQ Base Address */
-       __u64                   acq;    /* Admin CQ Base Address */
-       __u32                   cmbloc; /* Controller Memory Buffer Location */
-       __u32                   cmbsz;  /* Controller Memory Buffer Size */
+enum {
+       NVME_REG_CAP    = 0x0000,       /* Controller Capabilities */
+       NVME_REG_VS     = 0x0008,       /* Version */
+       NVME_REG_INTMS  = 0x000c,       /* Interrupt Mask Set */
+       NVME_REG_INTMC  = 0x0010,       /* Interrupt Mask Clear */
+       NVME_REG_CC     = 0x0014,       /* Controller Configuration */
+       NVME_REG_CSTS   = 0x001c,       /* Controller Status */
+       NVME_REG_NSSR   = 0x0020,       /* NVM Subsystem Reset */
+       NVME_REG_AQA    = 0x0024,       /* Admin Queue Attributes */
+       NVME_REG_ASQ    = 0x0028,       /* Admin SQ Base Address */
+       NVME_REG_ACQ    = 0x0030,       /* Admin CQ Base Address */
+       NVME_REG_CMBLOC = 0x0038,       /* Controller Memory Buffer Location */
+       NVME_REG_CMBSZ  = 0x003c,       /* Controller Memory Buffer Size */
 };
 
 #define NVME_CAP_MQES(cap)     ((cap) & 0xffff)
index dd10626a615fb160587ecf09f860de36ba583c19..dc6e39696b64c8e925601f598b9cdab5684f1970 100644 (file)
@@ -929,7 +929,7 @@ static inline int of_get_available_child_count(const struct device_node *np)
        return num;
 }
 
-#ifdef CONFIG_OF
+#if defined(CONFIG_OF) && !defined(MODULE)
 #define _OF_DECLARE(table, name, compat, fn, fn_type)                  \
        static const struct of_device_id __of_table_##name              \
                __used __section(__##table##_of_table)                  \
index 4d08b6c33557250edda8e949ff64f5f2fd1ffdc1..92395a0a7dc5c436cdd43317a798b6d15eba5a5e 100644 (file)
@@ -361,6 +361,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
                               unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
                        int tag, unsigned int nr_pages, struct page **pages);
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+                       int tag, unsigned int nr_entries,
+                       struct page **entries, pgoff_t *indices);
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
                        pgoff_t index, unsigned flags);
index 27df4a6585daedcc6a74865bf048cfd06a0593ba..27716254dcc54bd6d6ca5e864f58e92fd94ea305 100644 (file)
@@ -988,23 +988,6 @@ static inline int pci_is_managed(struct pci_dev *pdev)
        return pdev->is_managed;
 }
 
-static inline void pci_set_managed_irq(struct pci_dev *pdev, unsigned int irq)
-{
-       pdev->irq = irq;
-       pdev->irq_managed = 1;
-}
-
-static inline void pci_reset_managed_irq(struct pci_dev *pdev)
-{
-       pdev->irq = 0;
-       pdev->irq_managed = 0;
-}
-
-static inline bool pci_has_managed_irq(struct pci_dev *pdev)
-{
-       return pdev->irq_managed && pdev->irq > 0;
-}
-
 void pci_disable_device(struct pci_dev *dev);
 
 extern unsigned int pcibios_max_latency;
index f9828a48f16addab4737683f3c8345ab87e52a0a..f5c5a3fa2c8101cc37ea917d29dec30713ec0699 100644 (file)
@@ -397,6 +397,7 @@ struct pmu {
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
+       PERF_EVENT_STATE_DEAD           = -4,
        PERF_EVENT_STATE_EXIT           = -3,
        PERF_EVENT_STATE_ERROR          = -2,
        PERF_EVENT_STATE_OFF            = -1,
@@ -634,9 +635,6 @@ struct perf_event_context {
        int                             nr_cgroups;      /* cgroup evts */
        void                            *task_ctx_data; /* pmu specific data */
        struct rcu_head                 rcu_head;
-
-       struct delayed_work             orphans_remove;
-       bool                            orphans_remove_sched;
 };
 
 /*
@@ -729,7 +727,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -908,7 +906,7 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
        }
 }
 
-extern struct static_key_deferred perf_sched_events;
+extern struct static_key_false perf_sched_events;
 
 static __always_inline bool
 perf_sw_migrate_enabled(void)
@@ -927,7 +925,7 @@ static inline void perf_event_task_migrate(struct task_struct *task)
 static inline void perf_event_task_sched_in(struct task_struct *prev,
                                            struct task_struct *task)
 {
-       if (static_key_false(&perf_sched_events.key))
+       if (static_branch_unlikely(&perf_sched_events))
                __perf_event_task_sched_in(prev, task);
 
        if (perf_sw_migrate_enabled() && task->sched_migrated) {
@@ -944,7 +942,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
 {
        perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
 
-       if (static_key_false(&perf_sched_events.key))
+       if (static_branch_unlikely(&perf_sched_events))
                __perf_event_task_sched_out(prev, next);
 }
 
@@ -1044,7 +1042,7 @@ extern void perf_swevent_put_recursion_context(int rctx);
 extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
@@ -1070,7 +1068,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)     { }
 static inline void perf_event_free_task(struct task_struct *task)      { }
 static inline void perf_event_delayed_put(struct task_struct *task)    { }
-static inline struct perf_event *perf_event_get(unsigned int fd)       { return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)     { return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
        return ERR_PTR(-EINVAL);
index 2d8e49711b6392a591ee352b12c162096d4c89f6..1132953235c0708bd87e1b9eea777bb705f54cb3 100644 (file)
@@ -10,7 +10,7 @@
  * backing is indicated by flags in the high bits of the value.
  */
 typedef struct {
-       unsigned long val;
+       u64 val;
 } pfn_t;
 #endif
 
index 0703b5360d31b5fa599a541bce9881941a170281..94994810c7c086e8410f3333d919ef5256595ad5 100644 (file)
@@ -9,14 +9,13 @@
  * PFN_DEV - pfn is not covered by system memmap by default
  * PFN_MAP - pfn has a dynamic page mapping established by a device driver
  */
-#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \
-               << (BITS_PER_LONG - PAGE_SHIFT))
-#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1))
-#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2))
-#define PFN_DEV (1UL << (BITS_PER_LONG - 3))
-#define PFN_MAP (1UL << (BITS_PER_LONG - 4))
-
-static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags)
+#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
+#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
+#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
+#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
+#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
+
+static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
 {
        pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
 
@@ -29,7 +28,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
        return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -48,7 +47,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
        return NULL;
 }
 
-static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
 {
        return PFN_PHYS(pfn_t_to_pfn(pfn));
 }
@@ -87,7 +86,7 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
 #ifdef __HAVE_ARCH_PTE_DEVMAP
 static inline bool pfn_t_devmap(pfn_t pfn)
 {
-       const unsigned long flags = PFN_DEV|PFN_MAP;
+       const u64 flags = PFN_DEV|PFN_MAP;
 
        return (pfn.val & flags) == flags;
 }
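With pfn_t.val widened to u64, the PFN_* flags sit at the top of a 64-bit value even on 32-bit kernels (BITS_PER_LONG_LONG rather than BITS_PER_LONG). A hedged usage sketch, assuming an architecture that defines __HAVE_ARCH_PTE_DEVMAP:

	pfn_t pfn = __pfn_to_pfn_t(page_to_pfn(page), PFN_DEV | PFN_MAP);

	if (pfn_t_devmap(pfn)) {
		/* both PFN_DEV and PFN_MAP set: device-backed and mappable */
	}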
index eb8b8ac6df3c844e2bd84903e0a50ff07f1575fe..24f5470d394460c779ebdbd385d8f14766f14001 100644 (file)
@@ -42,6 +42,7 @@ struct pipe_buffer {
  *     @fasync_readers: reader side fasync
  *     @fasync_writers: writer side fasync
  *     @bufs: the circular array of pipe buffers
+ *     @user: the user who created this pipe
  **/
 struct pipe_inode_info {
        struct mutex mutex;
@@ -57,6 +58,7 @@ struct pipe_inode_info {
        struct fasync_struct *fasync_readers;
        struct fasync_struct *fasync_writers;
        struct pipe_buffer *bufs;
+       struct user_struct *user;
 };
 
 /*
@@ -123,6 +125,8 @@ void pipe_unlock(struct pipe_inode_info *);
 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
 
 extern unsigned int pipe_max_size, pipe_min_size;
+extern unsigned long pipe_user_pages_hard;
+extern unsigned long pipe_user_pages_soft;
 int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 
 
diff --git a/include/linux/platform_data/sdhci-pic32.h b/include/linux/platform_data/sdhci-pic32.h
new file mode 100644 (file)
index 0000000..7e0efe6
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Purna Chandra Mandal, purna.mandal@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef __PIC32_SDHCI_PDATA_H__
+#define __PIC32_SDHCI_PDATA_H__
+
+struct pic32_sdhci_platform_data {
+       /* read & write fifo threshold */
+       int (*setup_dma)(u32 rfifo, u32 wfifo);
+};
+
+#endif
index acfea8ce4a07be8b6291c76ca0f53728b06ae5fa..7c3d11a6b4ad5d8d35b68a33f87801865adada8d 100644 (file)
@@ -53,12 +53,18 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
 {
        BUG();
 }
+
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+{
+       BUG();
+}
 #endif
 
 /*
  * Architectures that define ARCH_HAS_PMEM_API must provide
  * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem().
+ * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
+ * and arch_has_wmb_pmem().
  */
 static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
 {
@@ -178,4 +184,18 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
        else
                default_clear_pmem(addr, size);
 }
+
+/**
+ * wb_cache_pmem - write back processor cache for PMEM memory range
+ * @addr:      virtual start address
+ * @size:      number of bytes to write back
+ *
+ * Write back the processor cache range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with a wmb_pmem() call.
+ */
+static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+{
+       if (arch_has_pmem_api())
+               arch_wb_cache_pmem(addr, size);
+}
 #endif /* __PMEM_H__ */
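As the new kernel-doc above says, wb_cache_pmem() only writes the cache lines back; durability still requires the explicit wmb_pmem() fence. A sketch of the assumed call sequence after storing len bytes through a __pmem mapping at dst:

	wb_cache_pmem(dst, len);   /* push dirty lines toward the media */
	wmb_pmem();                /* order/flush so the stores are durable */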
index 998d8f1c3c9172037ef185963f415dbb0fad999f..b50c0492629dbfa9f902f214ad714f1c2df372bc 100644 (file)
@@ -49,6 +49,7 @@ struct bq27xxx_reg_cache {
 
 struct bq27xxx_device_info {
        struct device *dev;
+       int id;
        enum bq27xxx_chip chip;
        const char *name;
        struct bq27xxx_access_methods bus;
index 57e7d87d2d4c15de691d8fa3075f779ddbda5172..f54be708220760f9a367f4a29ca1cbe7cfb7fba2 100644 (file)
 #define RADIX_TREE_EXCEPTIONAL_ENTRY   2
 #define RADIX_TREE_EXCEPTIONAL_SHIFT   2
 
+#define RADIX_DAX_MASK 0xf
+#define RADIX_DAX_SHIFT        4
+#define RADIX_DAX_PTE  (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_PMD  (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK)
+#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
+#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
+               RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE)))
+
 static inline int radix_tree_is_indirect_ptr(void *ptr)
 {
        return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
@@ -369,13 +378,29 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
 void **radix_tree_next_chunk(struct radix_tree_root *root,
                             struct radix_tree_iter *iter, unsigned flags);
 
+/**
+ * radix_tree_iter_retry - retry this chunk of the iteration
+ * @iter:      iterator state
+ *
+ * If we iterate over a tree protected only by the RCU lock, a race
+ * against deletion or creation may result in seeing a slot for which
+ * radix_tree_deref_retry() returns true.  If so, call this function
+ * and continue the iteration.
+ */
+static inline __must_check
+void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+{
+       iter->next_index = iter->index;
+       return NULL;
+}
+
 /**
  * radix_tree_chunk_size - get current chunk size
  *
  * @iter:      pointer to radix tree iterator
  * Returns:    current chunk size
  */
-static __always_inline unsigned
+static __always_inline long
 radix_tree_chunk_size(struct radix_tree_iter *iter)
 {
        return iter->next_index - iter->index;
@@ -409,9 +434,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
                        return slot + offset + 1;
                }
        } else {
-               unsigned size = radix_tree_chunk_size(iter) - 1;
+               long size = radix_tree_chunk_size(iter);
 
-               while (size--) {
+               while (--size > 0) {
                        slot++;
                        iter->index++;
                        if (likely(*slot))
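radix_tree_iter_retry() above rewinds next_index and returns NULL so that a lockless walk restarts the current chunk. A sketch of the intended caller pattern under rcu_read_lock(), mirroring how the slot iterators are used by the page-cache lookups:

	void **slot;
	struct radix_tree_iter iter;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, 0) {
		struct page *page = radix_tree_deref_slot(slot);

		if (radix_tree_deref_retry(page)) {
			/* raced with an insert or delete: redo this chunk */
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		/* ... use page ... */
	}
	rcu_read_unlock();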
index a7a06d1dcf9cb17ee65d1befe1ef6ac62d882d16..a0118d5929a9c9ed17df3b75a8b599472ff1f205 100644 (file)
@@ -152,6 +152,8 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 
 # define jiffies       raid6_jiffies()
 # define printk        printf
+# define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+# define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
 # define GFP_KERNEL    0
 # define __get_free_pages(x, y)        ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
                                                     PROT_READ|PROT_WRITE,   \
index a75840c1aa71414acc43468da6cc8f83ec06c1b8..9c29122037f95283cc02b8af995975e80e059abd 100644 (file)
@@ -34,6 +34,7 @@ extern const struct file_operations random_fops, urandom_fops;
 #endif
 
 unsigned int get_random_int(void);
+unsigned long get_random_long(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
 u32 prandom_u32(void);
index bdf597c4f0be82965911266ed1668d06510492cb..a07f42bedda32687c94ed574f5a606b19912b042 100644 (file)
@@ -109,20 +109,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma)
                __put_anon_vma(anon_vma);
 }
 
-static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
-{
-       struct anon_vma *anon_vma = vma->anon_vma;
-       if (anon_vma)
-               down_write(&anon_vma->root->rwsem);
-}
-
-static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
-{
-       struct anon_vma *anon_vma = vma->anon_vma;
-       if (anon_vma)
-               up_write(&anon_vma->root->rwsem);
-}
-
 static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
        down_write(&anon_vma->root->rwsem);
index f1e81e128592cbcf5b3c3e61949bcbe75b34c74f..a10494a94cc30f24d65ab866771833883c6f886d 100644 (file)
@@ -835,6 +835,7 @@ struct user_struct {
 #endif
        unsigned long locked_shm; /* How many pages of mlocked shm ? */
        unsigned long unix_inflight;    /* How many files in flight in unix sockets */
+       atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
 
 #ifdef CONFIG_KEYS
        struct key *uid_keyring;        /* UID specific keyring */
index a43f41cb3c430166afc7fda3f09571007fe74acf..4d4780c00d345e9e5fd50f5394b89d93c57c966b 100644 (file)
@@ -15,10 +15,7 @@ struct shmem_inode_info {
        unsigned int            seals;          /* shmem seals */
        unsigned long           flags;
        unsigned long           alloced;        /* data pages alloced to file */
-       union {
-               unsigned long   swapped;        /* subtotal assigned to swap */
-               char            *symlink;       /* unswappable short symlink */
-       };
+       unsigned long           swapped;        /* subtotal assigned to swap */
        struct shared_policy    policy;         /* NUMA memory alloc policy */
        struct list_head        swaplist;       /* chain of maybes on swap */
        struct simple_xattrs    xattrs;         /* list of xattrs */
index 11f935c1a090419d6cda938aa925bfa79de3616b..4ce9ff7086f4897a67f6037d88df21012cf28413 100644 (file)
@@ -299,6 +299,7 @@ struct sk_buff;
 #else
 #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
 #endif
+extern int sysctl_max_skb_frags;
 
 typedef struct skb_frag_struct skb_frag_t;
 
index 343c13ac4f71f41f370b3bd51c8cd38c9ea75c2f..35cb9264e0d50bb8b8cce8f569d950de2d6694b2 100644 (file)
@@ -44,6 +44,7 @@
 
 #define KNAV_DMA_NUM_EPIB_WORDS                        4
 #define KNAV_DMA_NUM_PS_WORDS                  16
+#define KNAV_DMA_NUM_SW_DATA_WORDS             4
 #define KNAV_DMA_FDQ_PER_CHAN                  4
 
 /* Tx channel scheduling priority */
@@ -142,6 +143,7 @@ struct knav_dma_cfg {
  * @orig_buff:                 buff pointer since 'buff' can be overwritten
  * @epib:                      Extended packet info block
  * @psdata:                    Protocol specific
+ * @sw_data:                   Software private data not touched by h/w
  */
 struct knav_dma_desc {
        __le32  desc_info;
@@ -154,7 +156,7 @@ struct knav_dma_desc {
        __le32  orig_buff;
        __le32  epib[KNAV_DMA_NUM_EPIB_WORDS];
        __le32  psdata[KNAV_DMA_NUM_PS_WORDS];
-       __le32  pad[4];
+       u32     sw_data[KNAV_DMA_NUM_SW_DATA_WORDS];
 } ____cacheline_aligned;
 
 #if IS_ENABLED(CONFIG_KEYSTONE_NAVIGATOR_DMA)
index f869807a0d0e2ca93629a7d25092f268dbc8f520..5322fea6fe4c7995ae3ad0cfad15a1818356576f 100644 (file)
@@ -51,6 +51,7 @@
 /* RPC/RDMA parameters and stats */
 extern unsigned int svcrdma_ord;
 extern unsigned int svcrdma_max_requests;
+extern unsigned int svcrdma_max_bc_requests;
 extern unsigned int svcrdma_max_req_size;
 
 extern atomic_t rdma_stat_recv;
@@ -69,6 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
+       struct list_head free;
        struct svc_rdma_op_ctxt *read_hdr;
        struct svc_rdma_fastreg_mr *frmr;
        int hdr_count;
@@ -112,6 +114,7 @@ struct svc_rdma_fastreg_mr {
        struct list_head frmr_list;
 };
 struct svc_rdma_req_map {
+       struct list_head free;
        unsigned long count;
        union {
                struct kvec sge[RPCSVC_MAXPAGES];
@@ -132,28 +135,32 @@ struct svcxprt_rdma {
        int                  sc_max_sge;
        int                  sc_max_sge_rd;     /* max sge for read target */
 
-       int                  sc_sq_depth;       /* Depth of SQ */
        atomic_t             sc_sq_count;       /* Number of SQ WR on queue */
-
-       int                  sc_max_requests;   /* Depth of RQ */
+       unsigned int         sc_sq_depth;       /* Depth of SQ */
+       unsigned int         sc_rq_depth;       /* Depth of RQ */
+       u32                  sc_max_requests;   /* Forward credits */
+       u32                  sc_max_bc_requests;/* Backward credits */
        int                  sc_max_req_size;   /* Size of each RQ WR buf */
 
        struct ib_pd         *sc_pd;
 
        atomic_t             sc_dma_used;
-       atomic_t             sc_ctxt_used;
+       spinlock_t           sc_ctxt_lock;
+       struct list_head     sc_ctxts;
+       int                  sc_ctxt_used;
+       spinlock_t           sc_map_lock;
+       struct list_head     sc_maps;
+
        struct list_head     sc_rq_dto_q;
        spinlock_t           sc_rq_dto_lock;
        struct ib_qp         *sc_qp;
        struct ib_cq         *sc_rq_cq;
        struct ib_cq         *sc_sq_cq;
-       struct ib_mr         *sc_phys_mr;       /* MR for server memory */
        int                  (*sc_reader)(struct svcxprt_rdma *,
                                          struct svc_rqst *,
                                          struct svc_rdma_op_ctxt *,
                                          int *, u32 *, u32, u32, u64, bool);
        u32                  sc_dev_caps;       /* distilled device caps */
-       u32                  sc_dma_lkey;       /* local dma key */
        unsigned int         sc_frmr_pg_list_len;
        struct list_head     sc_frmr_q;
        spinlock_t           sc_frmr_q_lock;
@@ -179,8 +186,18 @@ struct svcxprt_rdma {
 #define RPCRDMA_MAX_REQUESTS    32
 #define RPCRDMA_MAX_REQ_SIZE    4096
 
+/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
+ * current NFSv4.1 implementation supports one backchannel slot.
+ */
+#define RPCRDMA_MAX_BC_REQUESTS        2
+
 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
 
+/* svc_rdma_backchannel.c */
+extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
+                                   struct rpcrdma_msg *rmsgp,
+                                   struct xdr_buf *rcvbuf);
+
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
@@ -206,6 +223,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
                                u32, u32, u64, bool);
 
 /* svc_rdma_sendto.c */
+extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
+                           struct svc_rdma_req_map *);
 extern int svc_rdma_sendto(struct svc_rqst *);
 extern struct rpcrdma_read_chunk *
        svc_rdma_get_read_chunk(struct rpcrdma_msg *);
@@ -214,13 +233,14 @@ extern struct rpcrdma_read_chunk *
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
 extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
                                enum rpcrdma_errcode);
-extern int svc_rdma_post_recv(struct svcxprt_rdma *);
+extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
-extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *);
+extern void svc_rdma_put_req_map(struct svcxprt_rdma *,
+                                struct svc_rdma_req_map *);
 extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
 extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
                              struct svc_rdma_fastreg_mr *);
@@ -234,6 +254,7 @@ extern struct svc_xprt_class svc_rdma_bc_class;
 #endif
 
 /* svc_rdma.c */
+extern struct workqueue_struct *svc_rdma_wq;
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
 
index e7a018eaf3a255db2198049f543ea3111004852a..017fced60242ecdf5eaf9daeb0b035e18333d6e3 100644 (file)
@@ -1,10 +1,13 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/dma-direction.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct device;
 struct dma_attrs;
+struct page;
 struct scatterlist;
 
 extern int swiotlb_force;
index 613c29bd6baf4763e57530b794df843d9cea61ee..e13a1ace50e902ad0ae38a81f5563d67eaa4a00f 100644 (file)
@@ -43,6 +43,9 @@
 /* Default weight of a bound cooling device */
 #define THERMAL_WEIGHT_DEFAULT 0
 
+/* use a value below 0 K to indicate an invalid/uninitialized temperature */
+#define THERMAL_TEMP_INVALID   -274000
+
 /* Unit conversion macros */
 #define DECI_KELVIN_TO_CELSIUS(t)      ({                      \
        long _t = (t);                                          \
@@ -167,6 +170,7 @@ struct thermal_attr {
  * @forced_passive:    If > 0, temperature at which to switch on all ACPI
  *                     processor cooling devices.  Currently only used by the
  *                     step-wise governor.
+ * @need_update:       if set to 1, thermal_zone_device_update() needs to be invoked.
  * @ops:       operations this &thermal_zone_device supports
  * @tzp:       thermal zone parameters
  * @governor:  pointer to the governor for this thermal zone
@@ -194,6 +198,7 @@ struct thermal_zone_device {
        int emul_temperature;
        int passive;
        unsigned int forced_passive;
+       atomic_t need_update;
        struct thermal_zone_device_ops *ops;
        struct thermal_zone_params *tzp;
        struct thermal_governor *governor;
index acd522a91539155db94143d5da4e4848a44a437e..acfdbf353a0b5bc7cfeb6ae58fa7e6a6acfa16ba 100644 (file)
  * See the file COPYING for more details.
  */
 
+#include <linux/smp.h>
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/cpumask.h>
 #include <linux/rcupdate.h>
 #include <linux/tracepoint-defs.h>
 
@@ -132,6 +134,9 @@ extern void syscall_unregfunc(void);
                void *it_func;                                          \
                void *__data;                                           \
                                                                        \
+               if (!cpu_online(raw_smp_processor_id()))                \
+                       return;                                         \
+                                                                       \
                if (!(cond))                                            \
                        return;                                         \
                prercu;                                                 \
index 2fd8708ea88893cf57e03f0ae23688ac42c9d24c..d9fb4b043f56dd45b406d3b68c59da3c860c1f43 100644 (file)
@@ -649,6 +649,7 @@ extern long vt_compat_ioctl(struct tty_struct *tty,
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
 extern void __lockfunc tty_lock(struct tty_struct *tty);
+extern int  tty_lock_interruptible(struct tty_struct *tty);
 extern void __lockfunc tty_unlock(struct tty_struct *tty);
 extern void __lockfunc tty_lock_slave(struct tty_struct *tty);
 extern void __lockfunc tty_unlock_slave(struct tty_struct *tty);
index cbb20afdbc01cc4a600574446d5b19c33a0d90c1..bb679b48f408217a422d0cd427ede3e05e2e67bd 100644 (file)
@@ -11,4 +11,8 @@ unsigned long ucs2_strlen(const ucs2_char_t *s);
 unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength);
 int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len);
 
+unsigned long ucs2_utf8size(const ucs2_char_t *src);
+unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src,
+                          unsigned long maxlength);
+
 #endif /* _LINUX_UCS2_STRING_H_ */
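The two new helpers above are meant to be used together: size the destination first, then convert. A hedged sketch (the name variable and the kzalloc() allocation are assumptions):

	unsigned long len = ucs2_utf8size(name);   /* bytes needed, excluding NUL */
	u8 *buf = kzalloc(len + 1, GFP_KERNEL);

	if (buf)
		ucs2_as_utf8(buf, name, len);      /* writes at most len bytes */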
index 0e32bc71245ef46b90aa21112e4d2bef42cc5950..ca73c503b92a758ad5ce9b6d022c53c0758951c5 100644 (file)
@@ -311,6 +311,7 @@ enum {
 
        __WQ_DRAINING           = 1 << 16, /* internal: workqueue is draining */
        __WQ_ORDERED            = 1 << 17, /* internal: workqueue is ordered */
+       __WQ_LEGACY             = 1 << 18, /* internal: create*_workqueue() */
 
        WQ_MAX_ACTIVE           = 512,    /* I like 512, better ideas? */
        WQ_MAX_UNBOUND_PER_CPU  = 4,      /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
        alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)                                         \
-       alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)                               \
-       alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-                       1, (name))
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+                       WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)                            \
-       alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+       alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
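
__WQ_LEGACY tags queues made through the old create*_workqueue() wrappers so the core can distinguish them from queues allocated with alloc_workqueue() directly. A sketch of the preferred direct call (names are illustrative):

        /* Illustrative: new code should call alloc_workqueue() directly. */
        static struct workqueue_struct *example_wq;

        static int example_setup(void)
        {
                example_wq = alloc_workqueue("example", WQ_MEM_RECLAIM, 1);
                return example_wq ? 0 : -ENOMEM;
        }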
 
index ef03ae56b1c1cc18d9541697b353a01842b50210..8a0f55b6c2ba80e25c67caf89e2050fa58ae322f 100644 (file)
@@ -533,7 +533,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
                const unsigned int requested_sizes[]);
 int vb2_core_prepare_buf(struct vb2_queue *q, unsigned int index, void *pb);
 int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb);
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking);
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+                  bool nonblocking);
 
 int vb2_core_streamon(struct vb2_queue *q, unsigned int type);
 int vb2_core_streamoff(struct vb2_queue *q, unsigned int type);
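
vb2_core_dqbuf() now reports the dequeued buffer's index through a new out-parameter. A hedged sketch of an updated call site, assuming a NULL pindex is still accepted when the index is unused:

        /* Illustrative: fetch the index of the dequeued buffer. */
        static int example_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
        {
                unsigned int index;     /* filled with the buffer index */

                return vb2_core_dqbuf(q, &index, pb, nonblocking);
        }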
index 2a91a0561a478393ca9e9d2f1993467cc4c5c9cb..9b4c418bebd84ae0a7debfbcc697ee92b136ec99 100644 (file)
@@ -6,8 +6,8 @@
 #include <linux/mutex.h>
 #include <net/sock.h>
 
-void unix_inflight(struct file *fp);
-void unix_notinflight(struct file *fp);
+void unix_inflight(struct user_struct *user, struct file *fp);
+void unix_notinflight(struct user_struct *user, struct file *fp);
 void unix_gc(void);
 void wait_for_unix_gc(void);
 struct sock *unix_get_socket(struct file *filp);
index 52899291f40144c0dab97e135b0c9762ade5c8d1..5ee3c689c86370b4b325309d337611c9b51af5d1 100644 (file)
@@ -252,6 +252,12 @@ struct l2cap_conn_rsp {
 #define L2CAP_PSM_3DSP         0x0021
 #define L2CAP_PSM_IPSP         0x0023 /* 6LoWPAN */
 
+#define L2CAP_PSM_DYN_START    0x1001
+#define L2CAP_PSM_DYN_END      0xffff
+#define L2CAP_PSM_AUTO_END     0x10ff
+#define L2CAP_PSM_LE_DYN_START  0x0080
+#define L2CAP_PSM_LE_DYN_END   0x00ff
+
 /* channel identifier */
 #define L2CAP_CID_SIGNALING    0x0001
 #define L2CAP_CID_CONN_LESS    0x0002
index 6816f0fa5693dc275e1e4997375f29b5d99f7b64..30a56ab2ccfb05d7bc9a527ebdc09da19b3d5f7b 100644 (file)
@@ -44,6 +44,24 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
        return dst && !(dst->flags & DST_METADATA);
 }
 
+static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
+                                      const struct sk_buff *skb_b)
+{
+       const struct metadata_dst *a, *b;
+
+       if (!(skb_a->_skb_refdst | skb_b->_skb_refdst))
+               return 0;
+
+       a = (const struct metadata_dst *) skb_dst(skb_a);
+       b = (const struct metadata_dst *) skb_dst(skb_b);
+
+       if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len)
+               return 1;
+
+       return memcmp(&a->u.tun_info, &b->u.tun_info,
+                     sizeof(a->u.tun_info) + a->u.tun_info.options_len);
+}
+
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
index 481fe1c9044cfd8b49585139e24df16b0716debf..49dcad4fe99e0ad5de491ef0e0675a3b516aabca 100644 (file)
@@ -270,8 +270,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
                                            struct sock *newsk,
                                            const struct request_sock *req);
 
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
-                             struct sock *child);
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+                                     struct request_sock *req,
+                                     struct sock *child);
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
                                   unsigned long timeout);
 struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
index 877f682989b892fb1d70256bda6b33f03e34a0f5..295d291269e2c88ed4930041597a28f7c9a7a2f7 100644 (file)
@@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 
 void ip6_route_input(struct sk_buff *skb);
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-                                  struct flowi6 *fl6);
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+                                        struct flowi6 *fl6, int flags);
+
+static inline struct dst_entry *ip6_route_output(struct net *net,
+                                                const struct sock *sk,
+                                                struct flowi6 *fl6)
+{
+       return ip6_route_output_flags(net, sk, fl6, 0);
+}
+
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
                                   int flags);
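
The flags-taking variant lets callers pass RT6_LOOKUP_F_* lookup flags directly, while the inline ip6_route_output() wrapper preserves the old zero-flag behavior. A minimal sketch, assuming RT6_LOOKUP_F_IFACE is the flag a caller wants:

        /* Illustrative: force the lookup to honor the bound interface. */
        static struct dst_entry *example_output(struct net *net, struct sock *sk,
                                                struct flowi6 *fl6)
        {
                return ip6_route_output_flags(net, sk, fl6, RT6_LOOKUP_F_IFACE);
        }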
 
index 7029527725ddbb80f7b78e66565b69ad8aad5806..4079fc18ffe4643522178b394ed6d1b54d5a6093 100644 (file)
@@ -61,6 +61,7 @@ struct fib_nh_exception {
        struct rtable __rcu             *fnhe_rth_input;
        struct rtable __rcu             *fnhe_rth_output;
        unsigned long                   fnhe_stamp;
+       struct rcu_head                 rcu;
 };
 
 struct fnhe_hash_bucket {
index 6db96ea0144f0445c4e52c51cddcc3d25d60ccdd..dda9abf6b89c14afddf71562c763bdd6bb7d85c3 100644 (file)
@@ -230,6 +230,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
                    u8 *protocol, struct flowi4 *fl4);
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 
 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
index 788ef58a66b9b78807d164bce85060cc760ee336..62e17d1319ff7423dbcf176815f04cecfcdf3371 100644 (file)
@@ -79,12 +79,10 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *proto);
 
-#ifdef CONFIG_LOCKDEP
-# define CONNTRACK_LOCKS 8
-#else
-# define CONNTRACK_LOCKS 1024
-#endif
+#define CONNTRACK_LOCKS 1024
+
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
index 262532d111f51e3a91a06af785b4721e8fab9d56..59fa93c01d2a16a129298498f1d56556b895acd9 100644 (file)
@@ -21,6 +21,7 @@ struct scm_creds {
 struct scm_fp_list {
        short                   count;
        short                   max;
+       struct user_struct      *user;
        struct file             *fp[SCM_MAX_FD];
 };
 
index 20e72129be1ce0063eeafcbaadcee1f37e0c614c..205630bb5010b8ac76b84651b302e488fc1c76ff 100644 (file)
@@ -756,7 +756,6 @@ struct sctp_transport {
 
        /* Reference counting. */
        atomic_t refcnt;
-       __u32    dead:1,
                /* RTO-Pending : A flag used to track if one of the DATA
                 *              chunks sent to this address is currently being
                 *              used to compute a RTT. If this flag is 0,
@@ -766,7 +765,7 @@ struct sctp_transport {
                 *              calculation completes (i.e. the DATA chunk
                 *              is SACK'd) clear this flag.
                 */
-                rto_pending:1,
+       __u32   rto_pending:1,
 
                /*
                 * hb_sent : a flag that signals that we have a pending
@@ -955,7 +954,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
 void sctp_transport_free(struct sctp_transport *);
 void sctp_transport_reset_timers(struct sctp_transport *);
-void sctp_transport_hold(struct sctp_transport *);
+int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
 void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
index b9e7b3d863a09db498e00004da4da8a781e90b9f..f5ea148853e2f76257aca80a4aef65a30d587538 100644 (file)
@@ -1035,18 +1035,6 @@ struct proto {
        struct list_head        node;
 #ifdef SOCK_REFCNT_DEBUG
        atomic_t                socks;
-#endif
-#ifdef CONFIG_MEMCG_KMEM
-       /*
-        * cgroup specific init/deinit functions. Called once for all
-        * protocols that implement it, from cgroups populate function.
-        * This function has to setup any files the protocol want to
-        * appear in the kmem cgroup filesystem.
-        */
-       int                     (*init_cgroup)(struct mem_cgroup *memcg,
-                                              struct cgroup_subsys *ss);
-       void                    (*destroy_cgroup)(struct mem_cgroup *memcg);
-       struct cg_proto         *(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
        int                     (*diag_destroy)(struct sock *sk, int err);
 };
index 7dda3d7adba8e905b3d14db7f3361d9a96493b46..aecd30308d500b53a1b46902f2b3d254bcbc39b6 100644 (file)
@@ -16,7 +16,7 @@ struct sock_reuseport {
 };
 
 extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
                                          u32 hash,
index 8ea19977ea530bffb57bebed734b977c95b603c5..ae6468f5c9f373d63d987fb114deba1064bc16d5 100644 (file)
@@ -216,7 +216,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* TCP thin-stream limits */
 #define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */
 
-/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
+/* TCP initial congestion window as per rfc6928 */
 #define TCP_INIT_CWND          10
 
 /* Bit Flags for sysctl_tcp_fastopen */
@@ -447,7 +447,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 void tcp_v4_mtu_reduced(struct sock *sk);
-void tcp_req_err(struct sock *sk, u32 seq);
+void tcp_req_err(struct sock *sk, u32 seq, bool abort);
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(const struct sock *sk,
                                      struct request_sock *req,
index 11528591d0d714f3ea1004566cb799952a9ca5bd..c34c9002460c567a8e7e3e8cd9cb6acb751c97d8 100644 (file)
@@ -83,6 +83,8 @@ struct rdma_dev_addr {
        int bound_dev_if;
        enum rdma_transport_type transport;
        struct net *net;
+       enum rdma_network_type network;
+       int hoplimit;
 };
 
 /**
@@ -91,8 +93,8 @@ struct rdma_dev_addr {
  *
  * The dev_addr->net field must be initialized.
  */
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
-                     u16 *vlan_id);
+int rdma_translate_ip(const struct sockaddr *addr,
+                     struct rdma_dev_addr *dev_addr, u16 *vlan_id);
 
 /**
  * rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -117,6 +119,10 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
                                     struct rdma_dev_addr *addr, void *context),
                    void *context);
 
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+                         const struct sockaddr *dst_addr,
+                         struct rdma_dev_addr *addr);
+
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
@@ -125,8 +131,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 int rdma_addr_size(struct sockaddr *addr);
 
 int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-                              u8 *smac, u16 *vlan_id, int if_index);
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+                                const union ib_gid *dgid,
+                                u8 *smac, u16 *vlan_id, int *if_index,
+                                int *hoplimit);
 
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
 {
index 269a27cf0a46f37c7fd9fdc3fc24df2d5070c2d2..e30f19bd4a41ef6900cda863c99f3b238a30cd47 100644 (file)
@@ -60,6 +60,7 @@ int ib_get_cached_gid(struct ib_device    *device,
  *   a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
  * @ndev: In RoCE, the net device of the device. NULL means ignore.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the cached GID table where the GID was found.  This
@@ -70,6 +71,7 @@ int ib_get_cached_gid(struct ib_device    *device,
  */
 int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
+                      enum ib_gid_type gid_type,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index);
@@ -79,6 +81,7 @@ int ib_find_cached_gid(struct ib_device *device,
  * GID value occurs
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
  * @port_num: The port number of the device where the GID value should be
  *   searched.
  * @ndev: In RoCE, the net device of the device. NULL means ignore.
@@ -90,6 +93,7 @@ int ib_find_cached_gid(struct ib_device *device,
  */
 int ib_find_cached_gid_by_port(struct ib_device *device,
                               const union ib_gid *gid,
+                              enum ib_gid_type gid_type,
                               u8               port_num,
                               struct net_device *ndev,
                               u16              *index);
index ec9b44dd3d806b20d80920a0cb37487ea2d12c24..0ff049bd9ad413c47b92aaaf116a43d1738e4b32 100644 (file)
@@ -438,6 +438,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
 /**
  * ib_mad_recv_handler - callback handler for a received MAD.
  * @mad_agent: MAD agent requesting the received MAD.
+ * @send_buf: Send buffer if found, else NULL
  * @mad_recv_wc: Received work completion information on the received MAD.
  *
  * MADs received in response to a send request operation will be handed to
@@ -447,6 +448,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
  * modify the data referenced by @mad_recv_wc.
  */
 typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+                                   struct ib_mad_send_buf *send_buf,
                                    struct ib_mad_recv_wc *mad_recv_wc);
 
 /**
index e99d8f9a4551d9889501c645e04139fd0f32a3f8..0f3daae44bf9bf5b440c9d15d6a4d90544d29b88 100644 (file)
@@ -41,6 +41,8 @@ enum {
        IB_ETH_BYTES  = 14,
        IB_VLAN_BYTES = 4,
        IB_GRH_BYTES  = 40,
+       IB_IP4_BYTES  = 20,
+       IB_UDP_BYTES  = 8,
        IB_BTH_BYTES  = 12,
        IB_DETH_BYTES = 8
 };
@@ -223,6 +225,27 @@ struct ib_unpacked_eth {
        __be16  type;
 };
 
+struct ib_unpacked_ip4 {
+       u8      ver;
+       u8      hdr_len;
+       u8      tos;
+       __be16  tot_len;
+       __be16  id;
+       __be16  frag_off;
+       u8      ttl;
+       u8      protocol;
+       __sum16 check;
+       __be32  saddr;
+       __be32  daddr;
+};
+
+struct ib_unpacked_udp {
+       __be16  sport;
+       __be16  dport;
+       __be16  length;
+       __be16  csum;
+};
+
 struct ib_unpacked_vlan {
        __be16  tag;
        __be16  type;
@@ -237,6 +260,10 @@ struct ib_ud_header {
        struct ib_unpacked_vlan vlan;
        int                     grh_present;
        struct ib_unpacked_grh  grh;
+       int                     ipv4_present;
+       struct ib_unpacked_ip4  ip4;
+       int                     udp_present;
+       struct ib_unpacked_udp  udp;
        struct ib_unpacked_bth  bth;
        struct ib_unpacked_deth deth;
        int                     immediate_present;
@@ -253,13 +280,17 @@ void ib_unpack(const struct ib_field        *desc,
               void                         *buf,
               void                         *structure);
 
-void ib_ud_header_init(int                 payload_bytes,
-                      int                  lrh_present,
-                      int                  eth_present,
-                      int                  vlan_present,
-                      int                  grh_present,
-                      int                  immediate_present,
-                      struct ib_ud_header *header);
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header);
+
+int ib_ud_header_init(int                  payload_bytes,
+                     int                   lrh_present,
+                     int                   eth_present,
+                     int                   vlan_present,
+                     int                   grh_present,
+                     int                   ip_version,
+                     int                   udp_present,
+                     int                   immediate_present,
+                     struct ib_ud_header *header);
 
 int ib_ud_header_pack(struct ib_ud_header *header,
                      void                *buf);
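
ib_ud_header_init() now takes ip_version and udp_present so a RoCEv2 UD packet can carry IPv4/UDP headers in place of a GRH, and it returns an error code instead of void. A hedged sketch of a call site (the argument values are illustrative):

        /* Illustrative: build a RoCEv2-over-IPv4 UD header (Ethernet + IPv4 + UDP). */
        static int example_init_rocev2_hdr(struct ib_ud_header *hdr, int payload)
        {
                return ib_ud_header_init(payload,
                                         0,     /* lrh_present */
                                         1,     /* eth_present */
                                         0,     /* vlan_present */
                                         0,     /* grh_present */
                                         4,     /* ip_version: IPv4 */
                                         1,     /* udp_present */
                                         0,     /* immediate_present */
                                         hdr);
        }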
index a5889f18807b62f6f2d46f01a4650b5876647051..2f8a65c1fca7b77bc03c20e3b886036448645ad1 100644 (file)
@@ -42,6 +42,7 @@
  */
 #define IB_PMA_CLASS_CAP_ALLPORTSELECT  cpu_to_be16(1 << 8)
 #define IB_PMA_CLASS_CAP_EXT_WIDTH      cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10)
 #define IB_PMA_CLASS_CAP_XMIT_WAIT      cpu_to_be16(1 << 12)
 
 #define IB_PMA_CLASS_PORT_INFO          cpu_to_be16(0x0001)
index 301969552d0a51e34dcd872daa303a6339721916..cdc1c81aa275bda38f97f1e71c3c774dfbc8c983 100644 (file)
@@ -160,6 +160,7 @@ struct ib_sa_path_rec {
        int          ifindex;
        /* ignored in IB */
        struct net  *net;
+       enum ib_gid_type gid_type;
 };
 
 static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
@@ -402,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
  */
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             struct ib_sa_mcmember_rec *rec,
+                            struct net_device *ndev,
+                            enum ib_gid_type gid_type,
                             struct ib_ah_attr *ah_attr);
 
 /**
index 120da1d7f57eb578c327507ad2affa5b89488dbb..284b00c8fea4a4f67943aa7979d1b51728e5732e 100644 (file)
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
 #include <linux/socket.h>
+#include <linux/irq_poll.h>
 #include <uapi/linux/if_ether.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <linux/string.h>
+#include <linux/slab.h>
 
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
+extern struct workqueue_struct *ib_comp_wq;
 
 union ib_gid {
        u8      raw[16];
@@ -67,7 +73,17 @@ union ib_gid {
 
 extern union ib_gid zgid;
 
+enum ib_gid_type {
+       /* If link layer is Ethernet, this is RoCE V1 */
+       IB_GID_TYPE_IB        = 0,
+       IB_GID_TYPE_ROCE      = 0,
+       IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+       IB_GID_TYPE_SIZE
+};
+
+#define ROCE_V2_UDP_DPORT      4791
 struct ib_gid_attr {
+       enum ib_gid_type        gid_type;
        struct net_device       *ndev;
 };
 
@@ -98,6 +114,35 @@ enum rdma_protocol_type {
 __attribute_const__ enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type);
 
+enum rdma_network_type {
+       RDMA_NETWORK_IB,
+       RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+       RDMA_NETWORK_IPV4,
+       RDMA_NETWORK_IPV6
+};
+
+static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
+{
+       if (network_type == RDMA_NETWORK_IPV4 ||
+           network_type == RDMA_NETWORK_IPV6)
+               return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+       /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
+       return IB_GID_TYPE_IB;
+}
+
+static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
+                                                           union ib_gid *gid)
+{
+       if (gid_type == IB_GID_TYPE_IB)
+               return RDMA_NETWORK_IB;
+
+       if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+               return RDMA_NETWORK_IPV4;
+       else
+               return RDMA_NETWORK_IPV6;
+}
+
 enum rdma_link_layer {
        IB_LINK_LAYER_UNSPECIFIED,
        IB_LINK_LAYER_INFINIBAND,
@@ -105,24 +150,32 @@ enum rdma_link_layer {
 };
 
 enum ib_device_cap_flags {
-       IB_DEVICE_RESIZE_MAX_WR         = 1,
-       IB_DEVICE_BAD_PKEY_CNTR         = (1<<1),
-       IB_DEVICE_BAD_QKEY_CNTR         = (1<<2),
-       IB_DEVICE_RAW_MULTI             = (1<<3),
-       IB_DEVICE_AUTO_PATH_MIG         = (1<<4),
-       IB_DEVICE_CHANGE_PHY_PORT       = (1<<5),
-       IB_DEVICE_UD_AV_PORT_ENFORCE    = (1<<6),
-       IB_DEVICE_CURR_QP_STATE_MOD     = (1<<7),
-       IB_DEVICE_SHUTDOWN_PORT         = (1<<8),
-       IB_DEVICE_INIT_TYPE             = (1<<9),
-       IB_DEVICE_PORT_ACTIVE_EVENT     = (1<<10),
-       IB_DEVICE_SYS_IMAGE_GUID        = (1<<11),
-       IB_DEVICE_RC_RNR_NAK_GEN        = (1<<12),
-       IB_DEVICE_SRQ_RESIZE            = (1<<13),
-       IB_DEVICE_N_NOTIFY_CQ           = (1<<14),
-       IB_DEVICE_LOCAL_DMA_LKEY        = (1<<15),
-       IB_DEVICE_RESERVED              = (1<<16), /* old SEND_W_INV */
-       IB_DEVICE_MEM_WINDOW            = (1<<17),
+       IB_DEVICE_RESIZE_MAX_WR                 = (1 << 0),
+       IB_DEVICE_BAD_PKEY_CNTR                 = (1 << 1),
+       IB_DEVICE_BAD_QKEY_CNTR                 = (1 << 2),
+       IB_DEVICE_RAW_MULTI                     = (1 << 3),
+       IB_DEVICE_AUTO_PATH_MIG                 = (1 << 4),
+       IB_DEVICE_CHANGE_PHY_PORT               = (1 << 5),
+       IB_DEVICE_UD_AV_PORT_ENFORCE            = (1 << 6),
+       IB_DEVICE_CURR_QP_STATE_MOD             = (1 << 7),
+       IB_DEVICE_SHUTDOWN_PORT                 = (1 << 8),
+       IB_DEVICE_INIT_TYPE                     = (1 << 9),
+       IB_DEVICE_PORT_ACTIVE_EVENT             = (1 << 10),
+       IB_DEVICE_SYS_IMAGE_GUID                = (1 << 11),
+       IB_DEVICE_RC_RNR_NAK_GEN                = (1 << 12),
+       IB_DEVICE_SRQ_RESIZE                    = (1 << 13),
+       IB_DEVICE_N_NOTIFY_CQ                   = (1 << 14),
+
+       /*
+        * This device supports a per-device lkey or stag that can be
+        * used without performing a memory registration for the local
+        * memory.  Note that ULPs should never check this flag, but
+        * instead use the local_dma_lkey flag in the ib_pd structure,
+        * which will always contain a usable lkey.
+        */
+       IB_DEVICE_LOCAL_DMA_LKEY                = (1 << 15),
+       IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16),
+       IB_DEVICE_MEM_WINDOW                    = (1 << 17),
        /*
         * Devices should set IB_DEVICE_UD_IP_SUM if they support
         * insertion of UDP and TCP checksum on outgoing UD IPoIB
@@ -130,18 +183,35 @@ enum ib_device_cap_flags {
         * incoming messages.  Setting this flag implies that the
         * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
         */
-       IB_DEVICE_UD_IP_CSUM            = (1<<18),
-       IB_DEVICE_UD_TSO                = (1<<19),
-       IB_DEVICE_XRC                   = (1<<20),
-       IB_DEVICE_MEM_MGT_EXTENSIONS    = (1<<21),
-       IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
-       IB_DEVICE_MEM_WINDOW_TYPE_2A    = (1<<23),
-       IB_DEVICE_MEM_WINDOW_TYPE_2B    = (1<<24),
-       IB_DEVICE_RC_IP_CSUM            = (1<<25),
-       IB_DEVICE_RAW_IP_CSUM           = (1<<26),
-       IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-       IB_DEVICE_SIGNATURE_HANDOVER    = (1<<30),
-       IB_DEVICE_ON_DEMAND_PAGING      = (1<<31),
+       IB_DEVICE_UD_IP_CSUM                    = (1 << 18),
+       IB_DEVICE_UD_TSO                        = (1 << 19),
+       IB_DEVICE_XRC                           = (1 << 20),
+
+       /*
+        * This device supports the IB "base memory management extension",
+        * which includes support for fast registrations (IB_WR_REG_MR,
+        * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs).  This flag should
+        * also be set by any iWarp device which must support FRs to comply
+        * with the iWarp verbs spec.  iWarp devices also support the
+        * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
+        * stag.
+        */
+       IB_DEVICE_MEM_MGT_EXTENSIONS            = (1 << 21),
+       IB_DEVICE_BLOCK_MULTICAST_LOOPBACK      = (1 << 22),
+       IB_DEVICE_MEM_WINDOW_TYPE_2A            = (1 << 23),
+       IB_DEVICE_MEM_WINDOW_TYPE_2B            = (1 << 24),
+       IB_DEVICE_RC_IP_CSUM                    = (1 << 25),
+       IB_DEVICE_RAW_IP_CSUM                   = (1 << 26),
+       /*
+        * Devices should set IB_DEVICE_CROSS_CHANNEL if they
+        * support execution of WQEs that involve synchronization
+        * of I/O operations with single completion queue managed
+        * by hardware.
+        */
+       IB_DEVICE_CROSS_CHANNEL         = (1 << 27),
+       IB_DEVICE_MANAGED_FLOW_STEERING         = (1 << 29),
+       IB_DEVICE_SIGNATURE_HANDOVER            = (1 << 30),
+       IB_DEVICE_ON_DEMAND_PAGING              = (1 << 31),
 };
 
 enum ib_signature_prot_cap {
@@ -184,6 +254,7 @@ struct ib_odp_caps {
 
 enum ib_cq_creation_flags {
        IB_CQ_FLAGS_TIMESTAMP_COMPLETION   = 1 << 0,
+       IB_CQ_FLAGS_IGNORE_OVERRUN         = 1 << 1,
 };
 
 struct ib_cq_init_attr {
@@ -393,6 +464,7 @@ union rdma_protocol_stats {
 #define RDMA_CORE_CAP_PROT_IB           0x00100000
 #define RDMA_CORE_CAP_PROT_ROCE         0x00200000
 #define RDMA_CORE_CAP_PROT_IWARP        0x00400000
+#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
 
 #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
                                        | RDMA_CORE_CAP_IB_MAD \
@@ -405,6 +477,12 @@ union rdma_protocol_stats {
                                        | RDMA_CORE_CAP_IB_CM   \
                                        | RDMA_CORE_CAP_AF_IB   \
                                        | RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP                      \
+                                       (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
+                                       | RDMA_CORE_CAP_IB_MAD  \
+                                       | RDMA_CORE_CAP_IB_CM   \
+                                       | RDMA_CORE_CAP_AF_IB   \
+                                       | RDMA_CORE_CAP_ETH_AH)
 #define RDMA_CORE_PORT_IWARP           (RDMA_CORE_CAP_PROT_IWARP \
                                        | RDMA_CORE_CAP_IW_CM)
 #define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
@@ -519,6 +597,17 @@ struct ib_grh {
        union ib_gid    dgid;
 };
 
+union rdma_network_hdr {
+       struct ib_grh ibgrh;
+       struct {
+               /* The IB spec states that if it's IPv4, the IPv4 header
+                * is located in the last 20 bytes of the GRH.
+                */
+               u8              reserved[20];
+               struct iphdr    roce4grh;
+       };
+};
+
 enum {
        IB_MULTICAST_QPN = 0xffffff
 };
@@ -734,7 +823,6 @@ enum ib_wc_opcode {
        IB_WC_RDMA_READ,
        IB_WC_COMP_SWAP,
        IB_WC_FETCH_ADD,
-       IB_WC_BIND_MW,
        IB_WC_LSO,
        IB_WC_LOCAL_INV,
        IB_WC_REG_MR,
@@ -755,10 +843,14 @@ enum ib_wc_flags {
        IB_WC_IP_CSUM_OK        = (1<<3),
        IB_WC_WITH_SMAC         = (1<<4),
        IB_WC_WITH_VLAN         = (1<<5),
+       IB_WC_WITH_NETWORK_HDR_TYPE     = (1<<6),
 };
 
 struct ib_wc {
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        enum ib_wc_status       status;
        enum ib_wc_opcode       opcode;
        u32                     vendor_err;
@@ -777,6 +869,7 @@ struct ib_wc {
        u8                      port_num;       /* valid only for DR SMPs on switches */
        u8                      smac[ETH_ALEN];
        u16                     vlan_id;
+       u8                      network_hdr_type;
 };
 
 enum ib_cq_notify_flags {
@@ -866,6 +959,9 @@ enum ib_qp_type {
 enum ib_qp_create_flags {
        IB_QP_CREATE_IPOIB_UD_LSO               = 1 << 0,
        IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK   = 1 << 1,
+       IB_QP_CREATE_CROSS_CHANNEL              = 1 << 2,
+       IB_QP_CREATE_MANAGED_SEND               = 1 << 3,
+       IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
        IB_QP_CREATE_NETIF_QP                   = 1 << 5,
        IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
        IB_QP_CREATE_USE_GFP_NOIO               = 1 << 7,
@@ -1027,7 +1123,6 @@ enum ib_wr_opcode {
        IB_WR_REG_MR,
        IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
        IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
-       IB_WR_BIND_MW,
        IB_WR_REG_SIG_MR,
        /* reserve values for low level drivers' internal use.
         * These values will not be used at all in the ib core layer.
@@ -1062,26 +1157,16 @@ struct ib_sge {
        u32     lkey;
 };
 
-/**
- * struct ib_mw_bind_info - Parameters for a memory window bind operation.
- * @mr: A memory region to bind the memory window to.
- * @addr: The address where the memory window should begin.
- * @length: The length of the memory window, in bytes.
- * @mw_access_flags: Access flags from enum ib_access_flags for the window.
- *
- * This struct contains the shared parameters for type 1 and type 2
- * memory window bind operations.
- */
-struct ib_mw_bind_info {
-       struct ib_mr   *mr;
-       u64             addr;
-       u64             length;
-       int             mw_access_flags;
+struct ib_cqe {
+       void (*done)(struct ib_cq *cq, struct ib_wc *wc);
 };
 
 struct ib_send_wr {
        struct ib_send_wr      *next;
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        struct ib_sge          *sg_list;
        int                     num_sge;
        enum ib_wr_opcode       opcode;
@@ -1147,19 +1232,6 @@ static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
        return container_of(wr, struct ib_reg_wr, wr);
 }
 
-struct ib_bind_mw_wr {
-       struct ib_send_wr       wr;
-       struct ib_mw            *mw;
-       /* The new rkey for the memory window. */
-       u32                     rkey;
-       struct ib_mw_bind_info  bind_info;
-};
-
-static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
-{
-       return container_of(wr, struct ib_bind_mw_wr, wr);
-}
-
 struct ib_sig_handover_wr {
        struct ib_send_wr       wr;
        struct ib_sig_attrs    *sig_attrs;
@@ -1175,7 +1247,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
 
 struct ib_recv_wr {
        struct ib_recv_wr      *next;
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        struct ib_sge          *sg_list;
        int                     num_sge;
 };
@@ -1190,20 +1265,10 @@ enum ib_access_flags {
        IB_ACCESS_ON_DEMAND     = (1<<6),
 };
 
-struct ib_phys_buf {
-       u64      addr;
-       u64      size;
-};
-
-struct ib_mr_attr {
-       struct ib_pd    *pd;
-       u64             device_virt_addr;
-       u64             size;
-       int             mr_access_flags;
-       u32             lkey;
-       u32             rkey;
-};
-
+/*
+ * XXX: these are apparently used for ->rereg_user_mr, no idea why they
+ * are hidden here instead of a uapi header!
+ */
 enum ib_mr_rereg_flags {
        IB_MR_REREG_TRANS       = 1,
        IB_MR_REREG_PD          = (1<<1),
@@ -1211,18 +1276,6 @@ enum ib_mr_rereg_flags {
        IB_MR_REREG_SUPPORTED   = ((IB_MR_REREG_ACCESS << 1) - 1)
 };
 
-/**
- * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
- * @wr_id:      Work request id.
- * @send_flags: Flags from ib_send_flags enum.
- * @bind_info:  More parameters of the bind operation.
- */
-struct ib_mw_bind {
-       u64                    wr_id;
-       int                    send_flags;
-       struct ib_mw_bind_info bind_info;
-};
-
 struct ib_fmr_attr {
        int     max_pages;
        int     max_maps;
@@ -1307,6 +1360,12 @@ struct ib_ah {
 
 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
 
+enum ib_poll_context {
+       IB_POLL_DIRECT,         /* caller context, no hw completions */
+       IB_POLL_SOFTIRQ,        /* poll from softirq context */
+       IB_POLL_WORKQUEUE,      /* poll from workqueue */
+};
+
 struct ib_cq {
        struct ib_device       *device;
        struct ib_uobject      *uobject;
@@ -1315,6 +1374,12 @@ struct ib_cq {
        void                   *cq_context;
        int                     cqe;
        atomic_t                usecnt; /* count number of work queues */
+       enum ib_poll_context    poll_ctx;
+       struct ib_wc            *wc;
+       union {
+               struct irq_poll         iop;
+               struct work_struct      work;
+       };
 };
 
 struct ib_srq {
@@ -1363,7 +1428,6 @@ struct ib_mr {
        u64                iova;
        u32                length;
        unsigned int       page_size;
-       atomic_t           usecnt; /* count number of MWs */
 };
 
 struct ib_mw {
@@ -1724,11 +1788,6 @@ struct ib_device {
                                                      int wc_cnt);
        struct ib_mr *             (*get_dma_mr)(struct ib_pd *pd,
                                                 int mr_access_flags);
-       struct ib_mr *             (*reg_phys_mr)(struct ib_pd *pd,
-                                                 struct ib_phys_buf *phys_buf_array,
-                                                 int num_phys_buf,
-                                                 int mr_access_flags,
-                                                 u64 *iova_start);
        struct ib_mr *             (*reg_user_mr)(struct ib_pd *pd,
                                                  u64 start, u64 length,
                                                  u64 virt_addr,
@@ -1741,8 +1800,6 @@ struct ib_device {
                                                    int mr_access_flags,
                                                    struct ib_pd *pd,
                                                    struct ib_udata *udata);
-       int                        (*query_mr)(struct ib_mr *mr,
-                                              struct ib_mr_attr *mr_attr);
        int                        (*dereg_mr)(struct ib_mr *mr);
        struct ib_mr *             (*alloc_mr)(struct ib_pd *pd,
                                               enum ib_mr_type mr_type,
@@ -1750,18 +1807,8 @@ struct ib_device {
        int                        (*map_mr_sg)(struct ib_mr *mr,
                                                struct scatterlist *sg,
                                                int sg_nents);
-       int                        (*rereg_phys_mr)(struct ib_mr *mr,
-                                                   int mr_rereg_mask,
-                                                   struct ib_pd *pd,
-                                                   struct ib_phys_buf *phys_buf_array,
-                                                   int num_phys_buf,
-                                                   int mr_access_flags,
-                                                   u64 *iova_start);
        struct ib_mw *             (*alloc_mw)(struct ib_pd *pd,
                                               enum ib_mw_type type);
-       int                        (*bind_mw)(struct ib_qp *qp,
-                                             struct ib_mw *mw,
-                                             struct ib_mw_bind *mw_bind);
        int                        (*dealloc_mw)(struct ib_mw *mw);
        struct ib_fmr *            (*alloc_fmr)(struct ib_pd *pd,
                                                int mr_access_flags,
@@ -1823,6 +1870,7 @@ struct ib_device {
        u16                          is_switch:1;
        u8                           node_type;
        u8                           phys_port_cnt;
+       struct ib_device_attr        attrs;
 
        /**
         * The following mandatory functions are used only at device
@@ -1888,6 +1936,31 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
        return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
 }
 
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+                                      size_t offset,
+                                      size_t len)
+{
+       const void __user *p = udata->inbuf + offset;
+       bool ret = false;
+       u8 *buf;
+
+       if (len > USHRT_MAX)
+               return false;
+
+       buf = kmalloc(len, GFP_KERNEL);
+       if (!buf)
+               return false;
+
+       if (copy_from_user(buf, p, len))
+               goto free;
+
+       ret = !memchr_inv(buf, 0, len);
+
+free:
+       kfree(buf);
+       return ret;
+}
+
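
The new inline above lets a driver verify that the portion of user input beyond the structure it understands is all zeroes before accepting a command. A sketch of a check a driver might perform, assuming cmd is its known command struct:

        /* Illustrative: reject requests whose unknown trailing bytes are nonzero. */
        if (udata->inlen > sizeof(cmd) &&
            !ib_is_udata_cleared(udata, sizeof(cmd), udata->inlen - sizeof(cmd)))
                return -EOPNOTSUPP;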
 /**
  * ib_modify_qp_is_ok - Check that the supplied attribute mask
  * contains all required attributes and no attributes not allowed for
@@ -1912,9 +1985,6 @@ int ib_register_event_handler  (struct ib_event_handler *event_handler);
 int ib_unregister_event_handler(struct ib_event_handler *event_handler);
 void ib_dispatch_event(struct ib_event *event);
 
-int ib_query_device(struct ib_device *device,
-                   struct ib_device_attr *device_attr);
-
 int ib_query_port(struct ib_device *device,
                  u8 port_num, struct ib_port_attr *port_attr);
 
@@ -1967,6 +2037,17 @@ static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
 }
 
 static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+{
+       return device->port_immutable[port_num].core_cap_flags &
+               (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+}
+
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+{
+       return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+}
+
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
 {
        return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
 }
@@ -1978,8 +2059,8 @@ static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_n
 
 static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
 {
-       return device->port_immutable[port_num].core_cap_flags &
-               (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE);
+       return rdma_protocol_ib(device, port_num) ||
+               rdma_protocol_roce(device, port_num);
 }
 
 /**
@@ -2220,7 +2301,8 @@ int ib_modify_port(struct ib_device *device,
                   struct ib_port_modify *port_modify);
 
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               struct net_device *ndev, u8 *port_num, u16 *index);
+               enum ib_gid_type gid_type, struct net_device *ndev,
+               u8 *port_num, u16 *index);
 
 int ib_find_pkey(struct ib_device *device,
                 u8 port_num, u16 pkey, u16 *index);
@@ -2454,6 +2536,11 @@ static inline int ib_post_recv(struct ib_qp *qp,
        return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
 }
 
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+               int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
+void ib_free_cq(struct ib_cq *cq);
+int ib_process_cq_direct(struct ib_cq *cq, int budget);
+
 /**
  * ib_create_cq - Creates a CQ on the specified device.
  * @device: The device on which to create the CQ.
@@ -2838,13 +2925,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
                dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
 }
 
-/**
- * ib_query_mr - Retrieves information about a specific memory region.
- * @mr: The memory region to retrieve information about.
- * @mr_attr: The attributes of the specified memory region.
- */
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
 /**
  * ib_dereg_mr - Deregisters a memory region and removes it from the
  *   HCA translation table.
@@ -2881,42 +2961,6 @@ static inline u32 ib_inc_rkey(u32 rkey)
        return ((rkey + 1) & mask) | (rkey & ~mask);
 }
 
-/**
- * ib_alloc_mw - Allocates a memory window.
- * @pd: The protection domain associated with the memory window.
- * @type: The type of the memory window (1 or 2).
- */
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-/**
- * ib_bind_mw - Posts a work request to the send queue of the specified
- *   QP, which binds the memory window to the given address range and
- *   remote access attributes.
- * @qp: QP to post the bind work request on.
- * @mw: The memory window to bind.
- * @mw_bind: Specifies information about the memory window, including
- *   its address range, remote access rights, and associated memory region.
- *
- * If there is no immediate error, the function will update the rkey member
- * of the mw parameter to its new value. The bind operation can still fail
- * asynchronously.
- */
-static inline int ib_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind)
-{
-       /* XXX reference counting in corresponding MR? */
-       return mw->device->bind_mw ?
-               mw->device->bind_mw(qp, mw, mw_bind) :
-               -ENOSYS;
-}
-
-/**
- * ib_dealloc_mw - Deallocates a memory window.
- * @mw: The memory window to deallocate.
- */
-int ib_dealloc_mw(struct ib_mw *mw);
-
 /**
  * ib_alloc_fmr - Allocates a unmapped fast memory region.
  * @pd: The protection domain associated with the unmapped region.
diff --git a/include/scsi/iser.h b/include/scsi/iser.h
new file mode 100644 (file)
index 0000000..2e678fa
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *     - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *     - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ISCSI_ISER_H
+#define ISCSI_ISER_H
+
+#define ISER_ZBVA_NOT_SUP              0x80
+#define ISER_SEND_W_INV_NOT_SUP                0x40
+#define ISERT_ZBVA_NOT_USED            0x80
+#define ISERT_SEND_W_INV_NOT_USED      0x40
+
+#define ISCSI_CTRL     0x10
+#define ISER_HELLO     0x20
+#define ISER_HELLORPLY 0x30
+
+#define ISER_VER       0x10
+#define ISER_WSV       0x08
+#define ISER_RSV       0x04
+
+/**
+ * struct iser_cm_hdr - iSER CM header (from iSER Annex A12)
+ *
+ * @flags:        flags support (zbva, send_w_inv)
+ * @rsvd:         reserved
+ */
+struct iser_cm_hdr {
+       u8      flags;
+       u8      rsvd[3];
+} __packed;
+
+/**
+ * struct iser_ctrl - iSER header of iSCSI control PDU
+ *
+ * @flags:        opcode and read/write valid bits
+ * @rsvd:         reserved
+ * @write_stag:   write rkey
+ * @write_va:     write virtual address
+ * @read_stag:    read rkey
+ * @read_va:      read virtual address
+ */
+struct iser_ctrl {
+       u8      flags;
+       u8      rsvd[3];
+       __be32  write_stag;
+       __be64  write_va;
+       __be32  read_stag;
+       __be64  read_va;
+} __packed;
+
+#endif /* ISCSI_ISER_H */
index e2b712c90d3f22d4bcbb75ded3a296304d23d7c6..c21c38ce74501dbdac327e9cac8b0b6d051a42b0 100644 (file)
@@ -343,7 +343,7 @@ void snd_hdac_bus_enter_link_reset(struct hdac_bus *bus);
 void snd_hdac_bus_exit_link_reset(struct hdac_bus *bus);
 
 void snd_hdac_bus_update_rirb(struct hdac_bus *bus);
-void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
+int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
                                    void (*ack)(struct hdac_bus *,
                                                struct hdac_stream *));
 
index fdabbb4ddba92f801e2952fafaeb997c7fa03a33..f730b91e472f19f97b15ec8982443fe099dd5db4 100644 (file)
@@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
 int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
                         unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+                             unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream,
+                              int count);
 
 /* main midi functions */
 
index 7990469a44ce3df7909bdd447cd743019f900f37..c4d76ff056c6efd3431a4ed7aa3c6f6888c69e80 100644 (file)
@@ -104,6 +104,7 @@ struct snd_timer_instance {
                           int event,
                           struct timespec * tstamp,
                           unsigned long resolution);
+       void (*disconnect)(struct snd_timer_instance *timeri);
        void *callback_data;
        unsigned long ticks;            /* auto-load ticks when expired */
        unsigned long cticks;           /* current ticks */
index 56cf8e485ef22101ac22b1120704664bf599eaef..28ee5c2e6bcd7b08abeda34669cc2da17f4dcf9a 100644 (file)
@@ -94,5 +94,8 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
        sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
 bool target_sense_desc_format(struct se_device *dev);
+sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
+bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+                                      struct request_queue *q, int block_size);
 
 #endif /* TARGET_CORE_BACKEND_H */
index 5d82816cc4e3dbb34068748d95dd86b3bbb5a537..e8c8c08bf575f4de6436d8c090b15f77b6717f2a 100644 (file)
@@ -140,6 +140,8 @@ enum se_cmd_flags_table {
        SCF_COMPARE_AND_WRITE           = 0x00080000,
        SCF_COMPARE_AND_WRITE_POST      = 0x00100000,
        SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000,
+       SCF_ACK_KREF                    = 0x00400000,
+       SCF_USE_CPUID                   = 0x00800000,
 };
 
 /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
@@ -187,6 +189,7 @@ enum target_sc_flags_table {
        TARGET_SCF_BIDI_OP              = 0x01,
        TARGET_SCF_ACK_KREF             = 0x02,
        TARGET_SCF_UNKNOWN_SIZE         = 0x04,
+       TARGET_SCF_USE_CPUID    = 0x08,
 };
 
 /* fabric independent task management function values */
@@ -490,8 +493,9 @@ struct se_cmd {
 #define CMD_T_SENT             (1 << 4)
 #define CMD_T_STOP             (1 << 5)
 #define CMD_T_DEV_ACTIVE       (1 << 7)
-#define CMD_T_REQUEST_STOP     (1 << 8)
 #define CMD_T_BUSY             (1 << 9)
+#define CMD_T_TAS              (1 << 10)
+#define CMD_T_FABRIC_STOP      (1 << 11)
        spinlock_t              t_state_lock;
        struct kref             cmd_kref;
        struct completion       t_transport_stop_comp;
@@ -511,9 +515,6 @@ struct se_cmd {
 
        struct list_head        state_list;
 
-       /* old task stop completion, consider merging with some of the above */
-       struct completion       task_stop_comp;
-
        /* backend private data */
        void                    *priv;
 
index 594b4b29a2241ba0493e22e09b0b66ea2d6c3cc4..4e4b2fa786097c3638e78a6df76995e598a4cecf 100644 (file)
@@ -43,7 +43,7 @@ struct extent_status;
        { EXT4_GET_BLOCKS_METADATA_NOFAIL,      "METADATA_NOFAIL" },    \
        { EXT4_GET_BLOCKS_NO_NORMALIZE,         "NO_NORMALIZE" },       \
        { EXT4_GET_BLOCKS_KEEP_SIZE,            "KEEP_SIZE" },          \
-       { EXT4_GET_BLOCKS_NO_LOCK,              "NO_LOCK" })
+       { EXT4_GET_BLOCKS_ZERO,                 "ZERO" })
 
 #define show_mflags(flags) __print_flags(flags, "",    \
        { EXT4_MAP_NEW,         "N" },                  \
index 98feb1b82896504bfc0a40157c84a22ca137e259..d6dfa05ba322c9465d3027c0c992a97eff6a504e 100644 (file)
@@ -17,7 +17,7 @@ TRACE_EVENT(fence_annotate_wait_on,
 
        TP_STRUCT__entry(
                __string(driver, fence->ops->get_driver_name(fence))
-               __string(timeline, fence->ops->get_driver_name(fence))
+               __string(timeline, fence->ops->get_timeline_name(fence))
                __field(unsigned int, context)
                __field(unsigned int, seqno)
 
index 0f803d2783e3834194cb3f5cd3dd4da5c017304c..47c6212d8f3cecb07d62cb9ea81f6ea65ae0c35f 100644 (file)
@@ -46,10 +46,10 @@ SCAN_STATUS
 
 TRACE_EVENT(mm_khugepaged_scan_pmd,
 
-       TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable,
+       TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
                 bool referenced, int none_or_zero, int status),
 
-       TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status),
+       TP_ARGS(mm, page, writable, referenced, none_or_zero, status),
 
        TP_STRUCT__entry(
                __field(struct mm_struct *, mm)
@@ -62,7 +62,7 @@ TRACE_EVENT(mm_khugepaged_scan_pmd,
 
        TP_fast_assign(
                __entry->mm = mm;
-               __entry->pfn = pfn;
+               __entry->pfn = page ? page_to_pfn(page) : -1;
                __entry->writable = writable;
                __entry->referenced = referenced;
                __entry->none_or_zero = none_or_zero;
@@ -104,10 +104,10 @@ TRACE_EVENT(mm_collapse_huge_page,
 
 TRACE_EVENT(mm_collapse_huge_page_isolate,
 
-       TP_PROTO(unsigned long pfn, int none_or_zero,
+       TP_PROTO(struct page *page, int none_or_zero,
                 bool referenced, bool  writable, int status),
 
-       TP_ARGS(pfn, none_or_zero, referenced, writable, status),
+       TP_ARGS(page, none_or_zero, referenced, writable, status),
 
        TP_STRUCT__entry(
                __field(unsigned long, pfn)
@@ -118,7 +118,7 @@ TRACE_EVENT(mm_collapse_huge_page_isolate,
        ),
 
        TP_fast_assign(
-               __entry->pfn = pfn;
+               __entry->pfn = page ? page_to_pfn(page) : -1;
                __entry->none_or_zero = none_or_zero;
                __entry->referenced = referenced;
                __entry->writable = writable;
index ff8f6c091a1504e3d18937244a07b6432c880fb2..f95f25e786ef0de423a3a036a1685ad53da496af 100644 (file)
@@ -15,7 +15,7 @@ struct softirq_action;
                         softirq_name(NET_TX)           \
                         softirq_name(NET_RX)           \
                         softirq_name(BLOCK)            \
-                        softirq_name(BLOCK_IOPOLL)     \
+                        softirq_name(IRQ_POLL)         \
                         softirq_name(TASKLET)          \
                         softirq_name(SCHED)            \
                         softirq_name(HRTIMER)          \
index 4cc989ad685164689072f90f1ef2c40f633c4088..f95e1c43c3fbc06fc2de769cd5360a5559d549b3 100644 (file)
@@ -48,6 +48,8 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
 #define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
@@ -59,6 +61,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
 
 #define ETNA_MAX_PIPES 4
 
index c2e5d6cb34e36ba2e409985c7d66c70347dbebc7..ebd10e6245984fb5c38b6ee13c3861b8e78ed040 100644 (file)
@@ -307,7 +307,7 @@ header-y += nfs_mount.h
 header-y += nl80211.h
 header-y += n_r3964.h
 header-y += nubus.h
-header-y += nvme.h
+header-y += nvme_ioctl.h
 header-y += nvram.h
 header-y += omap3isp.h
 header-y += omapfb.h
index b8a5b3b86ad63ef0825e9a9aea1784bcc68fe425..149bec83a907515dccea4425acf3b71d8d18410f 100644 (file)
@@ -2,8 +2,11 @@
 #define _UAPI_LINUX_FS_H
 
 /*
- * This file has definitions for some important file table
- * structures etc.
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's.  Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
  */
 
 #include <linux/limits.h>
@@ -146,6 +149,37 @@ struct inodes_stat_t {
 #define MS_MGC_VAL 0xC0ED0000
 #define MS_MGC_MSK 0xffff0000
 
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       __u32           fsx_projid;     /* project identifier (get/set) */
+       unsigned char   fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME      0x00000001      /* data in realtime volume */
+#define FS_XFLAG_PREALLOC      0x00000002      /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE     0x00000008      /* file cannot be modified */
+#define FS_XFLAG_APPEND                0x00000010      /* all writes append */
+#define FS_XFLAG_SYNC          0x00000020      /* all writes synchronous */
+#define FS_XFLAG_NOATIME       0x00000040      /* do not update access time */
+#define FS_XFLAG_NODUMP                0x00000080      /* do not include in backups */
+#define FS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define FS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
+#define FS_XFLAG_DAX           0x00008000      /* use DAX for IO */
+#define FS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this   */
+
 /* the read-only stuff doesn't really belong here, but any other place is
    probably as bad and I don't want to create yet another include file. */
 
@@ -188,7 +222,6 @@ struct inodes_stat_t {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
-#define BLKDAXSET _IO(0x12,128)
 #define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1           /* obsolete - kept for compatibility */
@@ -210,9 +243,28 @@ struct inodes_stat_t {
 #define FS_IOC32_SETFLAGS              _IOW('f', 2, int)
 #define FS_IOC32_GETVERSION            _IOR('v', 1, int)
 #define FS_IOC32_SETVERSION            _IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR              _IOR ('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR              _IOW ('X', 32, struct fsxattr)
 
 /*
  * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+ * almost out of 32-bit flags.  :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface.  This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
  */
 #define        FS_SECRM_FL                     0x00000001 /* Secure deletion */
 #define        FS_UNRM_FL                      0x00000002 /* Undelete */
@@ -226,8 +278,8 @@ struct inodes_stat_t {
 #define FS_DIRTY_FL                    0x00000100
 #define FS_COMPRBLK_FL                 0x00000200 /* One or more compressed clusters */
 #define FS_NOCOMP_FL                   0x00000400 /* Don't compress */
-#define FS_ECOMPR_FL                   0x00000800 /* Compression error */
 /* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL                  0x00000800 /* Encrypted file */
 #define FS_BTREE_FL                    0x00001000 /* btree format dir */
 #define FS_INDEX_FL                    0x00001000 /* hash-indexed directory */
 #define FS_IMAGIC_FL                   0x00002000 /* AFS directory */
@@ -235,9 +287,12 @@ struct inodes_stat_t {
 #define FS_NOTAIL_FL                   0x00008000 /* file tail should not be merged */
 #define FS_DIRSYNC_FL                  0x00010000 /* dirsync behaviour (directories only) */
 #define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL                        0x00040000 /* Reserved for ext4 */
 #define FS_EXTENT_FL                   0x00080000 /* Extents */
-#define FS_DIRECTIO_FL                 0x00100000 /* Use direct i/o */
+#define FS_EA_INODE_FL                 0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL                        0x00400000 /* Reserved for ext4 */
 #define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL              0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL              0x20000000 /* Create with parents projid */
 #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
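The hoisted interface is easy to exercise from userspace once this header is installed. A minimal sketch, assuming a filesystem that actually implements FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR (XFS at this point); the file argument and the project ID value are purely illustrative:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	struct fsxattr fsx;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, FS_IOC_FSGETXATTR, &fsx) < 0) {
		perror("FS_IOC_FSGETXATTR");
		return 1;
	}
	printf("xflags=%#x extsize=%u nextents=%u projid=%u\n",
	       fsx.fsx_xflags, fsx.fsx_extsize,
	       fsx.fsx_nextents, fsx.fsx_projid);

	fsx.fsx_projid = 42;	/* illustrative project ID only */
	if (ioctl(fd, FS_IOC_FSSETXATTR, &fsx) < 0)
		perror("FS_IOC_FSSETXATTR");
	close(fd);
	return 0;
}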
 
index 928f98997d8a1811bfe31bfae00c3be12cbdce74..774a43128a7aa4c039576513e1e3ff7355d0fc0d 100644 (file)
@@ -33,6 +33,7 @@
 
 #define NVM_TTYPE_NAME_MAX 48
 #define NVM_TTYPE_MAX 63
+#define NVM_MMTYPE_LEN 8
 
 #define NVM_CTRL_FILE "/dev/lightnvm/control"
 
@@ -100,6 +101,26 @@ struct nvm_ioctl_remove {
        __u32 flags;
 };
 
+struct nvm_ioctl_dev_init {
+       char dev[DISK_NAME_LEN];                /* open-channel SSD device */
+       char mmtype[NVM_MMTYPE_LEN];            /* register to media manager */
+
+       __u32 flags;
+};
+
+enum {
+       NVM_FACTORY_ERASE_ONLY_USER     = 1 << 0, /* erase only blocks used as
+                                                  * host blks or grown blks */
+       NVM_FACTORY_RESET_HOST_BLKS     = 1 << 1, /* remove host blk marks */
+       NVM_FACTORY_RESET_GRWN_BBLKS    = 1 << 2, /* remove grown blk marks */
+       NVM_FACTORY_NR_BITS             = 1 << 3, /* stops here */
+};
+
+struct nvm_ioctl_dev_factory {
+       char dev[DISK_NAME_LEN];
+
+       __u32 flags;
+};
 
 /* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
 enum {
@@ -110,6 +131,12 @@ enum {
        /* device level cmds */
        NVM_DEV_CREATE_CMD,
        NVM_DEV_REMOVE_CMD,
+
+       /* Init a device to support LightNVM media managers */
+       NVM_DEV_INIT_CMD,
+
+       /* Factory reset device */
+       NVM_DEV_FACTORY_CMD,
 };
 
 #define NVM_IOCTL 'L' /* 0x4c */
@@ -122,6 +149,10 @@ enum {
                                                struct nvm_ioctl_create)
 #define NVM_DEV_REMOVE         _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
                                                struct nvm_ioctl_remove)
+#define NVM_DEV_INIT           _IOW(NVM_IOCTL, NVM_DEV_INIT_CMD, \
+                                               struct nvm_ioctl_dev_init)
+#define NVM_DEV_FACTORY                _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \
+                                               struct nvm_ioctl_dev_factory)
 
 #define NVM_VERSION_MAJOR      1
 #define NVM_VERSION_MINOR      0
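A management tool drives the two new commands through the LightNVM control node. A hedged sketch of NVM_DEV_INIT usage; the "nvme0n1" device name and the "gennvm" media manager are assumptions for illustration:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/lightnvm.h>

int main(void)
{
	struct nvm_ioctl_dev_init init;
	int fd = open("/dev/lightnvm/control", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&init, 0, sizeof(init));
	strncpy(init.dev, "nvme0n1", DISK_NAME_LEN - 1);     /* assumed device */
	strncpy(init.mmtype, "gennvm", NVM_MMTYPE_LEN - 1);  /* assumed manager */

	if (ioctl(fd, NVM_DEV_INIT, &init) < 0)
		perror("NVM_DEV_INIT");
	close(fd);
	return 0;
}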
index 5b4a4be06e2b9301699db8da20d8df8d10bc0a89..cc68b92124d46108d43241d2b4cc603c2771e5b0 100644 (file)
@@ -66,14 +66,18 @@ struct nd_cmd_ars_cap {
        __u64 length;
        __u32 status;
        __u32 max_ars_out;
+       __u32 clear_err_unit;
+       __u32 reserved;
 } __packed;
 
 struct nd_cmd_ars_start {
        __u64 address;
        __u64 length;
        __u16 type;
-       __u8 reserved[6];
+       __u8 flags;
+       __u8 reserved[5];
        __u32 status;
+       __u32 scrub_time;
 } __packed;
 
 struct nd_cmd_ars_status {
@@ -81,11 +85,14 @@ struct nd_cmd_ars_status {
        __u32 out_length;
        __u64 address;
        __u64 length;
+       __u64 restart_address;
+       __u64 restart_length;
        __u16 type;
+       __u16 flags;
        __u32 num_records;
        struct nd_ars_record {
                __u32 handle;
-               __u32 flags;
+               __u32 reserved;
                __u64 err_address;
                __u64 length;
        } __packed records[0];
index c33e1c489eb2af5b05e1fa00a5daf33caed99c26..8b8cfadf7833b6f2f29f4c290711d36726b107f9 100644 (file)
 typedef uint16_t blkif_vdev_t;
 typedef uint64_t blkif_sector_t;
 
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
+ * named "queue-N", where N is the integer ID of the queue/ring to which
+ * those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with the
+ * multi-page ring buffer feature.
+ * For example, a frontend requesting two queues/rings, where each ring
+ * buffer is two pages in size, must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
 /*
  * REQUEST CODES.
  */
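On the frontend side, publishing the per-queue keys described above amounts to a loop of xenbus writes. A minimal sketch using the existing xenbus_printf() helper, with single-page rings for brevity and transaction/error unwinding elided; everything other than the xenbus API itself is hypothetical:

static int blkfront_publish_queues(struct xenbus_device *dev,
				   unsigned int nr_queues,
				   grant_ref_t *ring_ref,
				   unsigned int *evtchn)
{
	char node[32];
	unsigned int i;
	int err;

	err = xenbus_printf(XBT_NIL, dev->nodename,
			    "multi-queue-num-queues", "%u", nr_queues);
	if (err)
		return err;

	for (i = 0; i < nr_queues; i++) {
		/* per-queue keys live under "queue-N", as documented above */
		snprintf(node, sizeof(node), "queue-%u/ring-ref", i);
		err = xenbus_printf(XBT_NIL, dev->nodename, node,
				    "%u", ring_ref[i]);
		if (err)
			return err;
		snprintf(node, sizeof(node), "queue-%u/event-channel", i);
		err = xenbus_printf(XBT_NIL, dev->nodename, node,
				    "%u", evtchn[i]);
		if (err)
			return err;
	}
	return 0;
}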
index f4617cf07069213262f5c1c8bfb097ca7989b8ad..781c1399c6a3b0fa0b81c9567dd3096276a17d65 100644 (file)
@@ -795,7 +795,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
 
        ro = mnt_want_write(mnt);       /* we'll drop it in any case */
        error = 0;
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        path.dentry = lookup_one_len(name->name, root, strlen(name->name));
        if (IS_ERR(path.dentry)) {
                error = PTR_ERR(path.dentry);
@@ -841,7 +841,7 @@ out_putfd:
                put_unused_fd(fd);
                fd = error;
        }
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        if (!ro)
                mnt_drop_write(mnt);
 out_putname:
@@ -866,7 +866,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
        err = mnt_want_write(mnt);
        if (err)
                goto out_name;
-       mutex_lock_nested(&d_inode(mnt->mnt_root)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
        dentry = lookup_one_len(name->name, mnt->mnt_root,
                                strlen(name->name));
        if (IS_ERR(dentry)) {
@@ -884,7 +884,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
        dput(dentry);
 
 out_unlock:
-       mutex_unlock(&d_inode(mnt->mnt_root)->i_mutex);
+       inode_unlock(d_inode(mnt->mnt_root));
        if (inode)
                iput(inode);
        mnt_drop_write(mnt);
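These mqueue hunks (and the audit ones further below) are part of the tree-wide conversion to the new inode locking helpers. At this merge the helpers are still thin wrappers over the old i_mutex, per include/linux/fs.h, so the conversion is mechanical:

static inline void inode_lock(struct inode *inode)
{
	mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
	mutex_unlock(&inode->i_mutex);
}

static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
{
	mutex_lock_nested(&inode->i_mutex, subclass);
}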
index b471e5a3863ddbca70f2bf4dee22f40df0345fbe..cddd5b5fde514a205ed24a46b7a273643ab62c7c 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1493,7 +1493,7 @@ out_rcu_wakeup:
        wake_up_sem_queue_do(&tasks);
 out_free:
        if (sem_io != fast_sem_io)
-               ipc_free(sem_io, sizeof(ushort)*nsems);
+               ipc_free(sem_io);
        return err;
 }
 
index ed3027d0f277a53745b41ea731c4783e3c554bee..331fc1b0b3c7952110f0223d741fc4fa19881d70 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -156,11 +156,12 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
        struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
 
        /*
-        * We raced in the idr lookup or with shm_destroy().  Either way, the
-        * ID is busted.
+        * Callers of shm_lock() must validate the status of the returned ipc
+        * object pointer (as returned by ipc_lock()), and error out as
+        * appropriate.
         */
-       WARN_ON(IS_ERR(ipcp));
-
+       if (IS_ERR(ipcp))
+               return (void *)ipcp;
        return container_of(ipcp, struct shmid_kernel, shm_perm);
 }
 
@@ -186,18 +187,33 @@ static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 }
 
 
-/* This is called by fork, once for every shm attach. */
-static void shm_open(struct vm_area_struct *vma)
+static int __shm_open(struct vm_area_struct *vma)
 {
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct shmid_kernel *shp;
 
        shp = shm_lock(sfd->ns, sfd->id);
+
+       if (IS_ERR(shp))
+               return PTR_ERR(shp);
+
        shp->shm_atim = get_seconds();
        shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_nattch++;
        shm_unlock(shp);
+       return 0;
+}
+
+/* This is called by fork, once for every shm attach. */
+static void shm_open(struct vm_area_struct *vma)
+{
+       int err = __shm_open(vma);
+       /*
+        * We raced in the idr lookup or with shm_destroy().
+        * Either way, the ID is busted.
+        */
+       WARN_ON_ONCE(err);
 }
 
 /*
@@ -260,6 +276,14 @@ static void shm_close(struct vm_area_struct *vma)
        down_write(&shm_ids(ns).rwsem);
        /* remove from the list of attaches of the shm segment */
        shp = shm_lock(ns, sfd->id);
+
+       /*
+        * We raced in the idr lookup or with shm_destroy().
+        * Either way, the ID is busted.
+        */
+       if (WARN_ON_ONCE(IS_ERR(shp)))
+               goto done; /* no-op */
+
        shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_dtim = get_seconds();
        shp->shm_nattch--;
@@ -267,6 +291,7 @@ static void shm_close(struct vm_area_struct *vma)
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
+done:
        up_write(&shm_ids(ns).rwsem);
 }
 
@@ -388,17 +413,25 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
        struct shm_file_data *sfd = shm_file_data(file);
        int ret;
 
+       /*
+        * In case of remap_file_pages() emulation, the file can represent a
+        * removed IPC ID: propagate the shm_lock() error to the caller.
+        */
+       ret = __shm_open(vma);
+       if (ret)
+               return ret;
+
        ret = sfd->file->f_op->mmap(sfd->file, vma);
-       if (ret != 0)
+       if (ret) {
+               shm_close(vma);
                return ret;
+       }
        sfd->vm_ops = vma->vm_ops;
 #ifdef CONFIG_MMU
        WARN_ON(!sfd->vm_ops->fault);
 #endif
        vma->vm_ops = &shm_vm_ops;
-       shm_open(vma);
-
-       return ret;
+       return 0;
 }
 
 static int shm_release(struct inode *ino, struct file *file)
index 0f401d94b7c657d5e7126fe78f149c94ffea8e24..798cad18dd878f2ee81f03e34431dbc001e2c2a9 100644 (file)
@@ -414,17 +414,12 @@ void *ipc_alloc(int size)
 /**
  * ipc_free - free ipc space
  * @ptr: pointer returned by ipc_alloc
- * @size: size of block
  *
- * Free a block created with ipc_alloc(). The caller must know the size
- * used in the allocation call.
+ * Free a block created with ipc_alloc().
  */
-void ipc_free(void *ptr, int size)
+void ipc_free(void *ptr)
 {
-       if (size > PAGE_SIZE)
-               vfree(ptr);
-       else
-               kfree(ptr);
+       kvfree(ptr);
 }
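Dropping the size argument is safe because kvfree() dispatches on the pointer itself; roughly, from mm/util.c:

void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))	/* vmalloc range? */
		vfree(addr);
	else
		kfree(addr);
}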
 
 /**
index 3a8a5a0eca6252f04cf188fa6921291f9ec6fc31..51f7ca58ac6742989c61c0e0fc8d73bc7eab6bf7 100644 (file)
@@ -118,7 +118,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
  * both functions can sleep
  */
 void *ipc_alloc(int size);
-void ipc_free(void *ptr, int size);
+void ipc_free(void *ptr);
 
 /*
  * For allocation that need to be freed by RCU.
index 27c6046c2c3d459c1f2345b191e647b94fa7f02e..f84f8d06e1f6d1010b9e61065c71596f320e28d9 100644 (file)
@@ -95,7 +95,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
        if (IS_ERR(dentry))
                return (void *)dentry; /* returning an error */
        inode = path.dentry->d_inode;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        audit_mark = kzalloc(sizeof(*audit_mark), GFP_KERNEL);
        if (unlikely(!audit_mark)) {
index 656c7e93ac0d30d3e42a8f7e0dfc7dd071360d78..9f194aad0adc0600ce257581315824f540eb9c0b 100644 (file)
@@ -364,7 +364,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
        struct dentry *d = kern_path_locked(watch->path, parent);
        if (IS_ERR(d))
                return PTR_ERR(d);
-       mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+       inode_unlock(d_backing_inode(parent->dentry));
        if (d_is_positive(d)) {
                /* update watch filter fields */
                watch->dev = d_backing_inode(d)->i_sb->s_dev;
index b0799bced518691bd847a16973be74184bcdcbaf..89ebbc4d1164fea26bdcb62561bd15838be6a2db 100644 (file)
@@ -291,10 +291,13 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 {
        struct perf_event *event;
        const struct perf_event_attr *attr;
+       struct file *file;
 
-       event = perf_event_get(fd);
-       if (IS_ERR(event))
-               return event;
+       file = perf_event_get(fd);
+       if (IS_ERR(file))
+               return file;
+
+       event = file->private_data;
 
        attr = perf_event_attrs(event);
        if (IS_ERR(attr))
@@ -304,24 +307,22 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
                goto err;
 
        if (attr->type == PERF_TYPE_RAW)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_HARDWARE)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_SOFTWARE &&
            attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-               return event;
+               return file;
 err:
-       perf_event_release_kernel(event);
+       fput(file);
        return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-       struct perf_event *event = ptr;
-
-       perf_event_release_kernel(event);
+       fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
index d1d3e8f57de907764fe3080632062485f5639443..2e7f7ab739e41c46072a69e787bf4e44f560ae55 100644 (file)
@@ -2082,7 +2082,7 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
                /* adjust offset of jmps if necessary */
                if (i < pos && i + insn->off + 1 > pos)
                        insn->off += delta;
-               else if (i > pos && i + insn->off + 1 < pos)
+               else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
                        insn->off -= delta;
        }
 }
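The corrected test accounts for instructions past the patch site having already shifted by delta by the time adjust_branches() runs. A worked example with hypothetical indices:

/*
 * One insn at pos = 3 is expanded, inserting delta = 2 extra insns.
 * A backward jump originally at index 5 with off = -3 targeted
 * 5 + (-3) + 1 == 3; it now sits at i = 7 while its target is still 3,
 * so off must become -5.  The old test (i > pos && i + off + 1 < pos)
 * computes 7 + (-3) + 1 == 5, and 5 < 3 is false, so the fixup was
 * skipped and the jump landed inside the inserted sequence.  Comparing
 * against pos + delta == 5, with <= for a target at the patched insn
 * itself, catches exactly this case.
 */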
index c03a640ef6da265db01b93c2970ab6b2da7abd67..d27904c193daa1d8a8680522093254cfde376177 100644 (file)
@@ -58,6 +58,7 @@
 #include <linux/kthread.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
+#include <linux/cpuset.h>
 #include <net/sock.h>
 
 /*
@@ -2739,6 +2740,7 @@ out_unlock_rcu:
 out_unlock_threadgroup:
        percpu_up_write(&cgroup_threadgroup_rwsem);
        cgroup_kn_unlock(of->kn);
+       cpuset_post_attach_flush();
        return ret ?: nbytes;
 }
 
@@ -4655,14 +4657,15 @@ static void css_free_work_fn(struct work_struct *work)
 
        if (ss) {
                /* css free path */
+               struct cgroup_subsys_state *parent = css->parent;
                int id = css->id;
 
-               if (css->parent)
-                       css_put(css->parent);
-
                ss->css_free(css);
                cgroup_idr_remove(&ss->css_idr, id);
                cgroup_put(cgrp);
+
+               if (parent)
+                       css_put(parent);
        } else {
                /* cgroup free path */
                atomic_dec(&cgrp->root->nr_cgrps);
@@ -4758,6 +4761,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
        INIT_LIST_HEAD(&css->sibling);
        INIT_LIST_HEAD(&css->children);
        css->serial_nr = css_serial_nr_next++;
+       atomic_set(&css->online_cnt, 0);
 
        if (cgroup_parent(cgrp)) {
                css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -4780,6 +4784,10 @@ static int online_css(struct cgroup_subsys_state *css)
        if (!ret) {
                css->flags |= CSS_ONLINE;
                rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
+
+               atomic_inc(&css->online_cnt);
+               if (css->parent)
+                       atomic_inc(&css->parent->online_cnt);
        }
        return ret;
 }
@@ -5017,10 +5025,15 @@ static void css_killed_work_fn(struct work_struct *work)
                container_of(work, struct cgroup_subsys_state, destroy_work);
 
        mutex_lock(&cgroup_mutex);
-       offline_css(css);
-       mutex_unlock(&cgroup_mutex);
 
-       css_put(css);
+       do {
+               offline_css(css);
+               css_put(css);
+               /* @css can't go away while we're holding cgroup_mutex */
+               css = css->parent;
+       } while (css && atomic_dec_and_test(&css->online_cnt));
+
+       mutex_unlock(&cgroup_mutex);
 }
 
 /* css kill confirmation processing requires process context, bounce */
@@ -5029,8 +5042,10 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
        struct cgroup_subsys_state *css =
                container_of(ref, struct cgroup_subsys_state, refcnt);
 
-       INIT_WORK(&css->destroy_work, css_killed_work_fn);
-       queue_work(cgroup_destroy_wq, &css->destroy_work);
+       if (atomic_dec_and_test(&css->online_cnt)) {
+               INIT_WORK(&css->destroy_work, css_killed_work_fn);
+               queue_work(cgroup_destroy_wq, &css->destroy_work);
+       }
 }
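With this change each css holds one online_cnt for itself plus one per online child, so offlining cascades strictly bottom-up. A worked example on a hypothetical hierarchy:

/*
 * Hierarchy A -> B -> C, all online: online_cnt is A = 2, B = 2, C = 1.
 * Suppose the ref-kill confirmations for A and B arrive first: each only
 * drops 2 -> 1 in css_killed_ref_fn() and nothing is queued.  C then
 * drops 1 -> 0 and bounces to the workqueue; css_killed_work_fn()
 * offlines C, decrements B (1 -> 0) and offlines it, then A (1 -> 0)
 * likewise.  Whatever the arrival order, a parent can only reach zero
 * after all of its children have gone offline.
 */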
 
 /**
index 3e945fcd81796f954a7e1c81ae95e78bd1a91dba..41989ab4db571cbf93d1a12738bc9afc3411e019 100644 (file)
@@ -287,6 +287,8 @@ static struct cpuset top_cpuset = {
 static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_SPINLOCK(callback_lock);
 
+static struct workqueue_struct *cpuset_migrate_mm_wq;
+
 /*
  * CPU / memory hotplug is handled asynchronously.
  */
@@ -972,31 +974,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 }
 
 /*
- * cpuset_migrate_mm
- *
- *    Migrate memory region from one set of nodes to another.
- *
- *    Temporarilly set tasks mems_allowed to target nodes of migration,
- *    so that the migration code can allocate pages on these nodes.
- *
- *    While the mm_struct we are migrating is typically from some
- *    other task, the task_struct mems_allowed that we are hacking
- *    is for our current task, which must allocate new pages for that
- *    migrating memory region.
+ * Migrate memory region from one set of nodes to another.  This is
+ * performed asynchronously, as it can be called from the process
+ * migration path while holding locks involved in process management.
+ * All mm migrations are
+ * performed in the queued order and can be waited for by flushing
+ * cpuset_migrate_mm_wq.
  */
 
+struct cpuset_migrate_mm_work {
+       struct work_struct      work;
+       struct mm_struct        *mm;
+       nodemask_t              from;
+       nodemask_t              to;
+};
+
+static void cpuset_migrate_mm_workfn(struct work_struct *work)
+{
+       struct cpuset_migrate_mm_work *mwork =
+               container_of(work, struct cpuset_migrate_mm_work, work);
+
+       /* on a wq worker, no need to worry about %current's mems_allowed */
+       do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
+       mmput(mwork->mm);
+       kfree(mwork);
+}
+
 static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
                                                        const nodemask_t *to)
 {
-       struct task_struct *tsk = current;
-
-       tsk->mems_allowed = *to;
+       struct cpuset_migrate_mm_work *mwork;
 
-       do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+       mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
+       if (mwork) {
+               mwork->mm = mm;
+               mwork->from = *from;
+               mwork->to = *to;
+               INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
+               queue_work(cpuset_migrate_mm_wq, &mwork->work);
+       } else {
+               mmput(mm);
+       }
+}
 
-       rcu_read_lock();
-       guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
-       rcu_read_unlock();
+void cpuset_post_attach_flush(void)
+{
+       flush_workqueue(cpuset_migrate_mm_wq);
 }
 
 /*
@@ -1097,7 +1119,8 @@ static void update_tasks_nodemask(struct cpuset *cs)
                mpol_rebind_mm(mm, &cs->mems_allowed);
                if (migrate)
                        cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
-               mmput(mm);
+               else
+                       mmput(mm);
        }
        css_task_iter_end(&it);
 
@@ -1545,11 +1568,11 @@ static void cpuset_attach(struct cgroup_taskset *tset)
                         * @old_mems_allowed is the right nodesets that we
                         * migrate mm from.
                         */
-                       if (is_memory_migrate(cs)) {
+                       if (is_memory_migrate(cs))
                                cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
                                                  &cpuset_attach_nodemask_to);
-                       }
-                       mmput(mm);
+                       else
+                               mmput(mm);
                }
        }
 
@@ -1714,6 +1737,7 @@ out_unlock:
        mutex_unlock(&cpuset_mutex);
        kernfs_unbreak_active_protection(of->kn);
        css_put(&cs->css);
+       flush_workqueue(cpuset_migrate_mm_wq);
        return retval ?: nbytes;
 }
 
@@ -2359,6 +2383,9 @@ void __init cpuset_init_smp(void)
        top_cpuset.effective_mems = node_states[N_MEMORY];
 
        register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
+
+       cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
+       BUG_ON(!cpuset_migrate_mm_wq);
 }
 
 /**
index c0957416b32edd7f945201839c3058128bd718f5..614614821f00a02928439b068903904d81233808 100644 (file)
@@ -49,8 +49,6 @@
 
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -66,8 +64,17 @@ static void remote_function(void *data)
        struct task_struct *p = tfc->p;
 
        if (p) {
-               tfc->ret = -EAGAIN;
-               if (task_cpu(p) != smp_processor_id() || !task_curr(p))
+               /* -EAGAIN */
+               if (task_cpu(p) != smp_processor_id())
+                       return;
+
+               /*
+                * Now that we're on the right CPU with IRQs disabled, we can test
+                * if we hit the right task without races.
+                */
+
+               tfc->ret = -ESRCH; /* No such (running) process */
+               if (p != current)
                        return;
        }
 
@@ -94,13 +101,17 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
                .p      = p,
                .func   = func,
                .info   = info,
-               .ret    = -ESRCH, /* No such (running) process */
+               .ret    = -EAGAIN,
        };
+       int ret;
 
-       if (task_curr(p))
-               smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+       do {
+               ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+               if (!ret)
+                       ret = data.ret;
+       } while (ret == -EAGAIN);
 
-       return data.ret;
+       return ret;
 }
 
 /**
@@ -126,44 +137,170 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
        return data.ret;
 }
 
-static void event_function_call(struct perf_event *event,
-                               int (*active)(void *),
-                               void (*inactive)(void *),
-                               void *data)
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+       return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+                         struct perf_event_context *ctx)
+{
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx)
+               raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+                           struct perf_event_context *ctx)
+{
+       if (ctx)
+               raw_spin_unlock(&ctx->lock);
+       raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
+#define TASK_TOMBSTONE ((void *)-1L)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+       return READ_ONCE(event->owner) == TASK_TOMBSTONE;
+}
+
+/*
+ * On task ctx scheduling...
+ *
+ * When !ctx->nr_events a task context will not be scheduled. This means
+ * we can disable the scheduler hooks (for performance) without leaving
+ * pending task ctx state.
+ *
+ * This however results in two special cases:
+ *
+ *  - removing the last event from a task ctx; this is relatively
+ *    straightforward and is done in __perf_remove_from_context.
+ *
+ *  - adding the first event to a task ctx; this is tricky because we cannot
+ *    rely on ctx->is_active and therefore cannot use event_function_call().
+ *    See perf_install_in_context().
+ *
+ * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
+ */
+
+typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
+                       struct perf_event_context *, void *);
+
+struct event_function_struct {
+       struct perf_event *event;
+       event_f func;
+       void *data;
+};
+
+static int event_function(void *info)
 {
+       struct event_function_struct *efs = info;
+       struct perf_event *event = efs->event;
        struct perf_event_context *ctx = event->ctx;
-       struct task_struct *task = ctx->task;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct perf_event_context *task_ctx = cpuctx->task_ctx;
+       int ret = 0;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       perf_ctx_lock(cpuctx, task_ctx);
+       /*
+        * Since we do the IPI call without holding ctx->lock things can have
+        * changed, double check we hit the task we set out to hit.
+        */
+       if (ctx->task) {
+               if (ctx->task != current) {
+                       ret = -ESRCH;
+                       goto unlock;
+               }
+
+               /*
+                * We only use event_function_call() on established contexts,
+                * and event_function() is only ever called when active (or
+                * rather, we'll have bailed in task_function_call() or the
+                * above ctx->task != current test), therefore we must have
+                * ctx->is_active here.
+                */
+               WARN_ON_ONCE(!ctx->is_active);
+               /*
+                * And since we have ctx->is_active, cpuctx->task_ctx must
+                * match.
+                */
+               WARN_ON_ONCE(task_ctx != ctx);
+       } else {
+               WARN_ON_ONCE(&cpuctx->ctx != ctx);
+       }
+
+       efs->func(event, cpuctx, ctx, efs->data);
+unlock:
+       perf_ctx_unlock(cpuctx, task_ctx);
+
+       return ret;
+}
+
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+       struct event_function_struct efs = {
+               .event = event,
+               .func = func,
+               .data = data,
+       };
+
+       int ret = event_function(&efs);
+       WARN_ON_ONCE(ret);
+}
+
+static void event_function_call(struct perf_event *event, event_f func, void *data)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
+       struct event_function_struct efs = {
+               .event = event,
+               .func = func,
+               .data = data,
+       };
+
+       if (!event->parent) {
+               /*
+                * If this is a !child event, we must hold ctx::mutex to
+                * stabilize the event->ctx relation. See
+                * perf_event_ctx_lock().
+                */
+               lockdep_assert_held(&ctx->mutex);
+       }
 
        if (!task) {
-               cpu_function_call(event->cpu, active, data);
+               cpu_function_call(event->cpu, event_function, &efs);
                return;
        }
 
+       if (task == TASK_TOMBSTONE)
+               return;
+
 again:
-       if (!task_function_call(task, active, data))
+       if (!task_function_call(task, event_function, &efs))
                return;
 
        raw_spin_lock_irq(&ctx->lock);
+       /*
+        * Reload the task pointer, it might have been changed by
+        * a concurrent perf_event_context_sched_out().
+        */
+       task = ctx->task;
+       if (task == TASK_TOMBSTONE) {
+               raw_spin_unlock_irq(&ctx->lock);
+               return;
+       }
        if (ctx->is_active) {
-               /*
-                * Reload the task pointer, it might have been changed by
-                * a concurrent perf_event_context_sched_out().
-                */
-               task = ctx->task;
                raw_spin_unlock_irq(&ctx->lock);
                goto again;
        }
-       inactive(data);
+       func(event, NULL, ctx, data);
        raw_spin_unlock_irq(&ctx->lock);
 }
 
-#define EVENT_OWNER_KERNEL ((void *) -1)
-
-static bool is_kernel_event(struct perf_event *event)
-{
-       return event->owner == EVENT_OWNER_KERNEL;
-}
-
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                       PERF_FLAG_FD_OUTPUT  |\
                       PERF_FLAG_PID_CGROUP |\
@@ -179,6 +316,7 @@ static bool is_kernel_event(struct perf_event *event)
 enum event_type_t {
        EVENT_FLEXIBLE = 0x1,
        EVENT_PINNED = 0x2,
+       EVENT_TIME = 0x4,
        EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
 };
 
@@ -186,7 +324,13 @@ enum event_type_t {
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-struct static_key_deferred perf_sched_events __read_mostly;
+
+static void perf_sched_delayed(struct work_struct *work);
+DEFINE_STATIC_KEY_FALSE(perf_sched_events);
+static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
+static DEFINE_MUTEX(perf_sched_mutex);
+static atomic_t perf_sched_count;
+
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
@@ -368,28 +512,6 @@ static inline u64 perf_event_clock(struct perf_event *event)
        return event->clock();
 }
 
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
-       return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
-                         struct perf_event_context *ctx)
-{
-       raw_spin_lock(&cpuctx->ctx.lock);
-       if (ctx)
-               raw_spin_lock(&ctx->lock);
-}
-
-static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
-                           struct perf_event_context *ctx)
-{
-       if (ctx)
-               raw_spin_unlock(&ctx->lock);
-       raw_spin_unlock(&cpuctx->ctx.lock);
-}
-
 #ifdef CONFIG_CGROUP_PERF
 
 static inline bool
@@ -579,13 +701,7 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
-
-       /*
-        * next is NULL when called from perf_event_enable_on_exec()
-        * that will systematically cause a cgroup_switch()
-        */
-       if (next)
-               cgrp2 = perf_cgroup_from_task(next, NULL);
+       cgrp2 = perf_cgroup_from_task(next, NULL);
 
        /*
         * only schedule out current cgroup events if we know
@@ -611,8 +727,6 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
-
-       /* prev can never be NULL */
        cgrp2 = perf_cgroup_from_task(prev, NULL);
 
        /*
@@ -917,7 +1031,7 @@ static void put_ctx(struct perf_event_context *ctx)
        if (atomic_dec_and_test(&ctx->refcount)) {
                if (ctx->parent_ctx)
                        put_ctx(ctx->parent_ctx);
-               if (ctx->task)
+               if (ctx->task && ctx->task != TASK_TOMBSTONE)
                        put_task_struct(ctx->task);
                call_rcu(&ctx->rcu_head, free_ctx);
        }
@@ -934,9 +1048,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * perf_event_context::mutex nests and those are:
  *
  *  - perf_event_exit_task_context()   [ child , 0 ]
- *      __perf_event_exit_task()
- *        sync_child_event()
- *          put_event()                        [ parent, 1 ]
+ *      perf_event_exit_event()
+ *        put_event()                  [ parent, 1 ]
  *
  *  - perf_event_init_context()                [ parent, 0 ]
  *      inherit_task_group()
@@ -979,8 +1092,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * Lock order:
  *     task_struct::perf_event_mutex
  *       perf_event_context::mutex
- *         perf_event_context::lock
  *         perf_event::child_mutex;
+ *           perf_event_context::lock
  *         perf_event::mmap_mutex
  *         mmap_sem
  */
@@ -1078,6 +1191,7 @@ static u64 primary_event_id(struct perf_event *event)
 
 /*
  * Get the perf_event_context for a task and lock it.
+ *
  * This has to cope with the fact that until it is locked,
  * the context could get moved to another task.
  */
@@ -1118,9 +1232,12 @@ retry:
                        goto retry;
                }
 
-               if (!atomic_inc_not_zero(&ctx->refcount)) {
+               if (ctx->task == TASK_TOMBSTONE ||
+                   !atomic_inc_not_zero(&ctx->refcount)) {
                        raw_spin_unlock(&ctx->lock);
                        ctx = NULL;
+               } else {
+                       WARN_ON_ONCE(ctx->task != task);
                }
        }
        rcu_read_unlock();
@@ -1180,16 +1297,18 @@ static u64 perf_event_time(struct perf_event *event)
 
 /*
  * Update the total_time_enabled and total_time_running fields for a event.
- * The caller of this function needs to hold the ctx->lock.
  */
 static void update_event_times(struct perf_event *event)
 {
        struct perf_event_context *ctx = event->ctx;
        u64 run_end;
 
+       lockdep_assert_held(&ctx->lock);
+
        if (event->state < PERF_EVENT_STATE_INACTIVE ||
            event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
                return;
+
        /*
         * in cgroup mode, time_enabled represents
         * the time the event was enabled AND active
@@ -1246,6 +1365,8 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+       lockdep_assert_held(&ctx->lock);
+
        WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
        event->attach_state |= PERF_ATTACH_CONTEXT;
 
@@ -1448,11 +1569,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
        if (is_cgroup_event(event)) {
                ctx->nr_cgroups--;
+               /*
+                * Because cgroup events are always per-cpu events, this will
+                * always be called from the right CPU.
+                */
                cpuctx = __get_cpu_context(ctx);
                /*
-                * if there are no more cgroup events
-                * then cler cgrp to avoid stale pointer
-                * in update_cgrp_time_from_cpuctx()
+                * If there are no more cgroup events then clear cgrp to avoid
+                * stale pointer in update_cgrp_time_from_cpuctx().
                 */
                if (!ctx->nr_cgroups)
                        cpuctx->cgrp = NULL;
@@ -1530,45 +1654,11 @@ out:
                perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-       return event && !is_kernel_event(event) && !event->owner;
-}
-
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-       return is_orphaned_event(event->parent);
+       return event->state == PERF_EVENT_STATE_DEAD;
 }
 
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-       if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-               return;
-
-       if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-               get_ctx(ctx);
-               ctx->orphans_remove_sched = true;
-       }
-}
-
-static int __init perf_workqueue_init(void)
-{
-       perf_wq = create_singlethread_workqueue("perf");
-       WARN(!perf_wq, "failed to create perf workqueue\n");
-       return perf_wq ? 0 : -1;
-}
-
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
        struct pmu *pmu = event->pmu;
@@ -1611,14 +1701,14 @@ event_sched_out(struct perf_event *event,
 
        perf_pmu_disable(event->pmu);
 
+       event->tstamp_stopped = tstamp;
+       event->pmu->del(event, 0);
+       event->oncpu = -1;
        event->state = PERF_EVENT_STATE_INACTIVE;
        if (event->pending_disable) {
                event->pending_disable = 0;
                event->state = PERF_EVENT_STATE_OFF;
        }
-       event->tstamp_stopped = tstamp;
-       event->pmu->del(event, 0);
-       event->oncpu = -1;
 
        if (!is_software_event(event))
                cpuctx->active_oncpu--;
@@ -1629,9 +1719,6 @@ event_sched_out(struct perf_event *event,
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
 
-       if (is_orphaned_child(event))
-               schedule_orphans_remove(ctx);
-
        perf_pmu_enable(event->pmu);
 }
 
@@ -1655,21 +1742,7 @@ group_sched_out(struct perf_event *group_event,
                cpuctx->exclusive = 0;
 }
 
-struct remove_event {
-       struct perf_event *event;
-       bool detach_group;
-};
-
-static void ___perf_remove_from_context(void *info)
-{
-       struct remove_event *re = info;
-       struct perf_event *event = re->event;
-       struct perf_event_context *ctx = event->ctx;
-
-       if (re->detach_group)
-               perf_group_detach(event);
-       list_del_event(event, ctx);
-}
+#define DETACH_GROUP   0x01UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -1677,33 +1750,31 @@ static void ___perf_remove_from_context(void *info)
  * We disable the event on the hardware level first. After that we
  * remove it from the context list.
  */
-static int __perf_remove_from_context(void *info)
+static void
+__perf_remove_from_context(struct perf_event *event,
+                          struct perf_cpu_context *cpuctx,
+                          struct perf_event_context *ctx,
+                          void *info)
 {
-       struct remove_event *re = info;
-       struct perf_event *event = re->event;
-       struct perf_event_context *ctx = event->ctx;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       unsigned long flags = (unsigned long)info;
 
-       raw_spin_lock(&ctx->lock);
        event_sched_out(event, cpuctx, ctx);
-       if (re->detach_group)
+       if (flags & DETACH_GROUP)
                perf_group_detach(event);
        list_del_event(event, ctx);
-       if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
+
+       if (!ctx->nr_events && ctx->is_active) {
                ctx->is_active = 0;
-               cpuctx->task_ctx = NULL;
+               if (ctx->task) {
+                       WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+                       cpuctx->task_ctx = NULL;
+               }
        }
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
 }
 
 /*
  * Remove the event from a task's (or a CPU's) list of events.
  *
- * CPU events are removed with a smp call. For task events we only
- * call when the task is on a CPU.
- *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
  * remains valid.  This is OK when called from perf_release since
@@ -1711,73 +1782,32 @@ static int __perf_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event, bool detach_group)
+static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
-       struct perf_event_context *ctx = event->ctx;
-       struct remove_event re = {
-               .event = event,
-               .detach_group = detach_group,
-       };
-
-       lockdep_assert_held(&ctx->mutex);
+       lockdep_assert_held(&event->ctx->mutex);
 
-       event_function_call(event, __perf_remove_from_context,
-                           ___perf_remove_from_context, &re);
+       event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
  * Cross CPU call to disable a performance event
  */
-int __perf_event_disable(void *info)
-{
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
-       /*
-        * If this is a per-task event, need to check whether this
-        * event's task is the current task on this cpu.
-        *
-        * Can trigger due to concurrent perf_event_context_sched_out()
-        * flipping contexts around.
-        */
-       if (ctx->task && cpuctx->task_ctx != ctx)
-               return -EINVAL;
-
-       raw_spin_lock(&ctx->lock);
-
-       /*
-        * If the event is on, turn it off.
-        * If it is in error state, leave it in error state.
-        */
-       if (event->state >= PERF_EVENT_STATE_INACTIVE) {
-               update_context_time(ctx);
-               update_cgrp_time_from_event(event);
-               update_group_times(event);
-               if (event == event->group_leader)
-                       group_sched_out(event, cpuctx, ctx);
-               else
-                       event_sched_out(event, cpuctx, ctx);
-               event->state = PERF_EVENT_STATE_OFF;
-       }
-
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
-}
-
-void ___perf_event_disable(void *info)
+static void __perf_event_disable(struct perf_event *event,
+                                struct perf_cpu_context *cpuctx,
+                                struct perf_event_context *ctx,
+                                void *info)
 {
-       struct perf_event *event = info;
+       if (event->state < PERF_EVENT_STATE_INACTIVE)
+               return;
 
-       /*
-        * Since we have the lock this context can't be scheduled
-        * in, so we can change the state safely.
-        */
-       if (event->state == PERF_EVENT_STATE_INACTIVE) {
-               update_group_times(event);
-               event->state = PERF_EVENT_STATE_OFF;
-       }
+       update_context_time(ctx);
+       update_cgrp_time_from_event(event);
+       update_group_times(event);
+       if (event == event->group_leader)
+               group_sched_out(event, cpuctx, ctx);
+       else
+               event_sched_out(event, cpuctx, ctx);
+       event->state = PERF_EVENT_STATE_OFF;
 }
 
 /*
@@ -1788,7 +1818,8 @@ void ___perf_event_disable(void *info)
  * remains valid.  This condition is satisfied when called through
  * perf_event_for_each_child or perf_event_for_each because they
  * hold the top-level event's child_mutex, so any descendant that
- * goes to exit will block in sync_child_event.
+ * goes to exit will block in perf_event_exit_event().
+ *
  * When called from perf_pending_event it's OK because event->ctx
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
@@ -1804,8 +1835,12 @@ static void _perf_event_disable(struct perf_event *event)
        }
        raw_spin_unlock_irq(&ctx->lock);
 
-       event_function_call(event, __perf_event_disable,
-                           ___perf_event_disable, event);
+       event_function_call(event, __perf_event_disable, NULL);
+}
+
+void perf_event_disable_local(struct perf_event *event)
+{
+       event_function_local(event, __perf_event_disable, NULL);
 }
 
 /*
@@ -1918,9 +1953,6 @@ event_sched_in(struct perf_event *event,
        if (event->attr.exclusive)
                cpuctx->exclusive = 1;
 
-       if (is_orphaned_child(event))
-               schedule_orphans_remove(ctx);
-
 out:
        perf_pmu_enable(event->pmu);
 
@@ -2039,13 +2071,27 @@ static void add_event_to_ctx(struct perf_event *event,
        event->tstamp_stopped = tstamp;
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx);
+static void ctx_sched_out(struct perf_event_context *ctx,
+                         struct perf_cpu_context *cpuctx,
+                         enum event_type_t event_type);
 static void
 ctx_sched_in(struct perf_event_context *ctx,
             struct perf_cpu_context *cpuctx,
             enum event_type_t event_type,
             struct task_struct *task);
 
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+                              struct perf_event_context *ctx)
+{
+       if (!cpuctx->task_ctx)
+               return;
+
+       if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+               return;
+
+       ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+}
+
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
                                struct perf_event_context *ctx,
                                struct task_struct *task)
@@ -2058,22 +2104,22 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
                ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
-static void ___perf_install_in_context(void *info)
+static void ctx_resched(struct perf_cpu_context *cpuctx,
+                       struct perf_event_context *task_ctx)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
-
-       /*
-        * Since the task isn't running, its safe to add the event, us holding
-        * the ctx->lock ensures the task won't get scheduled in.
-        */
-       add_event_to_ctx(event, ctx);
+       perf_pmu_disable(cpuctx->ctx.pmu);
+       if (task_ctx)
+               task_ctx_sched_out(cpuctx, task_ctx);
+       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+       perf_event_sched_in(cpuctx, task_ctx, current);
+       perf_pmu_enable(cpuctx->ctx.pmu);
 }
 
 /*
  * Cross CPU call to install and enable a performance event
  *
- * Must be called with ctx->mutex held
+ * Very similar to remote_function() + event_function() but cannot assume that
+ * things like ctx->is_active and cpuctx->task_ctx are set.
  */
 static int  __perf_install_in_context(void *info)
 {
@@ -2081,79 +2127,106 @@ static int  __perf_install_in_context(void *info)
        struct perf_event_context *ctx = event->ctx;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
        struct perf_event_context *task_ctx = cpuctx->task_ctx;
-       struct task_struct *task = current;
-
-       perf_ctx_lock(cpuctx, task_ctx);
-       perf_pmu_disable(cpuctx->ctx.pmu);
-
-       /*
-        * If there was an active task_ctx schedule it out.
-        */
-       if (task_ctx)
-               task_ctx_sched_out(task_ctx);
+       bool activate = true;
+       int ret = 0;
 
-       /*
-        * If the context we're installing events in is not the
-        * active task_ctx, flip them.
-        */
-       if (ctx->task && task_ctx != ctx) {
-               if (task_ctx)
-                       raw_spin_unlock(&task_ctx->lock);
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx->task) {
                raw_spin_lock(&ctx->lock);
                task_ctx = ctx;
-       }
-
-       if (task_ctx) {
-               cpuctx->task_ctx = task_ctx;
-               task = task_ctx->task;
-       }
 
-       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+               /* If we're on the wrong CPU, try again */
+               if (task_cpu(ctx->task) != smp_processor_id()) {
+                       ret = -ESRCH;
+                       goto unlock;
+               }
 
-       update_context_time(ctx);
-       /*
-        * update cgrp time only if current cgrp
-        * matches event->cgrp. Must be done before
-        * calling add_event_to_ctx()
-        */
-       update_cgrp_time_from_event(event);
+               /*
+                * If we're on the right CPU, see if the task we target is
+                * current; if not, we don't have to activate the ctx, as a
+                * future context switch will do that for us.
+                */
+               if (ctx->task != current)
+                       activate = false;
+               else
+                       WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
 
-       add_event_to_ctx(event, ctx);
+       } else if (task_ctx) {
+               raw_spin_lock(&task_ctx->lock);
+       }
 
-       /*
-        * Schedule everything back in
-        */
-       perf_event_sched_in(cpuctx, task_ctx, task);
+       if (activate) {
+               ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+               add_event_to_ctx(event, ctx);
+               ctx_resched(cpuctx, task_ctx);
+       } else {
+               add_event_to_ctx(event, ctx);
+       }
 
-       perf_pmu_enable(cpuctx->ctx.pmu);
+unlock:
        perf_ctx_unlock(cpuctx, task_ctx);
 
-       return 0;
+       return ret;
 }
 
 /*
- * Attach a performance event to a context
- *
- * First we add the event to the list with the hardware enable bit
- * in event->hw_config cleared.
+ * Attach a performance event to a context.
  *
- * If the event is attached to a task which is on a CPU we use a smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
+ * Very similar to event_function_call, see comment there.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
                        struct perf_event *event,
                        int cpu)
 {
+       struct task_struct *task = READ_ONCE(ctx->task);
+
        lockdep_assert_held(&ctx->mutex);
 
        event->ctx = ctx;
        if (event->cpu != -1)
                event->cpu = cpu;
 
-       event_function_call(event, __perf_install_in_context,
-                           ___perf_install_in_context, event);
+       if (!task) {
+               cpu_function_call(cpu, __perf_install_in_context, event);
+               return;
+       }
+
+       /*
+        * Should not happen; we validate the ctx is still alive before calling.
+        */
+       if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
+               return;
+
+       /*
+        * Installing events is tricky because we cannot rely on ctx->is_active
+        * to be set in case this is the nr_events 0 -> 1 transition.
+        */
+again:
+       /*
+        * Cannot use task_function_call() because we need to run on the task's
+        * CPU regardless of whether it's current or not.
+        */
+       if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+               return;
+
+       raw_spin_lock_irq(&ctx->lock);
+       task = ctx->task;
+       if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
+               /*
+                * Cannot happen because we already checked above (which also
+                * cannot happen), and we hold ctx->mutex, which serializes us
+                * against perf_event_exit_task_context().
+                */
+               raw_spin_unlock_irq(&ctx->lock);
+               return;
+       }
+       raw_spin_unlock_irq(&ctx->lock);
+       /*
+        * Since !ctx->is_active doesn't mean anything, we must IPI
+        * unconditionally.
+        */
+       goto again;
 }
 
 /*
@@ -2180,85 +2253,47 @@ static void __perf_event_mark_enabled(struct perf_event *event)
 /*
  * Cross CPU call to enable a performance event
  */
-static int __perf_event_enable(void *info)
+static void __perf_event_enable(struct perf_event *event,
+                               struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               void *info)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
        struct perf_event *leader = event->group_leader;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-       int err;
-
-       /*
-        * There's a time window between 'ctx->is_active' check
-        * in perf_event_enable function and this place having:
-        *   - IRQs on
-        *   - ctx->lock unlocked
-        *
-        * where the task could be killed and 'ctx' deactivated
-        * by perf_event_exit_task.
-        */
-       if (!ctx->is_active)
-               return -EINVAL;
-
-       raw_spin_lock(&ctx->lock);
-       update_context_time(ctx);
+       struct perf_event_context *task_ctx;
 
-       if (event->state >= PERF_EVENT_STATE_INACTIVE)
-               goto unlock;
+       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+           event->state <= PERF_EVENT_STATE_ERROR)
+               return;
 
-       /*
-        * set current task's cgroup time reference point
-        */
-       perf_cgroup_set_timestamp(current, ctx);
+       if (ctx->is_active)
+               ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 
        __perf_event_mark_enabled(event);
 
+       if (!ctx->is_active)
+               return;
+
        if (!event_filter_match(event)) {
                if (is_cgroup_event(event))
                        perf_cgroup_defer_enabled(event);
-               goto unlock;
+               ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+               return;
        }
 
        /*
         * If the event is in a group and isn't the group leader,
         * then don't put it on unless the group is on.
         */
-       if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
-               goto unlock;
-
-       if (!group_can_go_on(event, cpuctx, 1)) {
-               err = -EEXIST;
-       } else {
-               if (event == leader)
-                       err = group_sched_in(event, cpuctx, ctx);
-               else
-                       err = event_sched_in(event, cpuctx, ctx);
-       }
-
-       if (err) {
-               /*
-                * If this event can't go on and it's part of a
-                * group, then the whole group has to come off.
-                */
-               if (leader != event) {
-                       group_sched_out(leader, cpuctx, ctx);
-                       perf_mux_hrtimer_restart(cpuctx);
-               }
-               if (leader->attr.pinned) {
-                       update_group_times(leader);
-                       leader->state = PERF_EVENT_STATE_ERROR;
-               }
+       if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
+               ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+               return;
        }
 
-unlock:
-       raw_spin_unlock(&ctx->lock);
+       task_ctx = cpuctx->task_ctx;
+       if (ctx->task)
+               WARN_ON_ONCE(task_ctx != ctx);
 
-       return 0;
-}
-
-void ___perf_event_enable(void *info)
-{
-       __perf_event_mark_enabled((struct perf_event *)info);
+       ctx_resched(cpuctx, task_ctx);
 }
 
 /*
@@ -2275,7 +2310,8 @@ static void _perf_event_enable(struct perf_event *event)
        struct perf_event_context *ctx = event->ctx;
 
        raw_spin_lock_irq(&ctx->lock);
-       if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+           event->state <  PERF_EVENT_STATE_ERROR) {
                raw_spin_unlock_irq(&ctx->lock);
                return;
        }
@@ -2291,8 +2327,7 @@ static void _perf_event_enable(struct perf_event *event)
                event->state = PERF_EVENT_STATE_OFF;
        raw_spin_unlock_irq(&ctx->lock);
 
-       event_function_call(event, __perf_event_enable,
-                           ___perf_event_enable, event);
+       event_function_call(event, __perf_event_enable, NULL);
 }
 
 /*
@@ -2342,25 +2377,49 @@ static void ctx_sched_out(struct perf_event_context *ctx,
                          struct perf_cpu_context *cpuctx,
                          enum event_type_t event_type)
 {
-       struct perf_event *event;
        int is_active = ctx->is_active;
+       struct perf_event *event;
 
-       ctx->is_active &= ~event_type;
-       if (likely(!ctx->nr_events))
+       lockdep_assert_held(&ctx->lock);
+
+       if (likely(!ctx->nr_events)) {
+               /*
+                * See __perf_remove_from_context().
+                */
+               WARN_ON_ONCE(ctx->is_active);
+               if (ctx->task)
+                       WARN_ON_ONCE(cpuctx->task_ctx);
                return;
+       }
 
-       update_context_time(ctx);
-       update_cgrp_time_from_cpuctx(cpuctx);
-       if (!ctx->nr_active)
+       ctx->is_active &= ~event_type;
+       if (!(ctx->is_active & EVENT_ALL))
+               ctx->is_active = 0;
+
+       if (ctx->task) {
+               WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+               if (!ctx->is_active)
+                       cpuctx->task_ctx = NULL;
+       }
+
+       is_active ^= ctx->is_active; /* changed bits */
+
+       if (is_active & EVENT_TIME) {
+               /* update (and stop) ctx time */
+               update_context_time(ctx);
+               update_cgrp_time_from_cpuctx(cpuctx);
+       }
+
+       if (!ctx->nr_active || !(is_active & EVENT_ALL))
                return;
 
        perf_pmu_disable(ctx->pmu);
-       if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
+       if (is_active & EVENT_PINNED) {
                list_for_each_entry(event, &ctx->pinned_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
        }
 
-       if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
+       if (is_active & EVENT_FLEXIBLE) {
                list_for_each_entry(event, &ctx->flexible_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
        }
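The "is_active ^= ctx->is_active; /* changed bits */" twist above computes which state bits just toggled, so time accounting and group tear-down only run for state that actually changed. A tiny standalone illustration (the bit values are illustrative, not the kernel's):

        #include <stdio.h>

        enum { EVENT_PINNED = 0x1, EVENT_FLEXIBLE = 0x2, EVENT_TIME = 0x4 };

        int main(void)
        {
                int is_active = EVENT_PINNED | EVENT_FLEXIBLE | EVENT_TIME;
                int event_type = EVENT_FLEXIBLE;

                int new_active = is_active & ~event_type; /* pinned + time stay */
                int changed = is_active ^ new_active;     /* changed bits */

                if (changed & EVENT_TIME)
                        printf("stop ctx time\n");        /* not printed */
                if (changed & EVENT_FLEXIBLE)
                        printf("schedule out flexible groups\n");
                if (changed & EVENT_PINNED)
                        printf("schedule out pinned groups\n"); /* not printed */
                return 0;
        }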
@@ -2518,17 +2577,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
                raw_spin_lock(&ctx->lock);
                raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
                if (context_equiv(ctx, next_ctx)) {
-                       /*
-                        * XXX do we need a memory barrier of sorts
-                        * wrt to rcu_dereference() of perf_event_ctxp
-                        */
-                       task->perf_event_ctxp[ctxn] = next_ctx;
-                       next->perf_event_ctxp[ctxn] = ctx;
-                       ctx->task = next;
-                       next_ctx->task = task;
+                       WRITE_ONCE(ctx->task, next);
+                       WRITE_ONCE(next_ctx->task, task);
 
                        swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
+                       /*
+                        * RCU_INIT_POINTER here is safe because we've not
+                        * modified the ctx and the above modifications of
+                        * ctx->task and ctx->task_ctx_data are immaterial,
+                        * since those values are always verified under
+                        * ctx->lock, which we're now holding.
+                        */
+                       RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
+                       RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
+
                        do_switch = 0;
 
                        perf_event_sync_stat(ctx, next_ctx);
@@ -2541,8 +2604,7 @@ unlock:
 
        if (do_switch) {
                raw_spin_lock(&ctx->lock);
-               ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-               cpuctx->task_ctx = NULL;
+               task_ctx_sched_out(cpuctx, ctx);
                raw_spin_unlock(&ctx->lock);
        }
 }
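The RCU_INIT_POINTER() comment above is worth unpacking: rcu_assign_pointer() implies a release barrier so that readers see a freshly initialized object's fields, while RCU_INIT_POINTER() is a plain store, acceptable here because anything a reader might care about is revalidated under ctx->lock. A C11 analogue of the two publish flavours (userspace sketch; slot and struct ctx are hypothetical):

        #include <stdatomic.h>

        struct ctx { int task; };

        static struct ctx a = { 1 }, b = { 2 };
        static _Atomic(struct ctx *) slot;

        /* rcu_assign_pointer() analogue: release store, needed when
         * publishing a freshly initialized object so its fields are
         * visible to lockless readers. */
        static void publish_new(struct ctx *c)
        {
                atomic_store_explicit(&slot, c, memory_order_release);
        }

        /* RCU_INIT_POINTER() analogue: plain store, fine when the object
         * was already published and readers verify it under a lock. */
        static void swap_in_published(struct ctx *c)
        {
                atomic_store_explicit(&slot, c, memory_order_relaxed);
        }

        int main(void)
        {
                publish_new(&a);
                swap_in_published(&b); /* both already-published objects */
                return 0;
        }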
@@ -2637,20 +2699,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
                perf_cgroup_sched_out(task, next);
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx)
-{
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
-       if (!cpuctx->task_ctx)
-               return;
-
-       if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
-               return;
-
-       ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-       cpuctx->task_ctx = NULL;
-}
-
 /*
  * Called with IRQs disabled
  */
@@ -2725,25 +2773,40 @@ ctx_sched_in(struct perf_event_context *ctx,
             enum event_type_t event_type,
             struct task_struct *task)
 {
-       u64 now;
        int is_active = ctx->is_active;
+       u64 now;
+
+       lockdep_assert_held(&ctx->lock);
 
-       ctx->is_active |= event_type;
        if (likely(!ctx->nr_events))
                return;
 
-       now = perf_clock();
-       ctx->timestamp = now;
-       perf_cgroup_set_timestamp(task, ctx);
+       ctx->is_active |= (event_type | EVENT_TIME);
+       if (ctx->task) {
+               if (!is_active)
+                       cpuctx->task_ctx = ctx;
+               else
+                       WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+       }
+
+       is_active ^= ctx->is_active; /* changed bits */
+
+       if (is_active & EVENT_TIME) {
+               /* start ctx time */
+               now = perf_clock();
+               ctx->timestamp = now;
+               perf_cgroup_set_timestamp(task, ctx);
+       }
+
        /*
         * First go through the list and put on any pinned groups
         * in order to give them the best chance of going on.
         */
-       if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
+       if (is_active & EVENT_PINNED)
                ctx_pinned_sched_in(ctx, cpuctx);
 
        /* Then walk through the lower prio flexible groups */
-       if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
+       if (is_active & EVENT_FLEXIBLE)
                ctx_flexible_sched_in(ctx, cpuctx);
 }
 
@@ -2773,12 +2836,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
         * cpu flexible, task flexible.
         */
        cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-
-       if (ctx->nr_events)
-               cpuctx->task_ctx = ctx;
-
-       perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
-
+       perf_event_sched_in(cpuctx, ctx, task);
        perf_pmu_enable(ctx->pmu);
        perf_ctx_unlock(cpuctx, ctx);
 }
@@ -2800,6 +2858,16 @@ void __perf_event_task_sched_in(struct task_struct *prev,
        struct perf_event_context *ctx;
        int ctxn;
 
+       /*
+        * If cgroup events exist on this CPU, then we need to check if we have
+        * to switch in PMU state; cgroup events are system-wide mode only.
+        *
+        * Since cgroup events are CPU events, we must schedule these in before
+        * we schedule in the task events.
+        */
+       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
+               perf_cgroup_sched_in(prev, task);
+
        for_each_task_context_nr(ctxn) {
                ctx = task->perf_event_ctxp[ctxn];
                if (likely(!ctx))
@@ -2807,13 +2875,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
                perf_event_context_sched_in(ctx, task);
        }
-       /*
-        * if cgroup events exist on this CPU, then we need
-        * to check if we have to switch in PMU state.
-        * cgroup event are system-wide mode only
-        */
-       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-               perf_cgroup_sched_in(prev, task);
 
        if (atomic_read(&nr_switch_events))
                perf_event_switch(task, prev, true);
@@ -3099,46 +3160,31 @@ static int event_enable_on_exec(struct perf_event *event,
 static void perf_event_enable_on_exec(int ctxn)
 {
        struct perf_event_context *ctx, *clone_ctx = NULL;
+       struct perf_cpu_context *cpuctx;
        struct perf_event *event;
        unsigned long flags;
        int enabled = 0;
-       int ret;
 
        local_irq_save(flags);
        ctx = current->perf_event_ctxp[ctxn];
        if (!ctx || !ctx->nr_events)
                goto out;
 
-       /*
-        * We must ctxsw out cgroup events to avoid conflict
-        * when invoking perf_task_event_sched_in() later on
-        * in this function. Otherwise we end up trying to
-        * ctxswin cgroup events which are already scheduled
-        * in.
-        */
-       perf_cgroup_sched_out(current, NULL);
-
-       raw_spin_lock(&ctx->lock);
-       task_ctx_sched_out(ctx);
-
-       list_for_each_entry(event, &ctx->event_list, event_entry) {
-               ret = event_enable_on_exec(event, ctx);
-               if (ret)
-                       enabled = 1;
-       }
-
-       /*
-        * Unclone this context if we enabled any event.
-        */
-       if (enabled)
-               clone_ctx = unclone_ctx(ctx);
-
-       raw_spin_unlock(&ctx->lock);
+       cpuctx = __get_cpu_context(ctx);
+       perf_ctx_lock(cpuctx, ctx);
+       ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               enabled |= event_enable_on_exec(event, ctx);
 
        /*
-        * Also calls ctxswin for cgroup events, if any:
+        * Unclone and reschedule this context if we enabled any event.
         */
-       perf_event_context_sched_in(ctx, ctx->task);
+       if (enabled) {
+               clone_ctx = unclone_ctx(ctx);
+               ctx_resched(cpuctx, ctx);
+       }
+       perf_ctx_unlock(cpuctx, ctx);
+
 out:
        local_irq_restore(flags);
 
@@ -3334,7 +3380,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
        INIT_LIST_HEAD(&ctx->flexible_groups);
        INIT_LIST_HEAD(&ctx->event_list);
        atomic_set(&ctx->refcount, 1);
-       INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3521,11 +3566,13 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 
 static void unaccount_event(struct perf_event *event)
 {
+       bool dec = false;
+
        if (event->parent)
                return;
 
        if (event->attach_state & PERF_ATTACH_TASK)
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_dec(&nr_mmap_events);
        if (event->attr.comm)
@@ -3535,17 +3582,30 @@ static void unaccount_event(struct perf_event *event)
        if (event->attr.freq)
                atomic_dec(&nr_freq_events);
        if (event->attr.context_switch) {
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
                atomic_dec(&nr_switch_events);
        }
        if (is_cgroup_event(event))
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
        if (has_branch_stack(event))
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
+
+       if (dec) {
+               if (!atomic_add_unless(&perf_sched_count, -1, 1))
+                       schedule_delayed_work(&perf_sched_work, HZ);
+       }
 
        unaccount_event_cpu(event, event->cpu);
 }
 
+static void perf_sched_delayed(struct work_struct *work)
+{
+       mutex_lock(&perf_sched_mutex);
+       if (atomic_dec_and_test(&perf_sched_count))
+               static_branch_disable(&perf_sched_events);
+       mutex_unlock(&perf_sched_mutex);
+}
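unaccount_event() above leans on atomic_add_unless(&perf_sched_count, -1, 1): decrement unless the counter sits at 1, in which case the final decrement (and the static-branch disable) is deferred to delayed work under perf_sched_mutex. A C11 sketch of that decrement primitive (dec_unless_last() is a hypothetical name for the analogue):

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdio.h>

        /* atomic_add_unless(v, -1, 1) analogue: decrement v unless it
         * equals 1; returns false when the caller must take the deferred
         * slow path that may disable the static branch. */
        static bool dec_unless_last(atomic_int *v)
        {
                int old = atomic_load(v);

                do {
                        if (old == 1)
                                return false; /* would hit 0: defer */
                } while (!atomic_compare_exchange_weak(v, &old, old - 1));
                return true;
        }

        int main(void)
        {
                atomic_int count = 2;

                printf("%d\n", dec_unless_last(&count)); /* 1: 2 -> 1 */
                printf("%d\n", dec_unless_last(&count)); /* 0: stays at 1 */
                return 0;
        }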
+
 /*
  * The following implement mutual exclusion of events on "exclusive" pmus
  * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
@@ -3556,7 +3616,7 @@ static void unaccount_event(struct perf_event *event)
  *  3) two matching events on the same context.
  *
  * The former two cases are handled in the allocation path (perf_event_alloc(),
- * __free_event()), the latter -- before the first perf_install_in_context().
+ * _free_event()), the latter -- before the first perf_install_in_context().
  */
 static int exclusive_event_init(struct perf_event *event)
 {
@@ -3631,29 +3691,6 @@ static bool exclusive_event_installable(struct perf_event *event,
        return true;
 }
 
-static void __free_event(struct perf_event *event)
-{
-       if (!event->parent) {
-               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-                       put_callchain_buffers();
-       }
-
-       perf_event_free_bpf_prog(event);
-
-       if (event->destroy)
-               event->destroy(event);
-
-       if (event->ctx)
-               put_ctx(event->ctx);
-
-       if (event->pmu) {
-               exclusive_event_destroy(event);
-               module_put(event->pmu->module);
-       }
-
-       call_rcu(&event->rcu_head, free_event_rcu);
-}
-
 static void _free_event(struct perf_event *event)
 {
        irq_work_sync(&event->pending);
@@ -3675,7 +3712,25 @@ static void _free_event(struct perf_event *event)
        if (is_cgroup_event(event))
                perf_detach_cgroup(event);
 
-       __free_event(event);
+       if (!event->parent) {
+               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+                       put_callchain_buffers();
+       }
+
+       perf_event_free_bpf_prog(event);
+
+       if (event->destroy)
+               event->destroy(event);
+
+       if (event->ctx)
+               put_ctx(event->ctx);
+
+       if (event->pmu) {
+               exclusive_event_destroy(event);
+               module_put(event->pmu->module);
+       }
+
+       call_rcu(&event->rcu_head, free_event_rcu);
 }
 
 /*
@@ -3702,14 +3757,13 @@ static void perf_remove_from_owner(struct perf_event *event)
        struct task_struct *owner;
 
        rcu_read_lock();
-       owner = ACCESS_ONCE(event->owner);
        /*
-        * Matches the smp_wmb() in perf_event_exit_task(). If we observe
-        * !owner it means the list deletion is complete and we can indeed
-        * free this event, otherwise we need to serialize on
+        * Matches the smp_store_release() in perf_event_exit_task(). If we
+        * observe !owner it means the list deletion is complete and we can
+        * indeed free this event, otherwise we need to serialize on
         * owner->perf_event_mutex.
         */
-       smp_read_barrier_depends();
+       owner = lockless_dereference(event->owner);
        if (owner) {
                /*
                 * Since delayed_put_task_struct() also drops the last
@@ -3737,8 +3791,10 @@ static void perf_remove_from_owner(struct perf_event *event)
                 * ensured they're done, and we can proceed with freeing the
                 * event.
                 */
-               if (event->owner)
+               if (event->owner) {
                        list_del_init(&event->owner_entry);
+                       smp_store_release(&event->owner, NULL);
+               }
                mutex_unlock(&owner->perf_event_mutex);
                put_task_struct(owner);
        }
@@ -3746,37 +3802,111 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
-       struct perf_event_context *ctx;
-
        if (!atomic_long_dec_and_test(&event->refcount))
                return;
 
+       _free_event(event);
+}
+
+/*
+ * Kill an event dead; while event::refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
+int perf_event_release_kernel(struct perf_event *event)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct perf_event *child, *tmp;
+
+       /*
+        * If we got here through err_file: fput(event_file); we will not have
+        * attached to a context yet.
+        */
+       if (!ctx) {
+               WARN_ON_ONCE(event->attach_state &
+                               (PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP));
+               goto no_ctx;
+       }
+
        if (!is_kernel_event(event))
                perf_remove_from_owner(event);
 
+       ctx = perf_event_ctx_lock(event);
+       WARN_ON_ONCE(ctx->parent_ctx);
+       perf_remove_from_context(event, DETACH_GROUP);
+
+       raw_spin_lock_irq(&ctx->lock);
        /*
-        * There are two ways this annotation is useful:
+        * Mark this event as STATE_DEAD; there is no external reference to it
+        * anymore.
         *
-        *  1) there is a lock recursion from perf_event_exit_task
-        *     see the comment there.
+        * Anybody acquiring event->child_mutex after the below loop _must_
+        * also see this, most importantly inherit_event() which will avoid
+        * placing more children on the list.
         *
-        *  2) there is a lock-inversion with mmap_sem through
-        *     perf_read_group(), which takes faults while
-        *     holding ctx->mutex, however this is called after
-        *     the last filedesc died, so there is no possibility
-        *     to trigger the AB-BA case.
+        * Thus this guarantees that we will in fact observe and kill _ALL_
+        * child events.
         */
-       ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
-       WARN_ON_ONCE(ctx->parent_ctx);
-       perf_remove_from_context(event, true);
+       event->state = PERF_EVENT_STATE_DEAD;
+       raw_spin_unlock_irq(&ctx->lock);
+
        perf_event_ctx_unlock(event, ctx);
 
-       _free_event(event);
-}
+again:
+       mutex_lock(&event->child_mutex);
+       list_for_each_entry(child, &event->child_list, child_list) {
 
-int perf_event_release_kernel(struct perf_event *event)
-{
-       put_event(event);
+               /*
+                * Cannot change: child events are not migrated; see the
+                * comment with perf_event_ctx_lock_nested().
+                */
+               ctx = lockless_dereference(child->ctx);
+               /*
+                * Since child_mutex nests inside ctx::mutex, we must jump
+                * through hoops. We start by grabbing a reference on the ctx.
+                *
+                * Since the event cannot get freed while we hold the
+                * child_mutex, the context must also exist and have a !0
+                * reference count.
+                */
+               get_ctx(ctx);
+
+               /*
+                * Now that we have a ctx ref, we can drop child_mutex, and
+                * acquire ctx::mutex without fear of it going away. Then we
+                * can re-acquire child_mutex.
+                */
+               mutex_unlock(&event->child_mutex);
+               mutex_lock(&ctx->mutex);
+               mutex_lock(&event->child_mutex);
+
+               /*
+                * Now that we hold ctx::mutex and child_mutex, revalidate our
+                * state: if child is still the first entry, it didn't get freed
+                * and we can continue.
+                */
+               tmp = list_first_entry_or_null(&event->child_list,
+                                              struct perf_event, child_list);
+               if (tmp == child) {
+                       perf_remove_from_context(child, DETACH_GROUP);
+                       list_del(&child->child_list);
+                       free_event(child);
+                       /*
+                        * This matches the refcount bump in inherit_event();
+                        * this can't be the last reference.
+                        */
+                       put_event(event);
+               }
+
+               mutex_unlock(&event->child_mutex);
+               mutex_unlock(&ctx->mutex);
+               put_ctx(ctx);
+               goto again;
+       }
+       mutex_unlock(&event->child_mutex);
+
+no_ctx:
+       put_event(event); /* Must be the 'last' reference */
        return 0;
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
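The again: loop above exists because child_mutex nests inside ctx::mutex, yet the function arrives holding only child_mutex. The generic recipe: pin the outer object with a reference, drop the inner lock, acquire outer then inner in the documented order, revalidate. A pthread sketch of that recipe (types and names are hypothetical):

        #include <pthread.h>
        #include <stdatomic.h>

        struct outer {
                pthread_mutex_t mutex;  /* like ctx::mutex   */
                atomic_int refcount;    /* like ctx refcount */
        };

        /* Caller holds *inner (like child_mutex) and knows obj cannot be
         * freed while it does; returns holding obj->mutex then *inner,
         * taken in the correct nesting order. */
        static void lock_in_order(struct outer *obj, pthread_mutex_t *inner)
        {
                atomic_fetch_add(&obj->refcount, 1); /* pin the object */
                pthread_mutex_unlock(inner);

                pthread_mutex_lock(&obj->mutex);     /* outer first... */
                pthread_mutex_lock(inner);           /* ...then inner  */

                /* Revalidate anything that may have changed while both
                 * locks were dropped, e.g. whether the child is still
                 * first on the list. */
        }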
@@ -3786,46 +3916,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-       put_event(file->private_data);
+       perf_event_release_kernel(file->private_data);
        return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-       struct perf_event_context *ctx;
-       struct perf_event *event, *tmp;
-
-       ctx = container_of(work, struct perf_event_context,
-                          orphans_remove.work);
-
-       mutex_lock(&ctx->mutex);
-       list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-               struct perf_event *parent_event = event->parent;
-
-               if (!is_orphaned_child(event))
-                       continue;
-
-               perf_remove_from_context(event, true);
-
-               mutex_lock(&parent_event->child_mutex);
-               list_del_init(&event->child_list);
-               mutex_unlock(&parent_event->child_mutex);
-
-               free_event(event);
-               put_event(parent_event);
-       }
-
-       raw_spin_lock_irq(&ctx->lock);
-       ctx->orphans_remove_sched = false;
-       raw_spin_unlock_irq(&ctx->lock);
-       mutex_unlock(&ctx->mutex);
-
-       put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
@@ -3969,7 +4063,7 @@ static bool is_event_hup(struct perf_event *event)
 {
        bool no_children;
 
-       if (event->state != PERF_EVENT_STATE_EXIT)
+       if (event->state > PERF_EVENT_STATE_EXIT)
                return false;
 
        mutex_lock(&event->child_mutex);
@@ -4054,7 +4148,7 @@ static void _perf_event_reset(struct perf_event *event)
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
- * in sync_child_event if it goes to exit, thus satisfying the
+ * in perf_event_exit_event() if it goes to exit, thus satisfying the
  * task existence requirements of perf_event_enable/disable.
  */
 static void perf_event_for_each_child(struct perf_event *event,
@@ -4086,36 +4180,14 @@ static void perf_event_for_each(struct perf_event *event,
                perf_event_for_each_child(sibling, func);
 }
 
-struct period_event {
-       struct perf_event *event;
-       u64 value;
-};
-
-static void ___perf_event_period(void *info)
-{
-       struct period_event *pe = info;
-       struct perf_event *event = pe->event;
-       u64 value = pe->value;
-
-       if (event->attr.freq) {
-               event->attr.sample_freq = value;
-       } else {
-               event->attr.sample_period = value;
-               event->hw.sample_period = value;
-       }
-
-       local64_set(&event->hw.period_left, 0);
-}
-
-static int __perf_event_period(void *info)
+static void __perf_event_period(struct perf_event *event,
+                               struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               void *info)
 {
-       struct period_event *pe = info;
-       struct perf_event *event = pe->event;
-       struct perf_event_context *ctx = event->ctx;
-       u64 value = pe->value;
+       u64 value = *((u64 *)info);
        bool active;
 
-       raw_spin_lock(&ctx->lock);
        if (event->attr.freq) {
                event->attr.sample_freq = value;
        } else {
@@ -4135,14 +4207,10 @@ static int __perf_event_period(void *info)
                event->pmu->start(event, PERF_EF_RELOAD);
                perf_pmu_enable(ctx->pmu);
        }
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
 {
-       struct period_event pe = { .event = event, };
        u64 value;
 
        if (!is_sampling_event(event))
@@ -4157,10 +4225,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
        if (event->attr.freq && value > sysctl_perf_event_sample_rate)
                return -EINVAL;
 
-       pe.value = value;
-
-       event_function_call(event, __perf_event_period,
-                           ___perf_event_period, &pe);
+       event_function_call(event, __perf_event_period, &value);
 
        return 0;
 }
@@ -4872,9 +4937,9 @@ static int perf_fasync(int fd, struct file *filp, int on)
        struct perf_event *event = filp->private_data;
        int retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        retval = fasync_helper(fd, filp, on, &event->fasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (retval < 0)
                return retval;
@@ -4932,7 +4997,7 @@ static void perf_pending_event(struct irq_work *entry)
 
        if (event->pending_disable) {
                event->pending_disable = 0;
-               __perf_event_disable(event);
+               perf_event_disable_local(event);
        }
 
        if (event->pending_wakeup) {
@@ -7753,11 +7818,13 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 
 static void account_event(struct perf_event *event)
 {
+       bool inc = false;
+
        if (event->parent)
                return;
 
        if (event->attach_state & PERF_ATTACH_TASK)
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_inc(&nr_mmap_events);
        if (event->attr.comm)
@@ -7770,12 +7837,35 @@ static void account_event(struct perf_event *event)
        }
        if (event->attr.context_switch) {
                atomic_inc(&nr_switch_events);
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        }
        if (has_branch_stack(event))
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        if (is_cgroup_event(event))
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
+
+       if (inc) {
+               if (atomic_inc_not_zero(&perf_sched_count))
+                       goto enabled;
+
+               mutex_lock(&perf_sched_mutex);
+               if (!atomic_read(&perf_sched_count)) {
+                       static_branch_enable(&perf_sched_events);
+                       /*
+                        * Guarantee that all CPUs observe the key change and
+                        * call the perf scheduling hooks before proceeding to
+                        * install events that need them.
+                        */
+                       synchronize_sched();
+               }
+               /*
+                * Now that we have waited for the sync_sched(), allow further
+                * increments to bypass the mutex.
+                */
+               atomic_inc(&perf_sched_count);
+               mutex_unlock(&perf_sched_mutex);
+       }
+enabled:
 
        account_event_cpu(event, event->cpu);
 }
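account_event() above is the increment side of the same scheme: atomic_inc_not_zero() is the lock-free fast path, and only the 0 -> 1 transition takes perf_sched_mutex, flips the static branch, and waits so every CPU sees the key before any event relying on it is installed. A userspace sketch with C11 atomics and a pthread mutex (branch_enabled stands in for the static key; synchronize_sched() has no userspace equivalent and is only noted in a comment):

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdbool.h>

        static atomic_int sched_count;
        static atomic_bool branch_enabled; /* stand-in for the static key */
        static pthread_mutex_t sched_mutex = PTHREAD_MUTEX_INITIALIZER;

        /* atomic_inc_not_zero() analogue: increment v unless it is 0. */
        static bool inc_not_zero(atomic_int *v)
        {
                int old = atomic_load(v);

                do {
                        if (old == 0)
                                return false;
                } while (!atomic_compare_exchange_weak(v, &old, old + 1));
                return true;
        }

        static void account(void)
        {
                if (inc_not_zero(&sched_count))
                        return;         /* fast path: key already on */

                pthread_mutex_lock(&sched_mutex);
                if (atomic_load(&sched_count) == 0) {
                        atomic_store(&branch_enabled, true);
                        /* the kernel also synchronize_sched()s here so
                         * all CPUs observe the key before any event
                         * needing it is installed */
                }
                atomic_fetch_add(&sched_count, 1);
                pthread_mutex_unlock(&sched_mutex);
        }

        int main(void)
        {
                account();      /* slow path: enables the branch */
                account();      /* fast path */
                return 0;
        }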
@@ -8394,10 +8484,19 @@ SYSCALL_DEFINE5(perf_event_open,
        if (move_group) {
                gctx = group_leader->ctx;
                mutex_lock_double(&gctx->mutex, &ctx->mutex);
+               if (gctx->task == TASK_TOMBSTONE) {
+                       err = -ESRCH;
+                       goto err_locked;
+               }
        } else {
                mutex_lock(&ctx->mutex);
        }
 
+       if (ctx->task == TASK_TOMBSTONE) {
+               err = -ESRCH;
+               goto err_locked;
+       }
+
        if (!perf_event_validate_size(event)) {
                err = -E2BIG;
                goto err_locked;
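The TASK_TOMBSTONE checks above rely on a sentinel pointer value (-1L in the kernel) that can never alias a live task_struct, letting lock-holders distinguish "context whose task has exited" from a per-CPU context (ctx->task == NULL) without an extra flag. The idiom in miniature (all names here are hypothetical):

        #include <stdio.h>

        struct task;

        /* A pointer value that can never point at a real task marks a
         * context whose task has exited; NULL still means "CPU context". */
        #define TOMBSTONE ((struct task *)-1L)

        static const char *classify(struct task *t)
        {
                if (!t)
                        return "CPU context";
                if (t == TOMBSTONE)
                        return "task exited (-ESRCH)";
                return "live task";
        }

        int main(void)
        {
                printf("%s\n", classify(NULL));
                printf("%s\n", classify(TOMBSTONE));
                return 0;
        }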
@@ -8422,11 +8521,11 @@ SYSCALL_DEFINE5(perf_event_open,
                 * See perf_event_ctx_lock() for comments on the details
                 * of swizzling perf_event::ctx.
                 */
-               perf_remove_from_context(group_leader, false);
+               perf_remove_from_context(group_leader, 0);
 
                list_for_each_entry(sibling, &group_leader->sibling_list,
                                    group_entry) {
-                       perf_remove_from_context(sibling, false);
+                       perf_remove_from_context(sibling, 0);
                        put_ctx(gctx);
                }
 
@@ -8479,6 +8578,8 @@ SYSCALL_DEFINE5(perf_event_open,
        perf_event__header_size(event);
        perf_event__id_header_size(event);
 
+       event->owner = current;
+
        perf_install_in_context(ctx, event, event->cpu);
        perf_unpin_context(ctx);
 
@@ -8488,8 +8589,6 @@ SYSCALL_DEFINE5(perf_event_open,
 
        put_online_cpus();
 
-       event->owner = current;
-
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
@@ -8514,7 +8613,12 @@ err_context:
        perf_unpin_context(ctx);
        put_ctx(ctx);
 err_alloc:
-       free_event(event);
+       /*
+        * If event_file is set, the fput() above will have called ->release()
+        * and that will take care of freeing the event.
+        */
+       if (!event_file)
+               free_event(event);
 err_cpus:
        put_online_cpus();
 err_task:
@@ -8556,7 +8660,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        }
 
        /* Mark owner so we could distinguish it from user events. */
-       event->owner = EVENT_OWNER_KERNEL;
+       event->owner = TASK_TOMBSTONE;
 
        account_event(event);
 
@@ -8568,12 +8672,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);
+       if (ctx->task == TASK_TOMBSTONE) {
+               err = -ESRCH;
+               goto err_unlock;
+       }
+
        if (!exclusive_event_installable(event, ctx)) {
-               mutex_unlock(&ctx->mutex);
-               perf_unpin_context(ctx);
-               put_ctx(ctx);
                err = -EBUSY;
-               goto err_free;
+               goto err_unlock;
        }
 
        perf_install_in_context(ctx, event, cpu);
@@ -8582,6 +8688,10 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
        return event;
 
+err_unlock:
+       mutex_unlock(&ctx->mutex);
+       perf_unpin_context(ctx);
+       put_ctx(ctx);
 err_free:
        free_event(event);
 err:
@@ -8606,7 +8716,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
        mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
        list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
                                 event_entry) {
-               perf_remove_from_context(event, false);
+               perf_remove_from_context(event, 0);
                unaccount_event_cpu(event, src_cpu);
                put_ctx(src_ctx);
                list_add(&event->migrate_entry, &events);
@@ -8673,33 +8783,15 @@ static void sync_child_event(struct perf_event *child_event,
                     &parent_event->child_total_time_enabled);
        atomic64_add(child_event->total_time_running,
                     &parent_event->child_total_time_running);
-
-       /*
-        * Remove this event from the parent's list
-        */
-       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-       mutex_lock(&parent_event->child_mutex);
-       list_del_init(&child_event->child_list);
-       mutex_unlock(&parent_event->child_mutex);
-
-       /*
-        * Make sure user/parent get notified, that we just
-        * lost one event.
-        */
-       perf_event_wakeup(parent_event);
-
-       /*
-        * Release the parent event, if this was the last
-        * reference to it.
-        */
-       put_event(parent_event);
 }
 
 static void
-__perf_event_exit_task(struct perf_event *child_event,
-                        struct perf_event_context *child_ctx,
-                        struct task_struct *child)
+perf_event_exit_event(struct perf_event *child_event,
+                     struct perf_event_context *child_ctx,
+                     struct task_struct *child)
 {
+       struct perf_event *parent_event = child_event->parent;
+
        /*
         * Do not destroy the 'original' grouping; because of the context
         * switch optimization the original events could've ended up in a
@@ -8712,57 +8804,86 @@ __perf_event_exit_task(struct perf_event *child_event,
         * Do destroy all inherited groups, we don't care about those
         * and being thorough is better.
         */
-       perf_remove_from_context(child_event, !!child_event->parent);
+       raw_spin_lock_irq(&child_ctx->lock);
+       WARN_ON_ONCE(child_ctx->is_active);
+
+       if (parent_event)
+               perf_group_detach(child_event);
+       list_del_event(child_event, child_ctx);
+       child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        /*
-        * It can happen that the parent exits first, and has events
-        * that are still around due to the child reference. These
-        * events need to be zapped.
+        * Parent events are governed by their filedesc; retain them.
         */
-       if (child_event->parent) {
-               sync_child_event(child_event, child);
-               free_event(child_event);
-       } else {
-               child_event->state = PERF_EVENT_STATE_EXIT;
+       if (!parent_event) {
                perf_event_wakeup(child_event);
+               return;
        }
+       /*
+        * Child events can be cleaned up.
+        */
+
+       sync_child_event(child_event, child);
+
+       /*
+        * Remove this event from the parent's list
+        */
+       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+       mutex_lock(&parent_event->child_mutex);
+       list_del_init(&child_event->child_list);
+       mutex_unlock(&parent_event->child_mutex);
+
+       /*
+        * Kick perf_poll() for is_event_hup().
+        */
+       perf_event_wakeup(parent_event);
+       free_event(child_event);
+       put_event(parent_event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-       struct perf_event *child_event, *next;
        struct perf_event_context *child_ctx, *clone_ctx = NULL;
-       unsigned long flags;
+       struct perf_event *child_event, *next;
 
-       if (likely(!child->perf_event_ctxp[ctxn]))
+       WARN_ON_ONCE(child != current);
+
+       child_ctx = perf_pin_task_context(child, ctxn);
+       if (!child_ctx)
                return;
 
-       local_irq_save(flags);
        /*
-        * We can't reschedule here because interrupts are disabled,
-        * and either child is current or it is a task that can't be
-        * scheduled, so we are now safe from rescheduling changing
-        * our context.
+        * In order to reduce the amount of trickiness in ctx tear-down, we hold
+        * ctx::mutex over the entire thing. This serializes against almost
+        * everything that wants to access the ctx.
+        *
+        * The exception is sys_perf_event_open() /
+        * perf_event_create_kernel_counter(), which does find_get_context()
+        * without ctx::mutex (it cannot because of the move_group double mutex
+        * lock thing). See the comments in perf_install_in_context().
         */
-       child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
+       mutex_lock(&child_ctx->mutex);
 
        /*
-        * Take the context lock here so that if find_get_context is
-        * reading child->perf_event_ctxp, we wait until it has
-        * incremented the context's refcount before we do put_ctx below.
+        * In a single ctx::lock section, de-schedule the events and detach the
+        * context from the task such that we cannot ever get it scheduled back
+        * in.
         */
-       raw_spin_lock(&child_ctx->lock);
-       task_ctx_sched_out(child_ctx);
-       child->perf_event_ctxp[ctxn] = NULL;
+       raw_spin_lock_irq(&child_ctx->lock);
+       task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 
        /*
-        * If this context is a clone; unclone it so it can't get
-        * swapped to another process while we're removing all
-        * the events from it.
+        * Now that the context is inactive, destroy the task <-> ctx relation
+        * and mark the context dead.
         */
+       RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
+       put_ctx(child_ctx); /* cannot be last */
+       WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
+       put_task_struct(current); /* cannot be last */
+
        clone_ctx = unclone_ctx(child_ctx);
-       update_context_time(child_ctx);
-       raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        if (clone_ctx)
                put_ctx(clone_ctx);
@@ -8774,20 +8895,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         */
        perf_event_task(child, child_ctx, 0);
 
-       /*
-        * We can recurse on the same lock type through:
-        *
-        *   __perf_event_exit_task()
-        *     sync_child_event()
-        *       put_event()
-        *         mutex_lock(&ctx->mutex)
-        *
-        * But since its the parent context it won't be the same instance.
-        */
-       mutex_lock(&child_ctx->mutex);
-
        list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-               __perf_event_exit_task(child_event, child_ctx, child);
+               perf_event_exit_event(child_event, child_ctx, child);
 
        mutex_unlock(&child_ctx->mutex);
 
@@ -8812,8 +8921,7 @@ void perf_event_exit_task(struct task_struct *child)
                 * the owner, closes a race against perf_release() where
                 * we need to serialize on the owner->perf_event_mutex.
                 */
-               smp_wmb();
-               event->owner = NULL;
+               smp_store_release(&event->owner, NULL);
        }
        mutex_unlock(&child->perf_event_mutex);
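The smp_store_release() here pairs with the lockless_dereference() added to perf_remove_from_owner() earlier in this patch: the release store publishes "list deletion complete", and the dependency-ordered load on the other side guarantees the deletion is visible before owner is observed as NULL. In C11 terms (acquire approximates the kernel's dependency ordering; the types are illustrative):

        #include <stdatomic.h>
        #include <stddef.h>

        struct event { _Atomic(void *) owner; };

        /* Writer side (perf_event_exit_task): everything sequenced before
         * the release store, e.g. the list deletion, becomes visible to a
         * pairing reader. */
        static void owner_clear(struct event *e)
        {
                /* ...list_del_init(&e->owner_entry) happens first... */
                atomic_store_explicit(&e->owner, NULL, memory_order_release);
        }

        /* Reader side (perf_remove_from_owner): observing NULL means the
         * deletion above is complete; otherwise serialize on the owner's
         * mutex, as the code above does. */
        static void *owner_read(struct event *e)
        {
                return atomic_load_explicit(&e->owner, memory_order_acquire);
        }

        int main(void)
        {
                struct event e = { NULL };

                owner_clear(&e);
                return owner_read(&e) ? 1 : 0;
        }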
 
@@ -8896,21 +9004,20 @@ void perf_event_delayed_put(struct task_struct *task)
                WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
-struct perf_event *perf_event_get(unsigned int fd)
+struct file *perf_event_get(unsigned int fd)
 {
-       int err;
-       struct fd f;
-       struct perf_event *event;
+       struct file *file;
 
-       err = perf_fget_light(fd, &f);
-       if (err)
-               return ERR_PTR(err);
+       file = fget_raw(fd);
+       if (!file)
+               return ERR_PTR(-EBADF);
 
-       event = f.file->private_data;
-       atomic_long_inc(&event->refcount);
-       fdput(f);
+       if (file->f_op != &perf_fops) {
+               fput(file);
+               return ERR_PTR(-EBADF);
+       }
 
-       return event;
+       return file;
 }
 
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
@@ -8953,8 +9060,16 @@ inherit_event(struct perf_event *parent_event,
        if (IS_ERR(child_event))
                return child_event;
 
+       /*
+        * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+        * must be under the same lock in order to serialize against
+        * perf_event_release_kernel(), such that either we must observe
+        * is_orphaned_event() or they will observe us on the child_list.
+        */
+       mutex_lock(&parent_event->child_mutex);
        if (is_orphaned_event(parent_event) ||
            !atomic_long_inc_not_zero(&parent_event->refcount)) {
+               mutex_unlock(&parent_event->child_mutex);
                free_event(child_event);
                return NULL;
        }
@@ -9002,8 +9117,6 @@ inherit_event(struct perf_event *parent_event,
        /*
         * Link this into the parent event's child list
         */
-       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-       mutex_lock(&parent_event->child_mutex);
        list_add_tail(&child_event->child_list, &parent_event->child_list);
        mutex_unlock(&parent_event->child_mutex);
 
@@ -9208,7 +9321,7 @@ static void perf_event_init_cpu(int cpu)
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
        mutex_lock(&swhash->hlist_mutex);
-       if (swhash->hlist_refcount > 0) {
+       if (swhash->hlist_refcount > 0 && !swevent_hlist_deref(swhash)) {
                struct swevent_hlist *hlist;
 
                hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -9221,13 +9334,14 @@ static void perf_event_init_cpu(int cpu)
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
-       struct remove_event re = { .detach_group = true };
        struct perf_event_context *ctx = __info;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct perf_event *event;
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
-               __perf_remove_from_context(&re);
-       rcu_read_unlock();
+       raw_spin_lock(&ctx->lock);
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
+       raw_spin_unlock(&ctx->lock);
 }
 
 static void perf_event_exit_cpu_context(int cpu)
@@ -9283,11 +9397,9 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
        switch (action & ~CPU_TASKS_FROZEN) {
 
        case CPU_UP_PREPARE:
-       case CPU_DOWN_FAILED:
                perf_event_init_cpu(cpu);
                break;
 
-       case CPU_UP_CANCELED:
        case CPU_DOWN_PREPARE:
                perf_event_exit_cpu(cpu);
                break;
@@ -9316,9 +9428,6 @@ void __init perf_event_init(void)
        ret = init_hw_breakpoint();
        WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 
-       /* do not patch jump label more than once per second */
-       jump_label_rate_limit(&perf_sched_events, HZ);
-
        /*
         * Build time assertion that we keep the data_head at the intended
         * location.  IOW, validation we got the __reserved[] size right.
kernel/events/hw_breakpoint.c
index 92ce5f4ccc264e01a5551965d173e39e41392143..3f8cb1e14588fe3258a1c3aa49874e008f5a266a 100644 (file)
@@ -444,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
         * current task.
         */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
-               __perf_event_disable(bp);
+               perf_event_disable_local(bp);
        else
                perf_event_disable(bp);
 
kernel/events/ring_buffer.c
index adfdc0536117c1f10bf8f0dd8014798011e1f855..1faad2cfdb9e4572c91431e4b11941266e23a231 100644 (file)
@@ -459,6 +459,25 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx)
        __free_page(page);
 }
 
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+       int pg;
+
+       if (rb->aux_priv) {
+               rb->free_aux(rb->aux_priv);
+               rb->free_aux = NULL;
+               rb->aux_priv = NULL;
+       }
+
+       if (rb->aux_nr_pages) {
+               for (pg = 0; pg < rb->aux_nr_pages; pg++)
+                       rb_free_aux_page(rb, pg);
+
+               kfree(rb->aux_pages);
+               rb->aux_nr_pages = 0;
+       }
+}
+
 int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
                 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
@@ -547,30 +566,11 @@ out:
        if (!ret)
                rb->aux_pgoff = pgoff;
        else
-               rb_free_aux(rb);
+               __rb_free_aux(rb);
 
        return ret;
 }
 
-static void __rb_free_aux(struct ring_buffer *rb)
-{
-       int pg;
-
-       if (rb->aux_priv) {
-               rb->free_aux(rb->aux_priv);
-               rb->free_aux = NULL;
-               rb->aux_priv = NULL;
-       }
-
-       if (rb->aux_nr_pages) {
-               for (pg = 0; pg < rb->aux_nr_pages; pg++)
-                       rb_free_aux_page(rb, pg);
-
-               kfree(rb->aux_pages);
-               rb->aux_nr_pages = 0;
-       }
-}
-
 void rb_free_aux(struct ring_buffer *rb)
 {
        if (atomic_dec_and_test(&rb->aux_refcount))
kernel/futex.c
index 0773f2b23b107dcc3be9e5caabdbaa0a25de01d0..5d6ce6413ef1d227b32c99a4330bee1289f9f571 100644 (file)
@@ -1191,7 +1191,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        if (pi_state->owner != current)
                return -EINVAL;
 
-       raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+       raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
        new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
        /*
@@ -1217,22 +1217,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        else if (curval != uval)
                ret = -EINVAL;
        if (ret) {
-               raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+               raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
                return ret;
        }
 
-       raw_spin_lock_irq(&pi_state->owner->pi_lock);
+       raw_spin_lock(&pi_state->owner->pi_lock);
        WARN_ON(list_empty(&pi_state->list));
        list_del_init(&pi_state->list);
-       raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+       raw_spin_unlock(&pi_state->owner->pi_lock);
 
-       raw_spin_lock_irq(&new_owner->pi_lock);
+       raw_spin_lock(&new_owner->pi_lock);
        WARN_ON(!list_empty(&pi_state->list));
        list_add(&pi_state->list, &new_owner->pi_state_list);
        pi_state->owner = new_owner;
-       raw_spin_unlock_irq(&new_owner->pi_lock);
+       raw_spin_unlock(&new_owner->pi_lock);
 
-       raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+       raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
        deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
 
@@ -2127,11 +2127,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
                 * we returned due to timeout or signal without taking the
                 * rt_mutex. Too late.
                 */
-               raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+               raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
                owner = rt_mutex_owner(&q->pi_state->pi_mutex);
                if (!owner)
                        owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-               raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+               raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
                ret = fixup_pi_state_owner(uaddr, q, owner);
                goto out;
        }
kernel/irq/handle.c
index a302cf9a2126c8a911ff4da1a944c88c6d3b3f8a..57bff7857e879bf2301a92723ade0a968870e637 100644 (file)
@@ -138,7 +138,8 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
        unsigned int flags = 0, irq = desc->irq_data.irq;
        struct irqaction *action = desc->action;
 
-       do {
+       /* action might have become NULL since we dropped the lock */
+       while (action) {
                irqreturn_t res;
 
                trace_irq_handler_entry(irq, action);
@@ -173,7 +174,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
 
                retval |= res;
                action = action->next;
-       } while (action);
+       }
 
        add_interrupt_randomness(irq, flags);
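The do/while-to-while conversion matters because a do { } while body runs once even when action is already NULL, dereferencing a freed/empty chain. Minimal illustration (the struct and handler are reduced stand-ins for the kernel's irqaction):

        #include <stddef.h>

        struct irqaction {
                void (*handler)(void);
                struct irqaction *next;
        };

        /* Safe: tests action before the first dereference, so a chain
         * that became empty while the lock was dropped is skipped. */
        static void run_chain(struct irqaction *action)
        {
                while (action) {
                        action->handler();
                        action = action->next;
                }
        }

        int main(void)
        {
                run_chain(NULL); /* would crash with do { } while (action) */
                return 0;
        }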
 
kernel/irq/irqdomain.c
index 6e655f7acd3bf79b68f62fc4fbdf6806cfd11884..3e56d2f03e24e6ee147e837727e6aac7a479f9aa 100644 (file)
@@ -575,10 +575,15 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
        unsigned int type = IRQ_TYPE_NONE;
        int virq;
 
-       if (fwspec->fwnode)
-               domain = irq_find_matching_fwnode(fwspec->fwnode, DOMAIN_BUS_ANY);
-       else
+       if (fwspec->fwnode) {
+               domain = irq_find_matching_fwnode(fwspec->fwnode,
+                                                 DOMAIN_BUS_WIRED);
+               if (!domain)
+                       domain = irq_find_matching_fwnode(fwspec->fwnode,
+                                                         DOMAIN_BUS_ANY);
+       } else {
                domain = irq_default_domain;
+       }
 
        if (!domain) {
                pr_warn("no irq domain found for %s !\n",
kernel/locking/lockdep.c
index 60ace56618f6c222de90e569bf1702b6dba05e26..716547fdb8731dcc395d70f2664ec6fb22378db6 100644 (file)
@@ -292,7 +292,7 @@ LIST_HEAD(all_lock_classes);
 #define __classhashfn(key)     hash_long((unsigned long)key, CLASSHASH_BITS)
 #define classhashentry(key)    (classhash_table + __classhashfn((key)))
 
-static struct list_head classhash_table[CLASSHASH_SIZE];
+static struct hlist_head classhash_table[CLASSHASH_SIZE];
 
 /*
  * We put the lock dependency chains into a hash-table as well, to cache
@@ -303,7 +303,7 @@ static struct list_head classhash_table[CLASSHASH_SIZE];
 #define __chainhashfn(chain)   hash_long(chain, CHAINHASH_BITS)
 #define chainhashentry(chain)  (chainhash_table + __chainhashfn((chain)))
 
-static struct list_head chainhash_table[CHAINHASH_SIZE];
+static struct hlist_head chainhash_table[CHAINHASH_SIZE];
 
 /*
  * The hash key of the lock dependency chains is a hash itself too:
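The list_head to hlist_head conversion running through the rest of this file halves each hash bucket to a single pointer, which is all a bucket head needs; nodes keep O(1) unlink via pprev. Userspace copies of the two layouts make the saving concrete:

        #include <stdio.h>

        /* Minimal copies of the kernel types, for sizing only. */
        struct list_head  { struct list_head *next, *prev; };
        struct hlist_node { struct hlist_node *next, **pprev; };
        struct hlist_head { struct hlist_node *first; };

        int main(void)
        {
                printf("list_head  bucket: %zu bytes\n",
                       sizeof(struct list_head));
                printf("hlist_head bucket: %zu bytes\n",
                       sizeof(struct hlist_head));
                /* With 2^CLASSHASH_BITS and 2^CHAINHASH_BITS buckets, the
                 * tables shed a pointer per bucket; an empty hlist bucket
                 * is also just a NULL first pointer, which is why the
                 * list_empty() pre-check disappears later in this patch. */
                return 0;
        }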
@@ -666,7 +666,7 @@ static inline struct lock_class *
 look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 {
        struct lockdep_subclass_key *key;
-       struct list_head *hash_head;
+       struct hlist_head *hash_head;
        struct lock_class *class;
 
 #ifdef CONFIG_DEBUG_LOCKDEP
@@ -719,7 +719,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return NULL;
 
-       list_for_each_entry_rcu(class, hash_head, hash_entry) {
+       hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
                if (class->key == key) {
                        /*
                         * Huh! same key, different name? Did someone trample
@@ -742,7 +742,7 @@ static inline struct lock_class *
 register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 {
        struct lockdep_subclass_key *key;
-       struct list_head *hash_head;
+       struct hlist_head *hash_head;
        struct lock_class *class;
 
        DEBUG_LOCKS_WARN_ON(!irqs_disabled());
@@ -774,7 +774,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
         * We have to do the hash-walk again, to avoid races
         * with another CPU:
         */
-       list_for_each_entry_rcu(class, hash_head, hash_entry) {
+       hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
                if (class->key == key)
                        goto out_unlock_set;
        }
@@ -805,7 +805,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
         * We use RCU's safe list-add method to make
         * parallel walking of the hash-list safe:
         */
-       list_add_tail_rcu(&class->hash_entry, hash_head);
+       hlist_add_head_rcu(&class->hash_entry, hash_head);
        /*
         * Add it to the global list of classes:
         */
@@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-              struct held_lock *next, int distance, int trylock_loop)
+              struct held_lock *next, int distance, int *stack_saved)
 {
        struct lock_list *entry;
        int ret;
@@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
                }
        }
 
-       if (!trylock_loop && !save_trace(&trace))
-               return 0;
+       if (!*stack_saved) {
+               if (!save_trace(&trace))
+                       return 0;
+               *stack_saved = 1;
+       }
 
        /*
         * Ok, all validations passed, add the new lock
@@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
         * Debugging printouts:
         */
        if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
+               /* We drop graph lock, so another thread can overwrite trace. */
+               *stack_saved = 0;
                graph_unlock();
                printk("\n new dependency: ");
                print_lock_name(hlock_class(prev));
@@ -1929,7 +1934,7 @@ static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
        int depth = curr->lockdep_depth;
-       int trylock_loop = 0;
+       int stack_saved = 0;
        struct held_lock *hlock;
 
        /*
@@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
                 */
                if (hlock->read != 2 && hlock->check) {
                        if (!check_prev_add(curr, hlock, next,
-                                               distance, trylock_loop))
+                                               distance, &stack_saved))
                                return 0;
                        /*
                         * Stop after the first non-trylock entry,
@@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
                if (curr->held_locks[depth].irq_context !=
                                curr->held_locks[depth-1].irq_context)
                        break;
-               trylock_loop = 1;
        }
        return 1;
 out_bug:
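
The stack_saved hunks above implement a small caching contract: check_prevs_add() walks every earlier held lock, the expensive save_trace() needs to run at most once per walk, and the cached trace must be invalidated whenever the graph lock is dropped, since another CPU can then overwrite the shared trace buffer. A hedged sketch of that contract, with capture_trace() and verbose_step() invented for illustration:

    /* Sketch only, not kernel code: cache one expensive capture across
     * a loop, invalidating it on any step that drops the lock protecting
     * the shared buffer. */
    static int walk_prev_locks(int depth)
    {
            int stack_saved = 0;
            int i;

            for (i = depth - 1; i >= 0; i--) {
                    if (!stack_saved) {
                            if (!capture_trace())   /* cf. save_trace() */
                                    return 0;
                            stack_saved = 1;
                    }
                    if (verbose_step(i))    /* this step dropped the lock */
                            stack_saved = 0;        /* force a re-capture */
            }
            return 1;
    }
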
@@ -2017,7 +2021,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
                                     u64 chain_key)
 {
        struct lock_class *class = hlock_class(hlock);
-       struct list_head *hash_head = chainhashentry(chain_key);
+       struct hlist_head *hash_head = chainhashentry(chain_key);
        struct lock_chain *chain;
        struct held_lock *hlock_curr;
        int i, j;
@@ -2033,7 +2037,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
         * We can walk it lock-free, because entries only get added
         * to the hash:
         */
-       list_for_each_entry_rcu(chain, hash_head, entry) {
+       hlist_for_each_entry_rcu(chain, hash_head, entry) {
                if (chain->chain_key == chain_key) {
 cache_hit:
                        debug_atomic_inc(chain_lookup_hits);
@@ -2057,7 +2061,7 @@ cache_hit:
        /*
         * We have to walk the chain again locked - to avoid duplicates:
         */
-       list_for_each_entry(chain, hash_head, entry) {
+       hlist_for_each_entry(chain, hash_head, entry) {
                if (chain->chain_key == chain_key) {
                        graph_unlock();
                        goto cache_hit;
@@ -2091,7 +2095,7 @@ cache_hit:
                }
                chain_hlocks[chain->base + j] = class - lock_classes;
        }
-       list_add_tail_rcu(&chain->entry, hash_head);
+       hlist_add_head_rcu(&chain->entry, hash_head);
        debug_atomic_inc(chain_lookup_misses);
        inc_chains();
 
@@ -3875,7 +3879,7 @@ void lockdep_reset(void)
        nr_process_chains = 0;
        debug_locks = 1;
        for (i = 0; i < CHAINHASH_SIZE; i++)
-               INIT_LIST_HEAD(chainhash_table + i);
+               INIT_HLIST_HEAD(chainhash_table + i);
        raw_local_irq_restore(flags);
 }
 
@@ -3894,7 +3898,7 @@ static void zap_class(struct lock_class *class)
        /*
         * Unhash the class and remove it from the all_lock_classes list:
         */
-       list_del_rcu(&class->hash_entry);
+       hlist_del_rcu(&class->hash_entry);
        list_del_rcu(&class->lock_entry);
 
        RCU_INIT_POINTER(class->key, NULL);
@@ -3917,7 +3921,7 @@ static inline int within(const void *addr, void *start, unsigned long size)
 void lockdep_free_key_range(void *start, unsigned long size)
 {
        struct lock_class *class;
-       struct list_head *head;
+       struct hlist_head *head;
        unsigned long flags;
        int i;
        int locked;
@@ -3930,9 +3934,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
         */
        for (i = 0; i < CLASSHASH_SIZE; i++) {
                head = classhash_table + i;
-               if (list_empty(head))
-                       continue;
-               list_for_each_entry_rcu(class, head, hash_entry) {
+               hlist_for_each_entry_rcu(class, head, hash_entry) {
                        if (within(class->key, start, size))
                                zap_class(class);
                        else if (within(class->name, start, size))
@@ -3962,7 +3964,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
 void lockdep_reset_lock(struct lockdep_map *lock)
 {
        struct lock_class *class;
-       struct list_head *head;
+       struct hlist_head *head;
        unsigned long flags;
        int i, j;
        int locked;
@@ -3987,9 +3989,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
        locked = graph_lock();
        for (i = 0; i < CLASSHASH_SIZE; i++) {
                head = classhash_table + i;
-               if (list_empty(head))
-                       continue;
-               list_for_each_entry_rcu(class, head, hash_entry) {
+               hlist_for_each_entry_rcu(class, head, hash_entry) {
                        int match = 0;
 
                        for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
@@ -4027,10 +4027,10 @@ void lockdep_init(void)
                return;
 
        for (i = 0; i < CLASSHASH_SIZE; i++)
-               INIT_LIST_HEAD(classhash_table + i);
+               INIT_HLIST_HEAD(classhash_table + i);
 
        for (i = 0; i < CHAINHASH_SIZE; i++)
-               INIT_LIST_HEAD(chainhash_table + i);
+               INIT_HLIST_HEAD(chainhash_table + i);
 
        lockdep_initialized = 1;
 }
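
All of the lockdep hunks above are one mechanical conversion: the class and chain hash buckets move from the doubly linked list_head to hlist_head, which halves every bucket head to a single pointer while keeping lockless RCU traversal; hlist_add_head_rcu() replaces list_add_tail_rcu(), which is safe because ordering within a bucket never mattered. A minimal kernel-style sketch of the resulting pattern (my_table, my_lookup and my_insert are invented names, not part of the patch):

    #include <linux/rculist.h>

    #define MY_HASH_SIZE 64
    static struct hlist_head my_table[MY_HASH_SIZE];

    struct foo {
            unsigned long key;
            struct hlist_node hash_entry;
    };

    /* Lookup may run locklessly; the caller holds rcu_read_lock(). */
    static struct foo *my_lookup(unsigned long key)
    {
            struct hlist_head *head = &my_table[key % MY_HASH_SIZE];
            struct foo *f;

            hlist_for_each_entry_rcu(f, head, hash_entry)
                    if (f->key == key)
                            return f;
            return NULL;
    }

    /* Insertion must be serialized by the caller (cf. graph_lock()). */
    static void my_insert(struct foo *f)
    {
            hlist_add_head_rcu(&f->hash_entry,
                               &my_table[f->key % MY_HASH_SIZE]);
    }
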
index 8251e75dd9c0bd67337754baf6f03fb2cf1f956f..3e746607abe5230bb9c650957582669c085d9a90 100644 (file)
@@ -99,13 +99,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  * 2) Drop lock->wait_lock
  * 3) Try to unlock the lock with cmpxchg
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+                                       unsigned long flags)
        __releases(lock->wait_lock)
 {
        struct task_struct *owner = rt_mutex_owner(lock);
 
        clear_rt_mutex_waiters(lock);
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        /*
         * If a new waiter comes in between the unlock and the cmpxchg
         * we have two situations:
@@ -147,11 +148,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 /*
  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+                                       unsigned long flags)
        __releases(lock->wait_lock)
 {
        lock->owner = NULL;
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        return true;
 }
 #endif
@@ -433,7 +435,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        int ret = 0, depth = 0;
        struct rt_mutex *lock;
        bool detect_deadlock;
-       unsigned long flags;
        bool requeue = true;
 
        detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
@@ -476,7 +477,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        /*
         * [1] Task cannot go away as we did a get_task() before !
         */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock_irq(&task->pi_lock);
 
        /*
         * [2] Get the waiter on which @task is blocked on.
@@ -560,7 +561,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
         * operations.
         */
        if (!raw_spin_trylock(&lock->wait_lock)) {
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               raw_spin_unlock_irq(&task->pi_lock);
                cpu_relax();
                goto retry;
        }
@@ -591,7 +592,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                /*
                 * No requeue[7] here. Just release @task [8]
                 */
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               raw_spin_unlock(&task->pi_lock);
                put_task_struct(task);
 
                /*
@@ -599,14 +600,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 * If there is no owner of the lock, end of chain.
                 */
                if (!rt_mutex_owner(lock)) {
-                       raw_spin_unlock(&lock->wait_lock);
+                       raw_spin_unlock_irq(&lock->wait_lock);
                        return 0;
                }
 
                /* [10] Grab the next task, i.e. owner of @lock */
                task = rt_mutex_owner(lock);
                get_task_struct(task);
-               raw_spin_lock_irqsave(&task->pi_lock, flags);
+               raw_spin_lock(&task->pi_lock);
 
                /*
                 * No requeue [11] here. We just do deadlock detection.
@@ -621,8 +622,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                top_waiter = rt_mutex_top_waiter(lock);
 
                /* [13] Drop locks */
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock(&task->pi_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
 
                /* If owner is not blocked, end of chain. */
                if (!next_lock)
@@ -643,7 +644,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        rt_mutex_enqueue(lock, waiter);
 
        /* [8] Release the task */
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
        put_task_struct(task);
 
        /*
@@ -661,14 +662,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 */
                if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
                        wake_up_process(rt_mutex_top_waiter(lock)->task);
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
                return 0;
        }
 
        /* [10] Grab the next task, i.e. the owner of @lock */
        task = rt_mutex_owner(lock);
        get_task_struct(task);
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
 
        /* [11] requeue the pi waiters if necessary */
        if (waiter == rt_mutex_top_waiter(lock)) {
@@ -722,8 +723,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        top_waiter = rt_mutex_top_waiter(lock);
 
        /* [13] Drop the locks */
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock(&task->pi_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        /*
         * Make the actual exit decisions [12], based on the stored
@@ -746,7 +747,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        goto again;
 
  out_unlock_pi:
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock_irq(&task->pi_lock);
  out_put_task:
        put_task_struct(task);
 
@@ -756,7 +757,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 /*
  * Try to take an rt-mutex
  *
- * Must be called with lock->wait_lock held.
+ * Must be called with lock->wait_lock held and interrupts disabled.
  *
  * @lock:   The lock to be acquired.
  * @task:   The task which wants to acquire the lock
@@ -766,8 +767,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
                                struct rt_mutex_waiter *waiter)
 {
-       unsigned long flags;
-
        /*
         * Before testing whether we can acquire @lock, we set the
         * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -852,7 +851,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
         * case, but conditionals are more expensive than a redundant
         * store.
         */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
        task->pi_blocked_on = NULL;
        /*
         * Finish the lock acquisition. @task is the new owner. If
@@ -861,7 +860,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
         */
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
 
 takeit:
        /* We got the lock. */
@@ -883,7 +882,7 @@ takeit:
  *
  * Prepare waiter and propagate pi chain
  *
- * This must be called with lock->wait_lock held.
+ * This must be called with lock->wait_lock held and interrupts disabled.
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
                                   struct rt_mutex_waiter *waiter,
@@ -894,7 +893,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        struct rt_mutex_waiter *top_waiter = waiter;
        struct rt_mutex *next_lock;
        int chain_walk = 0, res;
-       unsigned long flags;
 
        /*
         * Early deadlock detection. We really don't want the task to
@@ -908,7 +906,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        if (owner == task)
                return -EDEADLK;
 
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
        __rt_mutex_adjust_prio(task);
        waiter->task = task;
        waiter->lock = lock;
@@ -921,12 +919,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
        task->pi_blocked_on = waiter;
 
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
 
        if (!owner)
                return 0;
 
-       raw_spin_lock_irqsave(&owner->pi_lock, flags);
+       raw_spin_lock(&owner->pi_lock);
        if (waiter == rt_mutex_top_waiter(lock)) {
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);
@@ -941,7 +939,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
 
-       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+       raw_spin_unlock(&owner->pi_lock);
        /*
         * Even if full deadlock detection is on, if the owner is not
         * blocked itself, we can avoid finding this out in the chain
@@ -957,12 +955,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
         */
        get_task_struct(owner);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
                                         next_lock, waiter, task);
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        return res;
 }
@@ -971,15 +969,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  * Remove the top waiter from the current tasks pi waiter tree and
  * queue it up.
  *
- * Called with lock->wait_lock held.
+ * Called with lock->wait_lock held and interrupts disabled.
  */
 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
                                    struct rt_mutex *lock)
 {
        struct rt_mutex_waiter *waiter;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&current->pi_lock, flags);
+       raw_spin_lock(&current->pi_lock);
 
        waiter = rt_mutex_top_waiter(lock);
 
@@ -1001,7 +998,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
         */
        lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
-       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+       raw_spin_unlock(&current->pi_lock);
 
        wake_q_add(wake_q, waiter->task);
 }
@@ -1009,7 +1006,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 /*
  * Remove a waiter from a lock and give up
  *
- * Must be called with lock->wait_lock held and
+ * Must be called with lock->wait_lock held and interrupts disabled. It must
  * have just failed to try_to_take_rt_mutex().
  */
 static void remove_waiter(struct rt_mutex *lock,
@@ -1018,12 +1015,11 @@ static void remove_waiter(struct rt_mutex *lock,
        bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex *next_lock;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&current->pi_lock, flags);
+       raw_spin_lock(&current->pi_lock);
        rt_mutex_dequeue(lock, waiter);
        current->pi_blocked_on = NULL;
-       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+       raw_spin_unlock(&current->pi_lock);
 
        /*
         * Only update priority if the waiter was the highest priority
@@ -1032,7 +1028,7 @@ static void remove_waiter(struct rt_mutex *lock,
        if (!owner || !is_top_waiter)
                return;
 
-       raw_spin_lock_irqsave(&owner->pi_lock, flags);
+       raw_spin_lock(&owner->pi_lock);
 
        rt_mutex_dequeue_pi(owner, waiter);
 
@@ -1044,7 +1040,7 @@ static void remove_waiter(struct rt_mutex *lock,
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
 
-       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+       raw_spin_unlock(&owner->pi_lock);
 
        /*
         * Don't walk the chain, if the owner task is not blocked
@@ -1056,12 +1052,12 @@ static void remove_waiter(struct rt_mutex *lock,
        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(owner);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
                                   next_lock, NULL, current);
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 }
 
 /*
@@ -1097,11 +1093,11 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  * @lock:               the rt_mutex to take
  * @state:              the state the task should block in (TASK_INTERRUPTIBLE
- *                      or TASK_UNINTERRUPTIBLE)
+ *                      or TASK_UNINTERRUPTIBLE)
  * @timeout:            the pre-initialized and started timer, or NULL for none
  * @waiter:             the pre-initialized rt_mutex_waiter
  *
- * lock->wait_lock must be held by the caller.
+ * Must be called with lock->wait_lock held and interrupts disabled.
  */
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
@@ -1129,13 +1125,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
                                break;
                }
 
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
 
                debug_rt_mutex_print_deadlock(waiter);
 
                schedule();
 
-               raw_spin_lock(&lock->wait_lock);
+               raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(state);
        }
 
@@ -1172,17 +1168,26 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
                  enum rtmutex_chainwalk chwalk)
 {
        struct rt_mutex_waiter waiter;
+       unsigned long flags;
        int ret = 0;
 
        debug_rt_mutex_init_waiter(&waiter);
        RB_CLEAR_NODE(&waiter.pi_tree_entry);
        RB_CLEAR_NODE(&waiter.tree_entry);
 
-       raw_spin_lock(&lock->wait_lock);
+       /*
+        * Technically we could use raw_spin_[un]lock_irq() here, but this can
+        * be called in early boot if the cmpxchg() fast path is disabled
+        * (debug, no architecture support). In this case we will acquire the
+        * rtmutex with lock->wait_lock held. But we cannot unconditionally
+        * enable interrupts in that early boot case. So we need to use the
+        * irqsave/restore variants.
+        */
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        /* Try to acquire the lock again: */
        if (try_to_take_rt_mutex(lock, current, NULL)) {
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
                return 0;
        }
 
@@ -1211,7 +1216,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        /* Remove pending timer: */
        if (unlikely(timeout))
@@ -1227,6 +1232,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  */
 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 {
+       unsigned long flags;
        int ret;
 
        /*
@@ -1238,10 +1244,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
                return 0;
 
        /*
-        * The mutex has currently no owner. Lock the wait lock and
-        * try to acquire the lock.
+        * The mutex currently has no owner. Lock the wait lock and try to
+        * acquire the lock. We use irqsave here to support early boot calls.
         */
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        ret = try_to_take_rt_mutex(lock, current, NULL);
 
@@ -1251,7 +1257,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        return ret;
 }
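
The comment added to rt_mutex_slowlock() above explains the shape of this whole series of rtmutex hunks: the outermost lock sites (slowlock, slowtrylock, slowunlock) switch to the irqsave/irqrestore spinlock variants because they can run in early boot with interrupts legitimately disabled, while the nested pi_lock and wait_lock sites can use the cheaper plain or _irq forms since the interrupt state is then known. A kernel-style sketch of why irqsave matters there (take_carefully() is an invented name):

    #include <linux/spinlock.h>

    static void take_carefully(raw_spinlock_t *l)
    {
            unsigned long flags;

            /* Saves whatever the current IRQ state is ... */
            raw_spin_lock_irqsave(l, flags);
            /* ... critical section ... */
            /* ... and restores exactly that state on the way out.
             * raw_spin_unlock_irq() would instead unconditionally
             * re-enable interrupts, which is wrong if the caller had
             * them disabled. */
            raw_spin_unlock_irqrestore(l, flags);
    }
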
@@ -1263,7 +1269,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
                                        struct wake_q_head *wake_q)
 {
-       raw_spin_lock(&lock->wait_lock);
+       unsigned long flags;
+
+       /* irqsave required to support early boot calls */
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        debug_rt_mutex_unlock(lock);
 
@@ -1302,10 +1311,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
         */
        while (!rt_mutex_has_waiters(lock)) {
                /* Drops lock->wait_lock ! */
-               if (unlock_rt_mutex_safe(lock) == true)
+               if (unlock_rt_mutex_safe(lock, flags) == true)
                        return false;
                /* Relock the rtmutex and try again */
-               raw_spin_lock(&lock->wait_lock);
+               raw_spin_lock_irqsave(&lock->wait_lock, flags);
        }
 
        /*
@@ -1316,7 +1325,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
         */
        mark_wakeup_next_waiter(wake_q, lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        /* check PI boosting */
        return true;
@@ -1596,10 +1605,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 {
        int ret;
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        if (try_to_take_rt_mutex(lock, task, NULL)) {
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
                return 1;
        }
 
@@ -1620,7 +1629,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
        if (unlikely(ret))
                remove_waiter(lock, waiter);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        debug_rt_mutex_print_deadlock(waiter);
 
@@ -1668,7 +1677,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 {
        int ret;
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1684,7 +1693,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        return ret;
 }
index e517a16cb426cc30b0251a43d22c5d2cb01bead9..b981a7b023f04c356df5b852f60caeae232fdf45 100644 (file)
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(memunmap);
 
 static void devm_memremap_release(struct device *dev, void *res)
 {
-       memunmap(res);
+       memunmap(*(void **)res);
 }
 
 static int devm_memremap_match(struct device *dev, void *res, void *match_data)
@@ -136,8 +136,10 @@ void *devm_memremap(struct device *dev, resource_size_t offset,
        if (addr) {
                *ptr = addr;
                devres_add(dev, ptr);
-       } else
+       } else {
                devres_free(ptr);
+               return ERR_PTR(-ENXIO);
+       }
 
        return addr;
 }
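
Two fixes meet in devm_memremap() above: the devres release callback receives a pointer to the devres data area (here a void ** holding the mapping), so it must dereference before unmapping, and a failed mapping now returns ERR_PTR(-ENXIO) so callers can test the result with IS_ERR(). A hedged sketch of the complete idiom (my_release and my_devm_memremap are invented names):

    #include <linux/device.h>
    #include <linux/err.h>
    #include <linux/io.h>

    static void my_release(struct device *dev, void *res)
    {
            /* 'res' is the devres payload, not the mapping itself. */
            memunmap(*(void **)res);
    }

    static void *my_devm_memremap(struct device *dev, resource_size_t off,
                                  size_t size, unsigned long flags)
    {
            void **ptr, *addr;

            ptr = devres_alloc(my_release, sizeof(*ptr), GFP_KERNEL);
            if (!ptr)
                    return ERR_PTR(-ENOMEM);

            addr = memremap(off, size, flags);
            if (!addr) {
                    devres_free(ptr);
                    return ERR_PTR(-ENXIO);  /* IS_ERR()-testable */
            }

            *ptr = addr;
            devres_add(dev, ptr);   /* released when the device goes away */
            return addr;
    }
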
@@ -150,7 +152,7 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
 {
        return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
 }
@@ -183,10 +185,14 @@ EXPORT_SYMBOL(put_zone_device_page);
 
 static void pgmap_radix_release(struct resource *res)
 {
-       resource_size_t key;
+       resource_size_t key, align_start, align_size, align_end;
+
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(resource_size(res), SECTION_SIZE);
+       align_end = align_start + align_size - 1;
 
        mutex_lock(&pgmap_lock);
-       for (key = res->start; key <= res->end; key += SECTION_SIZE)
+       for (key = align_start; key <= align_end; key += SECTION_SIZE)
                radix_tree_delete(&pgmap_radix, key);
        mutex_unlock(&pgmap_lock);
 }
@@ -226,12 +232,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
                percpu_ref_put(pgmap->ref);
        }
 
-       pgmap_radix_release(res);
-
        /* pages are dead and unused, undo the arch mapping */
        align_start = res->start & ~(SECTION_SIZE - 1);
        align_size = ALIGN(resource_size(res), SECTION_SIZE);
        arch_remove_memory(align_start, align_size);
+       pgmap_radix_release(res);
        dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
                        "%s: failed to free all reserved pages\n", __func__);
 }
@@ -267,7 +272,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 {
        int is_ram = region_intersects(res->start, resource_size(res),
                        "System RAM");
-       resource_size_t key, align_start, align_size;
+       resource_size_t key, align_start, align_size, align_end;
        struct dev_pagemap *pgmap;
        struct page_map *page_map;
        unsigned long pfn;
@@ -309,7 +314,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
        mutex_lock(&pgmap_lock);
        error = 0;
-       for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(resource_size(res), SECTION_SIZE);
+       align_end = align_start + align_size - 1;
+       for (key = align_start; key <= align_end; key += SECTION_SIZE) {
                struct dev_pagemap *dup;
 
                rcu_read_lock();
@@ -336,8 +344,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        if (nid < 0)
                nid = numa_mem_id();
 
-       align_start = res->start & ~(SECTION_SIZE - 1);
-       align_size = ALIGN(resource_size(res), SECTION_SIZE);
        error = arch_add_memory(nid, align_start, align_size, true);
        if (error)
                goto err_add_memory;
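
The radix tree is keyed by section-aligned addresses, so the insertion loop above and the release loop must both step over the same rounded range; iterating the raw res->start..res->end of an unaligned resource would insert keys that lookups never compute (and leak them on release). The arithmetic, annotated with an invented example assuming a 128MB section size (typical x86_64 sparsemem):

    align_start = res->start & ~(SECTION_SIZE - 1);        /* round down */
    align_size  = ALIGN(resource_size(res), SECTION_SIZE); /* round up   */
    align_end   = align_start + align_size - 1;            /* inclusive  */

    /* e.g. SECTION_SIZE = 0x8000000 (128MB),
     *      res = [0x104000000, 0x104ffffff] (16MB, unaligned start):
     *      align_start = 0x100000000, align_size = 0x8000000,
     *      align_end = 0x107ffffff -> the loop visits one section key. */
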
index 8358f4697c0c3aea77aba802833660d4082b7c13..794ebe8e878d55cf9a0b94a1ae7a994ac4dec80b 100644 (file)
@@ -303,6 +303,9 @@ struct load_info {
        struct _ddebug *debug;
        unsigned int num_debug;
        bool sig_ok;
+#ifdef CONFIG_KALLSYMS
+       unsigned long mod_kallsyms_init_off;
+#endif
        struct {
                unsigned int sym, str, mod, vers, info, pcpu;
        } index;
@@ -981,6 +984,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
                mod->exit();
        blocking_notifier_call_chain(&module_notify_list,
                                     MODULE_STATE_GOING, mod);
+       ftrace_release_mod(mod);
+
        async_synchronize_full();
 
        /* Store the name of the last unloaded module for diagnostic purposes */
@@ -2480,10 +2485,21 @@ static void layout_symtab(struct module *mod, struct load_info *info)
        strsect->sh_flags |= SHF_ALLOC;
        strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
                                         info->index.str) | INIT_OFFSET_MASK;
-       mod->init_layout.size = debug_align(mod->init_layout.size);
        pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
+
+       /* We'll tack temporary mod_kallsyms on the end. */
+       mod->init_layout.size = ALIGN(mod->init_layout.size,
+                                     __alignof__(struct mod_kallsyms));
+       info->mod_kallsyms_init_off = mod->init_layout.size;
+       mod->init_layout.size += sizeof(struct mod_kallsyms);
+       mod->init_layout.size = debug_align(mod->init_layout.size);
 }
 
+/*
+ * We use the full symtab and strtab which layout_symtab arranged to
+ * be appended to the init section.  Later we switch to the cut-down
+ * core-only ones.
+ */
 static void add_kallsyms(struct module *mod, const struct load_info *info)
 {
        unsigned int i, ndst;
@@ -2492,29 +2508,34 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
        char *s;
        Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
 
-       mod->symtab = (void *)symsec->sh_addr;
-       mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+       /* Set up to point into init section. */
+       mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
+
+       mod->kallsyms->symtab = (void *)symsec->sh_addr;
+       mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
        /* Make sure we get permanent strtab: don't use info->strtab. */
-       mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
+       mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
 
        /* Set types up while we still have access to sections. */
-       for (i = 0; i < mod->num_symtab; i++)
-               mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
-
-       mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
-       mod->core_strtab = s = mod->core_layout.base + info->stroffs;
-       src = mod->symtab;
-       for (ndst = i = 0; i < mod->num_symtab; i++) {
+       for (i = 0; i < mod->kallsyms->num_symtab; i++)
+               mod->kallsyms->symtab[i].st_info
+                       = elf_type(&mod->kallsyms->symtab[i], info);
+
+       /* Now populate the cut down core kallsyms for after init. */
+       mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
+       mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
+       src = mod->kallsyms->symtab;
+       for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
                if (i == 0 ||
                    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
                                   info->index.pcpu)) {
                        dst[ndst] = src[i];
-                       dst[ndst++].st_name = s - mod->core_strtab;
-                       s += strlcpy(s, &mod->strtab[src[i].st_name],
+                       dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
+                       s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
                                     KSYM_NAME_LEN) + 1;
                }
        }
-       mod->core_num_syms = ndst;
+       mod->core_kallsyms.num_symtab = ndst;
 }
 #else
 static inline void layout_symtab(struct module *mod, struct load_info *info)
@@ -3263,9 +3284,8 @@ static noinline int do_init_module(struct module *mod)
        module_put(mod);
        trim_init_extable(mod);
 #ifdef CONFIG_KALLSYMS
-       mod->num_symtab = mod->core_num_syms;
-       mod->symtab = mod->core_symtab;
-       mod->strtab = mod->core_strtab;
+       /* Switch to core kallsyms now init is done: kallsyms may be walking! */
+       rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
 #endif
        mod_tree_remove_init(mod);
        disable_ro_nx(&mod->init_layout);
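
The module hunks above gather the symtab/strtab/num_symtab triple into a struct mod_kallsyms reached through an RCU-managed pointer, so the switch from the full init-section tables to the cut-down core-only tables is atomic for concurrent kallsyms walkers. The publish/consume pairing, sketched with the fields the patch introduces:

    /* Writer: publish the core tables once init has finished. */
    rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);

    /* Reader: preemption disabled, so rcu_dereference_sched() is the
     * matching consume-side primitive. */
    preempt_disable();
    kallsyms = rcu_dereference_sched(mod->kallsyms);
    /* ... use kallsyms->symtab, kallsyms->strtab, num_symtab ... */
    preempt_enable();
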
@@ -3295,6 +3315,7 @@ fail:
        module_put(mod);
        blocking_notifier_call_chain(&module_notify_list,
                                     MODULE_STATE_GOING, mod);
+       ftrace_release_mod(mod);
        free_module(mod);
        wake_up_all(&module_wq);
        return ret;
@@ -3371,6 +3392,7 @@ static int complete_formation(struct module *mod, struct load_info *info)
        mod->state = MODULE_STATE_COMING;
        mutex_unlock(&module_mutex);
 
+       ftrace_module_enable(mod);
        blocking_notifier_call_chain(&module_notify_list,
                                     MODULE_STATE_COMING, mod);
        return 0;
@@ -3496,7 +3518,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 
        /* Module is ready to execute: parsing args may do that. */
        after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-                                 -32768, 32767, NULL,
+                                 -32768, 32767, mod,
                                  unknown_module_param_cb);
        if (IS_ERR(after_dashes)) {
                err = PTR_ERR(after_dashes);
@@ -3627,6 +3649,11 @@ static inline int is_arm_mapping_symbol(const char *str)
               && (str[2] == '\0' || str[2] == '.');
 }
 
+static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum)
+{
+       return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
+}
+
 static const char *get_ksymbol(struct module *mod,
                               unsigned long addr,
                               unsigned long *size,
@@ -3634,6 +3661,7 @@ static const char *get_ksymbol(struct module *mod,
 {
        unsigned int i, best = 0;
        unsigned long nextval;
+       struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
        /* At worst, next value is at end of module */
        if (within_module_init(addr, mod))
@@ -3643,32 +3671,32 @@ static const char *get_ksymbol(struct module *mod,
 
        /* Scan for closest preceding symbol, and next symbol. (ELF
           starts real symbols at 1). */
-       for (i = 1; i < mod->num_symtab; i++) {
-               if (mod->symtab[i].st_shndx == SHN_UNDEF)
+       for (i = 1; i < kallsyms->num_symtab; i++) {
+               if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
                        continue;
 
                /* We ignore unnamed symbols: they're uninformative
                 * and inserted at a whim. */
-               if (mod->symtab[i].st_value <= addr
-                   && mod->symtab[i].st_value > mod->symtab[best].st_value
-                   && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-                   && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
+               if (*symname(kallsyms, i) == '\0'
+                   || is_arm_mapping_symbol(symname(kallsyms, i)))
+                       continue;
+
+               if (kallsyms->symtab[i].st_value <= addr
+                   && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value)
                        best = i;
-               if (mod->symtab[i].st_value > addr
-                   && mod->symtab[i].st_value < nextval
-                   && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-                   && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
-                       nextval = mod->symtab[i].st_value;
+               if (kallsyms->symtab[i].st_value > addr
+                   && kallsyms->symtab[i].st_value < nextval)
+                       nextval = kallsyms->symtab[i].st_value;
        }
 
        if (!best)
                return NULL;
 
        if (size)
-               *size = nextval - mod->symtab[best].st_value;
+               *size = nextval - kallsyms->symtab[best].st_value;
        if (offset)
-               *offset = addr - mod->symtab[best].st_value;
-       return mod->strtab + mod->symtab[best].st_name;
+               *offset = addr - kallsyms->symtab[best].st_value;
+       return symname(kallsyms, best);
 }
 
 /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
@@ -3758,19 +3786,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 
        preempt_disable();
        list_for_each_entry_rcu(mod, &modules, list) {
+               struct mod_kallsyms *kallsyms;
+
                if (mod->state == MODULE_STATE_UNFORMED)
                        continue;
-               if (symnum < mod->num_symtab) {
-                       *value = mod->symtab[symnum].st_value;
-                       *type = mod->symtab[symnum].st_info;
-                       strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
-                               KSYM_NAME_LEN);
+               kallsyms = rcu_dereference_sched(mod->kallsyms);
+               if (symnum < kallsyms->num_symtab) {
+                       *value = kallsyms->symtab[symnum].st_value;
+                       *type = kallsyms->symtab[symnum].st_info;
+                       strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
                        strlcpy(module_name, mod->name, MODULE_NAME_LEN);
                        *exported = is_exported(name, *value, mod);
                        preempt_enable();
                        return 0;
                }
-               symnum -= mod->num_symtab;
+               symnum -= kallsyms->num_symtab;
        }
        preempt_enable();
        return -ERANGE;
@@ -3779,11 +3809,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 static unsigned long mod_find_symname(struct module *mod, const char *name)
 {
        unsigned int i;
+       struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
-       for (i = 0; i < mod->num_symtab; i++)
-               if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
-                   mod->symtab[i].st_info != 'U')
-                       return mod->symtab[i].st_value;
+       for (i = 0; i < kallsyms->num_symtab; i++)
+               if (strcmp(name, symname(kallsyms, i)) == 0 &&
+                   kallsyms->symtab[i].st_info != 'U')
+                       return kallsyms->symtab[i].st_value;
        return 0;
 }
 
@@ -3822,11 +3853,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
        module_assert_mutex();
 
        list_for_each_entry(mod, &modules, list) {
+               /* We hold module_mutex: no need for rcu_dereference_sched */
+               struct mod_kallsyms *kallsyms = mod->kallsyms;
+
                if (mod->state == MODULE_STATE_UNFORMED)
                        continue;
-               for (i = 0; i < mod->num_symtab; i++) {
-                       ret = fn(data, mod->strtab + mod->symtab[i].st_name,
-                                mod, mod->symtab[i].st_value);
+               for (i = 0; i < kallsyms->num_symtab; i++) {
+                       ret = fn(data, symname(kallsyms, i),
+                                mod, kallsyms->symtab[i].st_value);
                        if (ret != 0)
                                return ret;
                }
index f4ad91b746f1e4afd96b3615a4bad844a0875528..4d73a834c7e6590e29eb0d1a71645caf6c274016 100644 (file)
@@ -588,7 +588,7 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-       /* Veryify no one has done anything silly */
+       /* Verify no one has done anything silly: */
        BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
 
        /* bump default and minimum pid_max based on number of cpus */
index 02e8dfaa1ce2d2085bd85e165ad6f25b9697a11a..68d3ebc12601b6bdea4f9d2c6cfca997a0b6a0dc 100644 (file)
@@ -235,7 +235,7 @@ config PM_TRACE_RTC
 
 config APM_EMULATION
        tristate "Advanced Power Management Emulation"
-       depends on PM && SYS_SUPPORTS_APM_EMULATION
+       depends on SYS_SUPPORTS_APM_EMULATION
        help
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
index 0b4570cfacaeb2f5290d8f4b98d256943d22333c..074994bcfa9be14336ecb06912a969f6d9546c9d 100644 (file)
@@ -1133,7 +1133,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
        if (!desc->count)
                return 0;
 
-       mutex_lock(&file_inode(filp)->i_mutex);
+       inode_lock(file_inode(filp));
        do {
                if (!relay_file_read_avail(buf, *ppos))
                        break;
@@ -1153,7 +1153,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
                        *ppos = relay_file_read_end_pos(buf, read_start, ret);
                }
        } while (desc->count && ret);
-       mutex_unlock(&file_inode(filp)->i_mutex);
+       inode_unlock(file_inode(filp));
 
        return desc->written;
 }
index 09c0597840b02dc260c65baafdd1fde0715bcbb4..3669d1bfc4254213e0e42dacab374624d74cdd5c 100644 (file)
@@ -1083,9 +1083,10 @@ struct resource * __request_region(struct resource *parent,
                if (!conflict)
                        break;
                if (conflict != parent) {
-                       parent = conflict;
-                       if (!(conflict->flags & IORESOURCE_BUSY))
+                       if (!(conflict->flags & IORESOURCE_BUSY)) {
+                               parent = conflict;
                                continue;
+                       }
                }
                if (conflict->flags & flags & IORESOURCE_MUXED) {
                        add_wait_queue(&muxed_resource_wait, &wait);
index 44253adb3c36dc28fb84cf9fdda4609f59dc388c..9503d590e5ef5b81537947b9925ecfe689f72a91 100644 (file)
@@ -222,9 +222,9 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 
        /* Ensure the static_key remains in a consistent state */
        inode = file_inode(filp);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        i = sched_feat_set(cmp);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (i == __SCHED_FEAT_NR)
                return -EINVAL;
 
@@ -6840,7 +6840,7 @@ static void sched_init_numa(void)
 
                        sched_domains_numa_masks[i][j] = mask;
 
-                       for (k = 0; k < nr_node_ids; k++) {
+                       for_each_node(k) {
                                if (node_distance(j, k) > sched_domains_numa_distance[i])
                                        continue;
 
index cd64c979d0e1857aceb8f9f4383c507ad5fa6809..57b939c81bce5d06fa587df8915f05affbe22b82 100644 (file)
@@ -420,7 +420,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
         * entity.
         */
        if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
-               printk_deferred_once("sched: DL replenish lagged to much\n");
+               printk_deferred_once("sched: DL replenish lagged too much\n");
                dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
                dl_se->runtime = pi_se->dl_runtime;
        }
index 1926606ece807361ab02ef51f4864543640e8d8d..56b7d4b839476b6ed1692e786abe9ed6cda64a5f 100644 (file)
@@ -1220,8 +1220,6 @@ static void task_numa_assign(struct task_numa_env *env,
 {
        if (env->best_task)
                put_task_struct(env->best_task);
-       if (p)
-               get_task_struct(p);
 
        env->best_task = p;
        env->best_imp = imp;
@@ -1289,20 +1287,30 @@ static void task_numa_compare(struct task_numa_env *env,
        long imp = env->p->numa_group ? groupimp : taskimp;
        long moveimp = imp;
        int dist = env->dist;
+       bool assigned = false;
 
        rcu_read_lock();
 
        raw_spin_lock_irq(&dst_rq->lock);
        cur = dst_rq->curr;
        /*
-        * No need to move the exiting task, and this ensures that ->curr
-        * wasn't reaped and thus get_task_struct() in task_numa_assign()
-        * is safe under RCU read lock.
-        * Note that rcu_read_lock() itself can't protect from the final
-        * put_task_struct() after the last schedule().
+        * No need to move the exiting task or idle task.
         */
        if ((cur->flags & PF_EXITING) || is_idle_task(cur))
                cur = NULL;
+       else {
+               /*
+                * The task_struct must be protected here to protect the
+                * p->numa_faults access in the task_weight since the
+                * numa_faults could already be freed in the following path:
+                * finish_task_switch()
+                *     --> put_task_struct()
+                *         --> __put_task_struct()
+                *             --> task_numa_free()
+                */
+               get_task_struct(cur);
+       }
+
        raw_spin_unlock_irq(&dst_rq->lock);
 
        /*
@@ -1386,6 +1394,7 @@ balance:
                 */
                if (!load_too_imbalanced(src_load, dst_load, env)) {
                        imp = moveimp - 1;
+                       put_task_struct(cur);
                        cur = NULL;
                        goto assign;
                }
@@ -1411,9 +1420,16 @@ balance:
                env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
 
 assign:
+       assigned = true;
        task_numa_assign(env, cur, imp);
 unlock:
        rcu_read_unlock();
+       /*
+        * If dst_rq->curr was not assigned to env->best_task, its
+        * task_struct no longer needs protection: drop the reference.
+        */
+       if (cur && !assigned)
+               put_task_struct(cur);
 }
 
 static void task_numa_find_cpu(struct task_numa_env *env,
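
The use-after-free fixed above came from taking the reference too late: once dst_rq->lock is dropped, finish_task_switch() can run put_task_struct() and free cur->numa_faults under the reader. The fix pins the task while the lock still guarantees it is alive, then drops the reference on every path that does not keep it. A sketch of the general shape (keep() and stash() are invented helpers):

    raw_spin_lock_irq(&rq->lock);
    t = rq->curr;
    get_task_struct(t);             /* pin t while the lock pins it */
    raw_spin_unlock_irq(&rq->lock);

    if (keep(t))
            stash(t);               /* the stash now owns the reference */
    else
            put_task_struct(t);     /* otherwise balance the get here */
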
index de0e786b26671b633ccaa56d9ebba6ce7b9d77cf..544a7133cbd1df2a6ee4369231148fc0309eae11 100644 (file)
@@ -162,7 +162,7 @@ static void cpuidle_idle_call(void)
         */
        if (idle_should_freeze()) {
                entered_state = cpuidle_enter_freeze(drv, dev);
-               if (entered_state >= 0) {
+               if (entered_state > 0) {
                        local_irq_enable();
                        goto exit_idle;
                }
index 580ac2d4024ffbdb29960ccdd86424fa730b1e78..15a1795bbba17d1140e3220c41d48dd26ac43c95 100644 (file)
@@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void)
                put_seccomp_filter(thread);
                smp_store_release(&thread->seccomp.filter,
                                  caller->seccomp.filter);
+
+               /*
+                * Don't let an unprivileged task work around
+                * the no_new_privs restriction by creating
+                * a thread that sets it up, enters seccomp,
+                * then dies.
+                */
+               if (task_no_new_privs(caller))
+                       task_set_no_new_privs(thread);
+
                /*
                 * Opt the other thread into seccomp if needed.
                 * As threads are considered to be trust-realm
                 * equivalent (see ptrace_may_access), it is safe to
                 * allow one thread to transition the other.
                 */
-               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-                       /*
-                        * Don't let an unprivileged task work around
-                        * the no_new_privs restriction by creating
-                        * a thread that sets it up, enters seccomp,
-                        * then dies.
-                        */
-                       if (task_no_new_privs(caller))
-                               task_set_no_new_privs(thread);
-
+               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
                        seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-               }
        }
 }
 
index f3f1f7a972fd40f3d437d6bf3b5db2cacaffe6ca..0508544c8ced0d96913905dc53af68f38b6ee618 100644 (file)
@@ -3508,8 +3508,10 @@ static int sigsuspend(sigset_t *set)
        current->saved_sigmask = current->blocked;
        set_current_blocked(set);
 
-       __set_current_state(TASK_INTERRUPTIBLE);
-       schedule();
+       while (!signal_pending(current)) {
+               __set_current_state(TASK_INTERRUPTIBLE);
+               schedule();
+       }
        set_restore_sigmask();
        return -ERESTARTNOHAND;
 }
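
A single schedule() can return on a spurious wakeup, letting sigsuspend() return with no signal delivered; the loop added above restores the canonical sleep pattern of re-checking the wakeup condition every time around. In its general kernel form (condition is a stand-in):

    for (;;) {
            set_current_state(TASK_INTERRUPTIBLE);
            if (condition)          /* checked after setting the state */
                    break;
            schedule();             /* spurious wakeup? go around again */
    }
    __set_current_state(TASK_RUNNING);
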
index 91420362e0b339fcdfe930066dab863604357b13..97715fd9e790ade5d7cd7731107de2dafb8272e3 100644 (file)
@@ -1757,6 +1757,20 @@ static struct ctl_table fs_table[] = {
                .proc_handler   = &pipe_proc_fn,
                .extra1         = &pipe_min_size,
        },
+       {
+               .procname       = "pipe-user-pages-hard",
+               .data           = &pipe_user_pages_hard,
+               .maxlen         = sizeof(pipe_user_pages_hard),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+       },
+       {
+               .procname       = "pipe-user-pages-soft",
+               .data           = &pipe_user_pages_soft,
+               .maxlen         = sizeof(pipe_user_pages_soft),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+       },
        { }
 };
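
Both new entries reuse proc_doulongvec_minmax, so they appear under /proc/sys/fs/ and read and write as unsigned longs. A small userspace check of the new knobs, assuming the running kernel carries this patch:

    #include <stdio.h>

    static unsigned long read_knob(const char *path)
    {
            unsigned long val = 0;
            FILE *f = fopen(path, "r");

            if (f) {
                    if (fscanf(f, "%lu", &val) != 1)
                            val = 0;
                    fclose(f);
            }
            return val;
    }

    int main(void)
    {
            printf("pipe-user-pages-soft: %lu\n",
                   read_knob("/proc/sys/fs/pipe-user-pages-soft"));
            printf("pipe-user-pages-hard: %lu\n",
                   read_knob("/proc/sys/fs/pipe-user-pages-hard"));
            return 0;
    }
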
 
index 435b8850dd80a300c11ad128bc0cf51e3c23e15a..fa909f9fd5591801968523032e00f8fe816fb23f 100644 (file)
@@ -897,10 +897,10 @@ static int enqueue_hrtimer(struct hrtimer *timer,
  */
 static void __remove_hrtimer(struct hrtimer *timer,
                             struct hrtimer_clock_base *base,
-                            unsigned long newstate, int reprogram)
+                            u8 newstate, int reprogram)
 {
        struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-       unsigned int state = timer->state;
+       u8 state = timer->state;
 
        timer->state = newstate;
        if (!(state & HRTIMER_STATE_ENQUEUED))
@@ -930,7 +930,7 @@ static inline int
 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 {
        if (hrtimer_is_queued(timer)) {
-               unsigned long state = timer->state;
+               u8 state = timer->state;
                int reprogram;
 
                /*
@@ -954,6 +954,22 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
        return 0;
 }
 
+static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
+                                           const enum hrtimer_mode mode)
+{
+#ifdef CONFIG_TIME_LOW_RES
+       /*
+        * CONFIG_TIME_LOW_RES indicates that the system has no way to return
+        * granular time values. For relative timers we add hrtimer_resolution
+        * (i.e. one jiffy) to prevent short timeouts.
+        */
+       timer->is_rel = mode & HRTIMER_MODE_REL;
+       if (timer->is_rel)
+               tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
+#endif
+       return tim;
+}
+
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
  * @timer:     the timer to be added
@@ -974,19 +990,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
        /* Remove an active timer from the queue: */
        remove_hrtimer(timer, base, true);
 
-       if (mode & HRTIMER_MODE_REL) {
+       if (mode & HRTIMER_MODE_REL)
                tim = ktime_add_safe(tim, base->get_time());
-               /*
-                * CONFIG_TIME_LOW_RES is a temporary way for architectures
-                * to signal that they simply return xtime in
-                * do_gettimeoffset(). In this case we want to round up by
-                * resolution when starting a relative timer, to avoid short
-                * timeouts. This will go away with the GTOD framework.
-                */
-#ifdef CONFIG_TIME_LOW_RES
-               tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
-#endif
-       }
+
+       tim = hrtimer_update_lowres(timer, tim, mode);
 
        hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
@@ -1074,19 +1081,23 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
 /**
  * hrtimer_get_remaining - get remaining time for the timer
  * @timer:     the timer to read
+ * @adjust:    adjust relative timers when CONFIG_TIME_LOW_RES=y
  */
-ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
 {
        unsigned long flags;
        ktime_t rem;
 
        lock_hrtimer_base(timer, &flags);
-       rem = hrtimer_expires_remaining(timer);
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
+               rem = hrtimer_expires_remaining_adjusted(timer);
+       else
+               rem = hrtimer_expires_remaining(timer);
        unlock_hrtimer_base(timer, &flags);
 
        return rem;
 }
-EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
+EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_HZ_COMMON
 /**
@@ -1219,6 +1230,14 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
        timer_stats_account_hrtimer(timer);
        fn = timer->function;
 
+       /*
+        * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
+        * timer is restarted with a period then it becomes an absolute
+        * timer. If it's not restarted it does not matter.
+        */
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES))
+               timer->is_rel = false;
+
        /*
         * Because we run timers from hardirq context, there is no chance
         * they get migrated to another cpu, therefore its safe to unlock
index 8d262b4675738d21bea752d65a47117c3d8ae514..1d5c7204ddc97bce881431b399e58741961c6391 100644 (file)
@@ -26,7 +26,7 @@
  */
 static struct timeval itimer_get_remtime(struct hrtimer *timer)
 {
-       ktime_t rem = hrtimer_get_remaining(timer);
+       ktime_t rem = __hrtimer_get_remaining(timer, true);
 
        /*
         * Racy but safe: if the itimer expires after the above
index 36f2ca09aa5e458bf1f31e3ec018e180dcc5b345..6df8927c58a5bbedcab9efceac7c21985bec0bf0 100644 (file)
@@ -685,8 +685,18 @@ int ntp_validate_timex(struct timex *txc)
                if (!capable(CAP_SYS_TIME))
                        return -EPERM;
 
-               if (!timeval_inject_offset_valid(&txc->time))
-                       return -EINVAL;
+               if (txc->modes & ADJ_NANO) {
+                       struct timespec ts;
+
+                       ts.tv_sec = txc->time.tv_sec;
+                       ts.tv_nsec = txc->time.tv_usec;
+                       if (!timespec_inject_offset_valid(&ts))
+                               return -EINVAL;
+
+               } else {
+                       if (!timeval_inject_offset_valid(&txc->time))
+                               return -EINVAL;
+               }
        }
 
        /*
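
With ADJ_NANO set, userspace places nanoseconds in the timeval's tv_usec field, so validating it against the microsecond range would falsely reject legal offsets between 1e6 and 1e9-1. The fix above re-types the value before checking; a sketch with the two ranges spelled out:

    struct timespec ts = {
            .tv_sec  = txc->time.tv_sec,
            .tv_nsec = txc->time.tv_usec,  /* nanoseconds despite the name */
    };
    /* timespec_inject_offset_valid(): 0 <= tv_nsec < NSEC_PER_SEC (1e9)
     * timeval_inject_offset_valid():  0 <= tv_usec < USEC_PER_SEC (1e6) */
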
index 31d11ac9fa4739789728c44470b82115ec307d11..f2826c35e9185f60586f06dff53f51ca3324c659 100644 (file)
@@ -760,7 +760,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
            (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
                timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
-       remaining = ktime_sub(hrtimer_get_expires(timer), now);
+       remaining = __hrtimer_expires_remaining_adjusted(timer, now);
        /* Return 0 only, when the timer is expired and not pending */
        if (remaining.tv64 <= 0) {
                /*
index 9d7a053545f5aca7a324f3530ee52ca9dac413f8..0b17424349eb4dafd76a2cf588748988ce51a295 100644 (file)
  */
 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
-/*
- * The time, when the last jiffy update happened. Protected by jiffies_lock.
- */
-static ktime_t last_jiffies_update;
-
 struct tick_sched *tick_get_tick_sched(int cpu)
 {
        return &per_cpu(tick_cpu_sched, cpu);
 }
 
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+/*
+ * The time, when the last jiffy update happened. Protected by jiffies_lock.
+ */
+static ktime_t last_jiffies_update;
+
 /*
  * Must be called with interrupts disabled !
  */
@@ -151,6 +152,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
        update_process_times(user_mode(regs));
        profile_tick(CPU_PROFILING);
 }
+#endif
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
@@ -993,9 +995,9 @@ static void tick_nohz_switch_to_nohz(void)
        /* Get the next period */
        next = tick_init_jiffy_update();
 
-       hrtimer_forward_now(&ts->sched_timer, tick_period);
        hrtimer_set_expires(&ts->sched_timer, next);
-       tick_program_event(next, 1);
+       hrtimer_forward_now(&ts->sched_timer, tick_period);
+       tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
        tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
 }
 
index f75e35b6014900da71ffa438a507c34f19e36e3a..ba7d8b288bb37250ce9e9c0e5f562b45832c4ca9 100644 (file)
@@ -69,7 +69,7 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
        print_name_offset(m, taddr);
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->function);
-       SEQ_printf(m, ", S:%02lx", timer->state);
+       SEQ_printf(m, ", S:%02x", timer->state);
 #ifdef CONFIG_TIMER_STATS
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->start_site);
index 45dd798bcd37e7bd529e61932e51a30366590626..326a75e884dbf3f46513538861a028c7be459f09 100644 (file)
@@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
        struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct perf_event *event;
+       struct file *file;
 
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (!event)
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        /* make sure event is local and doesn't have pmu::count */
        if (event->oncpu != smp_processor_id() ||
            event->pmu->count)
@@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
        void *data = (void *) (long) r4;
        struct perf_sample_data sample_data;
        struct perf_event *event;
+       struct file *file;
        struct perf_raw_record raw = {
                .size = size,
                .data = data,
@@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (unlikely(!event))
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
                     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
                return -EINVAL;
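
Both hunks above switch the bpf array from caching the perf_event pointer to
caching the struct file pointer whose reference the map already holds, and
resolve the event through file->private_data at call time; the event can then
never be freed while the map still pins the file. A generic userspace sketch
of that handle-owns-the-reference pattern (every name below is illustrative,
not kernel API):

    #include <stddef.h>

    struct event { int id; };

    struct handle {                 /* plays the role of struct file */
            int refcount;           /* pinned by whoever stored it */
            struct event *priv;     /* plays the role of ->private_data */
    };

    /* Resolve a stored handle to its object at use time; the table's
     * reference keeps *h, and therefore h->priv, alive. */
    static struct event *lookup_event(struct handle **table, size_t n,
                                      size_t i)
    {
            struct handle *h;

            if (i >= n)
                    return NULL;    /* -E2BIG in the kernel code */
            h = table[i];
            if (!h)
                    return NULL;    /* -ENOENT */
            return h->priv;
    }
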
index eca592f977b260689dc48911f867375b20ad5c8b..57a6eea8469408e7aba78669107eb3a1740362e4 100644 (file)
@@ -4961,7 +4961,7 @@ void ftrace_release_mod(struct module *mod)
        mutex_unlock(&ftrace_lock);
 }
 
-static void ftrace_module_enable(struct module *mod)
+void ftrace_module_enable(struct module *mod)
 {
        struct dyn_ftrace *rec;
        struct ftrace_page *pg;
@@ -5038,38 +5038,8 @@ void ftrace_module_init(struct module *mod)
        ftrace_process_locs(mod, mod->ftrace_callsites,
                            mod->ftrace_callsites + mod->num_ftrace_callsites);
 }
-
-static int ftrace_module_notify(struct notifier_block *self,
-                               unsigned long val, void *data)
-{
-       struct module *mod = data;
-
-       switch (val) {
-       case MODULE_STATE_COMING:
-               ftrace_module_enable(mod);
-               break;
-       case MODULE_STATE_GOING:
-               ftrace_release_mod(mod);
-               break;
-       default:
-               break;
-       }
-
-       return 0;
-}
-#else
-static int ftrace_module_notify(struct notifier_block *self,
-                               unsigned long val, void *data)
-{
-       return 0;
-}
 #endif /* CONFIG_MODULES */
 
-struct notifier_block ftrace_module_nb = {
-       .notifier_call = ftrace_module_notify,
-       .priority = INT_MIN,    /* Run after anything that can remove kprobes */
-};
-
 void __init ftrace_init(void)
 {
        extern unsigned long __start_mcount_loc[];
@@ -5098,10 +5068,6 @@ void __init ftrace_init(void)
                                  __start_mcount_loc,
                                  __stop_mcount_loc);
 
-       ret = register_module_notifier(&ftrace_module_nb);
-       if (ret)
-               pr_warning("Failed to register trace ftrace module exit notifier\n");
-
        set_ftrace_early_filters();
 
        return;
index 87fb9801bd9eead0d6e45bef49bc1eefc324c6a7..d9293402ee685ae59007fe1ade71866e121a0882 100644 (file)
@@ -1751,7 +1751,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 {
        __buffer_unlock_commit(buffer, event);
 
-       ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+       ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
        ftrace_trace_userstack(buffer, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
index f333e57c4614a2a6aaac04061003f73cd3a6d844..ab09829d3b97ac83eb34c5afd25bf487abfdbe68 100644 (file)
@@ -869,7 +869,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
                 * The ftrace subsystem is for showing formats only.
                 * They can not be enabled or disabled via the event files.
                 */
-               if (call->class && call->class->reg)
+               if (call->class && call->class->reg &&
+                   !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
                        return file;
        }
 
index dda9e6742950305f36fbe920f9fe0c6f68d83fbf..2a1abbaca10ec96126f3d8e380244214f77ee2a8 100644 (file)
@@ -125,6 +125,13 @@ check_stack(unsigned long ip, unsigned long *stack)
                        break;
        }
 
+       /*
+        * Some archs may not have the passed in ip in the dump.
+        * If that happens, we need to show everything.
+        */
+       if (i == stack_trace_max.nr_entries)
+               i = 0;
+
        /*
         * Now find where in the stack these are.
         */
@@ -149,7 +156,11 @@ check_stack(unsigned long ip, unsigned long *stack)
                for (; p < top && i < stack_trace_max.nr_entries; p++) {
                        if (stack_dump_trace[i] == ULONG_MAX)
                                break;
-                       if (*p == stack_dump_trace[i]) {
+                       /*
+                        * The READ_ONCE_NOCHECK is used to let KASAN know that
+                        * this is not a stack-out-of-bounds error.
+                        */
+                       if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
                                stack_dump_trace[x] = stack_dump_trace[i++];
                                this_size = stack_trace_index[x++] =
                                        (top - p) * sizeof(unsigned long);
index 61a0264e28f9b5917c0e8a60bb9668b21e59c4b2..7ff5dc7d2ac5f47395bc3be8484c642c5d577b6b 100644 (file)
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);    /* protects wq->maydays list */
 static LIST_HEAD(workqueues);          /* PR: list of all workqueues */
 static bool workqueue_freezing;                /* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
                                                  int node)
 {
        assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+       /*
+        * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+        * delayed item is pending.  The plan is to keep CPU -> NODE
+        * mapping valid and stable across CPU on/offlines.  Once that
+        * happens, this workaround can be removed.
+        */
+       if (unlikely(node == NUMA_NO_NODE))
+               return wq->dfl_pwq;
+
        return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
        return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+       static bool printed_dbg_warning;
+       int new_cpu;
+
+       if (likely(!wq_debug_force_rr_cpu)) {
+               if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+                       return cpu;
+       } else if (!printed_dbg_warning) {
+               pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+               printed_dbg_warning = true;
+       }
+
+       if (cpumask_empty(wq_unbound_cpumask))
+               return cpu;
+
+       new_cpu = __this_cpu_read(wq_rr_cpu_last);
+       new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+       if (unlikely(new_cpu >= nr_cpu_ids)) {
+               new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+               if (unlikely(new_cpu >= nr_cpu_ids))
+                       return cpu;
+       }
+       __this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+       return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
                         struct work_struct *work)
 {
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
                return;
 retry:
        if (req_cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
+               cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
        /* pwq which will be used unless @work is executing elsewhere */
        if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
        timer_stats_timer_set_start_info(&dwork->timer);
 
        dwork->wq = wq;
-       /* timer isn't guaranteed to run in this cpu, record earlier */
-       if (cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
        dwork->cpu = cpu;
        timer->expires = jiffies + delay;
 
-       add_timer_on(timer, cpu);
+       if (unlikely(cpu != WORK_CPU_UNBOUND))
+               add_timer_on(timer, cpu);
+       else
+               add_timer(timer);
 }
 
 /**
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
        WARN_ONCE(current->flags & PF_MEMALLOC,
                  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
                  current->pid, current->comm, target_wq->name, target_func);
-       WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+       WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+                             (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
                  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
                  worker->current_pwq->wq->name, worker->current_func,
                  target_wq->name, target_func);
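
Taken together, the workqueue hunks above drop the implicit local-execution
guarantee for unbound work: wq_select_unbound_cpu() keeps the local CPU only
if it is in wq_unbound_cpumask and otherwise round-robins across the allowed
online CPUs, and delayed work now uses plain add_timer() unless a CPU was
explicitly requested. A compact userspace sketch of the round-robin walk over
plain bitmasks (simple loops stand in for cpumask_next_and() and
cpumask_first_and(); this illustrates the selection logic, not the kernel
helpers):

    #define NR_CPUS 8

    static int rr_last = -1;        /* per-CPU variable in the kernel */

    static int next_allowed(unsigned allowed, unsigned online, int after)
    {
            int cpu;

            for (cpu = after + 1; cpu < NR_CPUS; cpu++)
                    if ((allowed & online) & (1u << cpu))
                            return cpu;
            return NR_CPUS;                 /* "cpu >= nr_cpu_ids" */
    }

    static int select_unbound_cpu(int local, unsigned allowed,
                                  unsigned online)
    {
            int cpu;

            if (allowed & (1u << local))    /* local CPU allowed: keep it */
                    return local;
            if (!allowed)                   /* empty mask: nothing better */
                    return local;

            cpu = next_allowed(allowed, online, rr_last);
            if (cpu >= NR_CPUS)             /* wrap around once */
                    cpu = next_allowed(allowed, online, -1);
            if (cpu >= NR_CPUS)
                    return local;
            rr_last = cpu;
            return cpu;
    }
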
index 5a0c1c83cdf0af97ea9446657b3527dc1e206d45..133ebc0c17735bf14be352776609bab732347f9d 100644 (file)
@@ -210,9 +210,11 @@ config RANDOM32_SELFTEST
 # compression support is select'ed if needed
 #
 config 842_COMPRESS
+       select CRC32
        tristate
 
 config 842_DECOMPRESS
+       select CRC32
        tristate
 
 config ZLIB_INFLATE
@@ -475,6 +477,11 @@ config DDR
          information. This data is useful for drivers handling
          DDR SDRAM controllers.
 
+config IRQ_POLL
+       bool "IRQ polling library"
+       help
+         Helper library for interrupt mitigation using polling.
+
 config MPILIB
        tristate
        select CLZ_TAB
index ecb9e75614bf87f1e368074d6ef84bfe3f1003b8..8bfd1aca7a3d01a9887700b3f90d1d5697a9dab4 100644 (file)
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
 
 endmenu # "RCU Debugging"
 
+config DEBUG_WQ_FORCE_RR_CPU
+       bool "Force round-robin CPU selection for unbound work items"
+       depends on DEBUG_KERNEL
+       default n
+       help
+         Workqueues used to implicitly guarantee that work items queued
+         without an explicit CPU are put on the local CPU.  This
+         guarantee no longer holds and, while the local CPU is still
+         preferred, work items may be put on foreign CPUs.  Kernel
+         parameter "workqueue.debug_force_rr_cpu" is added to force
+         round-robin CPU selection to flush out usages which depend on the
+         now broken guarantee.  This config option enables the debug
+         feature by default.  When enabled, memory and cache locality will
+         be impacted.
+
 config DEBUG_BLOCK_EXT_DEVT
         bool "Force extended block device numbers and spread them"
        depends on DEBUG_KERNEL
index 49518fb48cabb6d8c35b4a90b7d17917ebbfb211..e07c1ba9ba1339a80dac79d59255d80d600b096e 100644 (file)
@@ -18,6 +18,8 @@ config UBSAN_SANITIZE_ALL
          This option activates instrumentation for the entire kernel.
          If you don't enable this option, you have to explicitly specify
          UBSAN_SANITIZE := y for the files/directories you want to check for UB.
+         Enabling this option will significantly increase the kernel
+         image size.
 
 config UBSAN_ALIGNMENT
        bool "Enable checking of pointers alignment"
@@ -25,5 +27,5 @@ config UBSAN_ALIGNMENT
        default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS
        help
          This option enables detection of unaligned memory accesses.
-         Enabling this option on architectures that support unalligned
+         Enabling this option on architectures that support unaligned
          accesses may produce a lot of false positives.
index 2d4bc33d09b42097085480d7ce93ec3a8882faba..a7c26a41a738bd1dee37a0654cc87e5c51d65653 100644 (file)
@@ -165,6 +165,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
 
 obj-$(CONFIG_SG_SPLIT) += sg_split.o
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
+obj-$(CONFIG_IRQ_POLL) += irq_poll.o
 
 libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
               fdt_empty_tree.o
index 547f7f923dbcbd24f8bb99e13ef7c13db947b8dd..519b5a10fd704dd412c7cc9c7327fcb2da3c6f0e 100644 (file)
@@ -21,7 +21,7 @@
 #define ODEBUG_HASH_BITS       14
 #define ODEBUG_HASH_SIZE       (1 << ODEBUG_HASH_BITS)
 
-#define ODEBUG_POOL_SIZE       512
+#define ODEBUG_POOL_SIZE       1024
 #define ODEBUG_POOL_MIN_LEVEL  256
 
 #define ODEBUG_CHUNK_SHIFT     PAGE_SHIFT
index 6745c6230db3403629048256968443f51b777655..c30d07e99dba4cc32be6aeb4d353d79058509b4b 100644 (file)
@@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(-1);
 
 asmlinkage __visible void dump_stack(void)
 {
+       unsigned long flags;
        int was_locked;
        int old;
        int cpu;
@@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(void)
         * Permit this cpu to perform nested stack dumps while serialising
         * against other CPUs
         */
-       preempt_disable();
-
 retry:
+       local_irq_save(flags);
        cpu = smp_processor_id();
        old = atomic_cmpxchg(&dump_lock, -1, cpu);
        if (old == -1) {
@@ -43,6 +43,7 @@ retry:
        } else if (old == cpu) {
                was_locked = 1;
        } else {
+               local_irq_restore(flags);
                cpu_relax();
                goto retry;
        }
@@ -52,7 +53,7 @@ retry:
        if (!was_locked)
                atomic_set(&dump_lock, -1);
 
-       preempt_enable();
+       local_irq_restore(flags);
 }
 #else
 asmlinkage __visible void dump_stack(void)
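
The dump_stack() change above swaps preempt_disable() for local_irq_save() so
that an interrupt arriving on the CPU that already owns dump_lock can no
longer spin forever against its own lock, while nested dumps from the owning
CPU still proceed. A userspace C11 sketch of the owner-aware cmpxchg loop
(C11 atomics stand in for atomic_cmpxchg(); interrupt masking has no
userspace equivalent and is elided):

    #include <stdatomic.h>

    static atomic_int dump_lock = -1;   /* -1 == free, else owner id */

    static void serialized_dump(int self)
    {
            int expected, was_locked = 0;

            for (;;) {
                    expected = -1;
                    if (atomic_compare_exchange_strong(&dump_lock,
                                                       &expected, self))
                            break;          /* lock acquired */
                    if (expected == self) {
                            was_locked = 1; /* nested dump by the owner */
                            break;
                    }
                    /* contended: spin (the kernel re-enables interrupts
                     * around this point before retrying) */
            }

            /* ... emit the backtrace ... */

            if (!was_locked)
                    atomic_store(&dump_lock, -1);
    }
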
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
new file mode 100644 (file)
index 0000000..836f7db
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Functions related to interrupt-poll handling in the block layer. This
+ * is similar to NAPI for network devices.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/irq_poll.h>
+#include <linux/delay.h>
+
+static unsigned int irq_poll_budget __read_mostly = 256;
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
+
+/**
+ * irq_poll_sched - Schedule a run of the iopoll handler
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Add this irq_poll structure to the pending poll list and trigger the
+ *     raise of the blk iopoll softirq.
+ **/
+void irq_poll_sched(struct irq_poll *iop)
+{
+       unsigned long flags;
+
+       if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+               return;
+       if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+               return;
+
+       local_irq_save(flags);
+       list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
+       __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(irq_poll_sched);
+
+/**
+ * __irq_poll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     See irq_poll_complete(). This function must be called with interrupts
+ *     disabled.
+ **/
+static void __irq_poll_complete(struct irq_poll *iop)
+{
+       list_del(&iop->list);
+       smp_mb__before_atomic();
+       clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
+}
+
+/**
+ * irq_poll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     If a driver consumes less than the assigned budget in its run of the
+ *     iopoll handler, it'll end the polled mode by calling this function. The
+ *     iopoll handler will not be invoked again before irq_poll_sched()
+ *     is called.
+ **/
+void irq_poll_complete(struct irq_poll *iop)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __irq_poll_complete(iop);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(irq_poll_complete);
+
+static void irq_poll_softirq(struct softirq_action *h)
+{
+       struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
+       int rearm = 0, budget = irq_poll_budget;
+       unsigned long start_time = jiffies;
+
+       local_irq_disable();
+
+       while (!list_empty(list)) {
+               struct irq_poll *iop;
+               int work, weight;
+
+               /*
+                * If softirq window is exhausted then punt.
+                */
+               if (budget <= 0 || time_after(jiffies, start_time)) {
+                       rearm = 1;
+                       break;
+               }
+
+               local_irq_enable();
+
+               /* Even though interrupts have been re-enabled, this
+                * access is safe because interrupts can only add new
+                * entries to the tail of this list, and only ->poll()
+                * calls can remove this head entry from the list.
+                */
+               iop = list_entry(list->next, struct irq_poll, list);
+
+               weight = iop->weight;
+               work = 0;
+               if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
+                       work = iop->poll(iop, weight);
+
+               budget -= work;
+
+               local_irq_disable();
+
+               /*
+                * Drivers must not modify the iopoll state if they
+                * consume their assigned weight (or more; some drivers can't
+                * easily just stop processing, they have to complete an
+                * entire mask of commands). In such cases this code
+                * still "owns" the iopoll instance and therefore can
+                * move the instance around on the list at-will.
+                */
+               if (work >= weight) {
+                       if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+                               __irq_poll_complete(iop);
+                       else
+                               list_move_tail(&iop->list, list);
+               }
+       }
+
+       if (rearm)
+               __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+
+       local_irq_enable();
+}
+
+/**
+ * irq_poll_disable - Disable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Disable io polling and wait for any pending callbacks to have completed.
+ **/
+void irq_poll_disable(struct irq_poll *iop)
+{
+       set_bit(IRQ_POLL_F_DISABLE, &iop->state);
+       while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+               msleep(1);
+       clear_bit(IRQ_POLL_F_DISABLE, &iop->state);
+}
+EXPORT_SYMBOL(irq_poll_disable);
+
+/**
+ * irq_poll_enable - Enable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Enable iopoll on this @iop. Note that this does not schedule a
+ *     handler run; it only marks the iopoll as active.
+ **/
+void irq_poll_enable(struct irq_poll *iop)
+{
+       BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state));
+       smp_mb__before_atomic();
+       clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(irq_poll_enable);
+
+/**
+ * irq_poll_init - Initialize this @iop
+ * @iop:      The parent iopoll structure
+ * @weight:   The default weight (or command completion budget)
+ * @poll_fn:  The handler to invoke
+ *
+ * Description:
+ *     Initialize and enable this irq_poll structure.
+ **/
+void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
+{
+       memset(iop, 0, sizeof(*iop));
+       INIT_LIST_HEAD(&iop->list);
+       iop->weight = weight;
+       iop->poll = poll_fn;
+}
+EXPORT_SYMBOL(irq_poll_init);
+
+static int irq_poll_cpu_notify(struct notifier_block *self,
+                                unsigned long action, void *hcpu)
+{
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+               int cpu = (unsigned long) hcpu;
+
+               local_irq_disable();
+               list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+                                this_cpu_ptr(&blk_cpu_iopoll));
+               __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+               local_irq_enable();
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block irq_poll_cpu_notifier = {
+       .notifier_call  = irq_poll_cpu_notify,
+};
+
+static __init int irq_poll_setup(void)
+{
+       int i;
+
+       for_each_possible_cpu(i)
+               INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
+
+       open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
+       register_hotcpu_notifier(&irq_poll_cpu_notifier);
+       return 0;
+}
+subsys_initcall(irq_poll_setup);
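
A hedged sketch of how a driver would typically consume the irq_poll API
defined above, using only the functions this file exports; the mydev
structure and its mydev_* ring helpers are hypothetical:

    #include <linux/interrupt.h>
    #include <linux/irq_poll.h>
    #include <linux/kernel.h>

    struct mydev {
            struct irq_poll iop;
            /* ... completion ring, registers (hypothetical) ... */
    };

    static int mydev_poll(struct irq_poll *iop, int budget)
    {
            struct mydev *dev = container_of(iop, struct mydev, iop);
            int done = 0;

            /* consume up to @budget completions from the ring */
            while (done < budget && mydev_ring_nonempty(dev)) {
                    mydev_handle_one(dev);
                    done++;
            }

            if (done < budget)      /* ring drained: leave polled mode */
                    irq_poll_complete(iop);
            return done;
    }

    static irqreturn_t mydev_irq(int irq, void *data)
    {
            struct mydev *dev = data;

            mydev_mask_irq(dev);    /* quiesce, then poll from softirq */
            irq_poll_sched(&dev->iop);
            return IRQ_HANDLED;
    }

    /* at probe time: irq_poll_init(&dev->iop, 32, mydev_poll); */
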
index d74cf7a29afdb043112fee5c9ad7b37a631a9985..0507fa5d84c534917d0842a453bdbcaba386422a 100644 (file)
@@ -282,9 +282,9 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i,
                          struct klist_node *n)
 {
        i->i_klist = k;
-       i->i_cur = n;
-       if (n)
-               kref_get(&n->n_ref);
+       i->i_cur = NULL;
+       if (n && kref_get_unless_zero(&n->n_ref))
+               i->i_cur = n;
 }
 EXPORT_SYMBOL_GPL(klist_iter_init_node);
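
The klist fix above publishes the start node into the iterator only when
kref_get_unless_zero() succeeds, i.e. only when the node's refcount has not
already dropped to zero; this closes a use-after-free race with concurrent
removal. The get-unless-zero idiom in a standalone C11 sketch:

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Take a reference only if the object is not already being freed. */
    static bool get_unless_zero(atomic_int *refcount)
    {
            int old = atomic_load(refcount);

            while (old != 0) {
                    if (atomic_compare_exchange_weak(refcount, &old,
                                                     old + 1))
                            return true;    /* reference taken */
                    /* CAS failure reloaded old; retry */
            }
            return false;   /* refcount hit zero: object is dying */
    }
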
 
index 31ce853fbfb1be6a2b1905e593377520ab951eab..74a54b7f25626e8c6d224af2b7384f7dcbf2e72f 100644 (file)
@@ -75,3 +75,4 @@ module_exit(libcrc32c_mod_fini);
 MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
 MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: crc32c");
index 028f5d996eef64aa960546010e8321b1b0c5a369..28ba40b99337e7098783b6693c55527ea1952ab3 100644 (file)
@@ -238,7 +238,7 @@ void lc_reset(struct lru_cache *lc)
  * @seq: the seq_file to print into
  * @lc: the lru cache to print statistics of
  */
-size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
+void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
 {
        /* NOTE:
         * total calls to lc_get are
@@ -250,8 +250,6 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
        seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
                   lc->name, lc->used, lc->nr_elements,
                   lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
-
-       return 0;
 }
 
 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
index fcf5d98574ce46871dca087d2c803dbfb67c0b81..6b79e9026e24894000a2bdf4180db80f8190b357 100644 (file)
@@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                return 0;
 
        radix_tree_for_each_slot(slot, root, &iter, first_index) {
-               results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+               results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
+               if (radix_tree_is_indirect_ptr(results[ret])) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
                if (++ret == max_items)
                        break;
        }
@@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
                return 0;
 
        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
-               results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+               results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
+               if (radix_tree_is_indirect_ptr(results[ret])) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
                if (++ret == max_items)
                        break;
        }
index 40e03ea2a967d978cffae1cbeffe23d609ce01bc..2c5de86460c5bb82f3d1c83b136b2f985484298a 100644 (file)
@@ -49,7 +49,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
                if (rs->missed)
                        printk(KERN_WARNING "%s: %d callbacks suppressed\n",
                                func, rs->missed);
-               rs->begin   = 0;
+               rs->begin   = jiffies;
                rs->printed = 0;
                rs->missed  = 0;
        }
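
The one-line ratelimit fix above matters because rs->begin marks the start of
the current suppression window: re-arming it to jiffies opens the next window
at the moment the old one expires, whereas resetting it to 0 deferred the new
window's start and skewed the interval accounting. A minimal userspace sketch
of the fixed window logic (seconds stand in for jiffies):

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    struct ratelimit {
            time_t begin;   /* start of current window */
            int interval;   /* window length, seconds */
            int burst;      /* messages allowed per window */
            int printed, missed;
    };

    static bool ratelimit_ok(struct ratelimit *rs)
    {
            time_t now = time(NULL);

            if (!rs->begin)
                    rs->begin = now;
            if (now - rs->begin >= rs->interval) {
                    if (rs->missed)
                            fprintf(stderr, "%d callbacks suppressed\n",
                                    rs->missed);
                    rs->begin = now;        /* the fix: re-arm to now */
                    rs->printed = 0;
                    rs->missed = 0;
            }
            if (rs->printed < rs->burst) {
                    rs->printed++;
                    return true;
            }
            rs->missed++;
            return false;
    }
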
index bafa9933fa768d6f76e4ade1296c873a3c38c139..004fc70fc56a3d06947f9e89c60e40e272ce551c 100644 (file)
@@ -598,9 +598,9 @@ EXPORT_SYMBOL(sg_miter_next);
  *
  * Description:
  *   Stops mapping iterator @miter.  @miter should have been started
- *   started using sg_miter_start().  A stopped iteration can be
- *   resumed by calling sg_miter_next() on it.  This is useful when
- *   resources (kmap) need to be released during iteration.
+ *   using sg_miter_start().  A stopped iteration can be resumed by
+ *   calling sg_miter_next() on it.  This is useful when resources (kmap)
+ *   need to be released during iteration.
  *
  * Context:
  *   Preemption disabled if the SG_MITER_ATOMIC is set.  Don't care
index 98866a770770c8bba0acf7bdbacea4699d9f14bf..25b5cbfb7615bd63da19677c877b9e49c3f519e3 100644 (file)
@@ -327,36 +327,67 @@ out:
 }
 
 #define string_get_size_maxbuf 16
-#define test_string_get_size_one(size, blk_size, units, exp_result)            \
+#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2)    \
        do {                                                                   \
-               BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf);    \
-               __test_string_get_size((size), (blk_size), (units),            \
-                                      (exp_result));                          \
+               BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf);  \
+               BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf);   \
+               __test_string_get_size((size), (blk_size), (exp_result10),     \
+                                      (exp_result2));                         \
        } while (0)
 
 
-static __init void __test_string_get_size(const u64 size, const u64 blk_size,
-                                         const enum string_size_units units,
-                                         const char *exp_result)
+static __init void test_string_get_size_check(const char *units,
+                                             const char *exp,
+                                             char *res,
+                                             const u64 size,
+                                             const u64 blk_size)
 {
-       char buf[string_get_size_maxbuf];
-
-       string_get_size(size, blk_size, units, buf, sizeof(buf));
-       if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
+       if (!memcmp(res, exp, strlen(exp) + 1))
                return;
 
-       buf[sizeof(buf) - 1] = '\0';
-       pr_warn("Test 'test_string_get_size_one' failed!\n");
-       pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
+       res[string_get_size_maxbuf - 1] = '\0';
+
+       pr_warn("Test 'test_string_get_size' failed!\n");
+       pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n",
                size, blk_size, units);
-       pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
+       pr_warn("expected: '%s', got '%s'\n", exp, res);
+}
+
+static __init void __test_string_get_size(const u64 size, const u64 blk_size,
+                                         const char *exp_result10,
+                                         const char *exp_result2)
+{
+       char buf10[string_get_size_maxbuf];
+       char buf2[string_get_size_maxbuf];
+
+       string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
+       string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+
+       test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
+                                  size, blk_size);
+
+       test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2,
+                                  size, blk_size);
 }
 
 static __init void test_string_get_size(void)
 {
-       test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
-       test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
-       test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
+       /* small values */
+       test_string_get_size_one(0, 512, "0 B", "0 B");
+       test_string_get_size_one(1, 512, "512 B", "512 B");
+       test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB");
+
+       /* normal values */
+       test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB");
+       test_string_get_size_one(500118192, 512, "256 GB", "238 GiB");
+       test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB");
+
+       /* weird block sizes */
+       test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB");
+
+       /* huge values */
+       test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB");
+       test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
 }
 
 static int __init test_string_helpers_init(void)
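
The updated expectations follow directly from the two unit systems: 16384
blocks of 512 bytes is 8,388,608 bytes, which is 8.00 MiB (2^20 bytes per
MiB) but 8.39 MB (10^6 bytes per MB). A quick check in plain C (double
arithmetic for brevity; the kernel helper itself avoids floating point):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long bytes = 16384ULL * 512;  /* 8388608 */

            printf("%.2f MiB\n", bytes / (1024.0 * 1024.0)); /* 8.00 */
            printf("%.2f MB\n",  bytes / 1e6);               /* 8.39 */
            return 0;
    }
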
index 6f500ef2301d893c9b2c737747fc355974ff4a19..f0b323abb4c64a566700d9d265f4451174847eaa 100644 (file)
@@ -49,3 +49,65 @@ ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len)
         }
 }
 EXPORT_SYMBOL(ucs2_strncmp);
+
+unsigned long
+ucs2_utf8size(const ucs2_char_t *src)
+{
+       unsigned long i;
+       unsigned long j = 0;
+
+       for (i = 0; i < ucs2_strlen(src); i++) {
+               u16 c = src[i];
+
+               if (c >= 0x800)
+                       j += 3;
+               else if (c >= 0x80)
+                       j += 2;
+               else
+                       j += 1;
+       }
+
+       return j;
+}
+EXPORT_SYMBOL(ucs2_utf8size);
+
+/*
+ * copy at most maxlength bytes of whole utf8 characters to dest from the
+ * ucs2 string src.
+ *
+ * The return value is the number of characters copied, not including the
+ * final NUL character.
+ */
+unsigned long
+ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
+{
+       unsigned int i;
+       unsigned long j = 0;
+       unsigned long limit = ucs2_strnlen(src, maxlength);
+
+       for (i = 0; maxlength && i < limit; i++) {
+               u16 c = src[i];
+
+               if (c >= 0x800) {
+                       if (maxlength < 3)
+                               break;
+                       maxlength -= 3;
+                       dest[j++] = 0xe0 | (c & 0xf000) >> 12;
+                       dest[j++] = 0x80 | (c & 0x0fc0) >> 6;
+                       dest[j++] = 0x80 | (c & 0x003f);
+               } else if (c >= 0x80) {
+                       if (maxlength < 2)
+                               break;
+                       maxlength -= 2;
+                       dest[j++] = 0xc0 | (c & 0x7c0) >> 6;
+                       dest[j++] = 0x80 | (c & 0x03f);
+               } else {
+                       maxlength -= 1;
+                       dest[j++] = c & 0x7f;
+               }
+       }
+       if (maxlength)
+               dest[j] = '\0';
+       return j;
+}
+EXPORT_SYMBOL(ucs2_as_utf8);
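
ucs2_as_utf8() above emits one, two or three UTF-8 bytes per UCS-2 code unit
and refuses to split a multi-byte sequence at the maxlength boundary. The
same bit-packing for a single code unit, as a standalone userspace
demonstration (not the kernel function):

    #include <stdint.h>
    #include <stdio.h>

    /* Encode one UCS-2 code unit as UTF-8; returns bytes written (1-3). */
    static int ucs2_char_to_utf8(uint16_t c, uint8_t out[3])
    {
            if (c >= 0x800) {
                    out[0] = 0xe0 | (c & 0xf000) >> 12;
                    out[1] = 0x80 | (c & 0x0fc0) >> 6;
                    out[2] = 0x80 | (c & 0x003f);
                    return 3;
            }
            if (c >= 0x80) {
                    out[0] = 0xc0 | (c & 0x7c0) >> 6;
                    out[1] = 0x80 | (c & 0x03f);
                    return 2;
            }
            out[0] = c & 0x7f;
            return 1;
    }

    int main(void)
    {
            uint8_t buf[3];
            int i, n = ucs2_char_to_utf8(0x20ac, buf); /* U+20AC, EURO */

            for (i = 0; i < n; i++)
                    printf("%02x ", buf[i]);           /* e2 82 ac */
            printf("\n");
            return 0;
    }
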
index 48ff9c36644d64c324c5c243212f282e406dce2f..f44e178e6edec22e2f7c405f8cb433b2be935f83 100644 (file)
@@ -1590,22 +1590,23 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
                        return buf;
                }
        case 'K':
-               /*
-                * %pK cannot be used in IRQ context because its test
-                * for CAP_SYSLOG would be meaningless.
-                */
-               if (kptr_restrict && (in_irq() || in_serving_softirq() ||
-                                     in_nmi())) {
-                       if (spec.field_width == -1)
-                               spec.field_width = default_width;
-                       return string(buf, end, "pK-error", spec);
-               }
-
                switch (kptr_restrict) {
                case 0:
                        /* Always print %pK values */
                        break;
                case 1: {
+                       const struct cred *cred;
+
+                       /*
+                        * kptr_restrict==1 cannot be used in IRQ context
+                        * because its test for CAP_SYSLOG would be meaningless.
+                        */
+                       if (in_irq() || in_serving_softirq() || in_nmi()) {
+                               if (spec.field_width == -1)
+                                       spec.field_width = default_width;
+                               return string(buf, end, "pK-error", spec);
+                       }
+
                        /*
                         * Only print the real pointer value if the current
                         * process has CAP_SYSLOG and is running with the
@@ -1615,8 +1616,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
                         * leak pointer values if a binary opens a file using
                         * %pK and then elevates privileges before reading it.
                         */
-                       const struct cred *cred = current_cred();
-
+                       cred = current_cred();
                        if (!has_capability_noaudit(current, CAP_SYSLOG) ||
                            !uid_eq(cred->euid, cred->uid) ||
                            !gid_eq(cred->egid, cred->gid))
index 97a4e06b15c00dfd7f8b8bc0f1e1e4610b769938..03cbfa072f42a7378ed57b8b1060641b31e7d97e 100644 (file)
@@ -624,7 +624,7 @@ config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
        bool
 
 config DEFERRED_STRUCT_PAGE_INIT
-       bool "Defer initialisation of struct pages to kswapd"
+       bool "Defer initialisation of struct pages to kthreads"
        default n
        depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
        depends on MEMORY_HOTPLUG
@@ -633,9 +633,10 @@ config DEFERRED_STRUCT_PAGE_INIT
          single thread. On very large machines this can take a considerable
          amount of time. If this option is set, large machines will bring up
          a subset of memmap at boot and then initialise the rest in parallel
-         when kswapd starts. This has a potential performance impact on
-         processes running early in the lifetime of the systemm until kswapd
-         finishes the initialisation.
+         by starting one-off "pgdatinitX" kernel thread for each node X. This
+         has a potential performance impact on processes running early in the
+         lifetime of the system until these kthreads finish the
+         initialisation.
 
 config IDLE_PAGE_TRACKING
        bool "Enable idle page tracking"
index cc5d29d2da9b4b36be3fde383267ff10c59e1422..c554d173a65fa640186b058e5e97be68b48c7ea5 100644 (file)
@@ -328,7 +328,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
        return 0;
 
 out_destroy_stat:
-       while (--i)
+       while (i--)
                percpu_counter_destroy(&wb->stat[i]);
        fprop_local_destroy_percpu(&wb->completions);
 out_put_cong:
@@ -989,7 +989,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
                 * here rather than calling cond_resched().
                 */
                if (current->flags & PF_WQ_WORKER)
-                       schedule_timeout(1);
+                       schedule_timeout_uninterruptible(1);
                else
                        cond_resched();
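
The wb_init() hunk above is the classic partial-initialization unwind
off-by-one: when percpu_counter_init() fails at index i, counters 0..i-1 are
live; "while (i--)" tears all of them down, whereas the old "while (--i)"
leaked index 0. A minimal illustration:

    #include <stdio.h>

    #define N 4

    int main(void)
    {
            int i = N;      /* failure occurred after N successful inits */

            while (i--)
                    printf("destroy counter %d\n", i);  /* 3, 2, 1, 0 */
            return 0;
    }
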
 
index 8fc50811119b28b54f15ac875ddc956217acc570..ba5d8f3e6d68a3769589c372adc3183b4addfb40 100644 (file)
@@ -22,7 +22,7 @@
  * cleancache_ops is set by cleancache_register_ops to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
-static struct cleancache_ops *cleancache_ops __read_mostly;
+static const struct cleancache_ops *cleancache_ops __read_mostly;
 
 /*
  * Counters available via /sys/kernel/debug/cleancache (if debugfs is
@@ -49,7 +49,7 @@ static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
 /*
  * Register operations for cleancache. Returns 0 on success.
  */
-int cleancache_register_ops(struct cleancache_ops *ops)
+int cleancache_register_ops(const struct cleancache_ops *ops)
 {
        if (cmpxchg(&cleancache_ops, NULL, ops))
                return -EBUSY;
index 847ee43c28068a0fb744fe124c1d8afa429d0719..3461d97ecb30bfe18d1ed50729e5147aa2ad7e6d 100644 (file)
@@ -11,6 +11,7 @@
  */
 #include <linux/export.h>
 #include <linux/compiler.h>
+#include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/capability.h>
@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
        __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
 
        if (shadow) {
-               mapping->nrshadows++;
+               mapping->nrexceptional++;
                /*
-                * Make sure the nrshadows update is committed before
+                * Make sure the nrexceptional update is committed before
                 * the nrpages update so that final truncate racing
                 * with reclaim does not see both counters 0 at the
                 * same time and miss a shadow entry.
@@ -445,7 +446,8 @@ int filemap_write_and_wait(struct address_space *mapping)
 {
        int err = 0;
 
-       if (mapping->nrpages) {
+       if ((!dax_mapping(mapping) && mapping->nrpages) ||
+           (dax_mapping(mapping) && mapping->nrexceptional)) {
                err = filemap_fdatawrite(mapping);
                /*
                 * Even if the above returned error, the pages may be
@@ -481,7 +483,8 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 {
        int err = 0;
 
-       if (mapping->nrpages) {
+       if ((!dax_mapping(mapping) && mapping->nrpages) ||
+           (dax_mapping(mapping) && mapping->nrexceptional)) {
                err = __filemap_fdatawrite_range(mapping, lstart, lend,
                                                 WB_SYNC_ALL);
                /* See comment of filemap_write_and_wait() */
@@ -579,9 +582,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
                p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
                if (!radix_tree_exceptional_entry(p))
                        return -EEXIST;
+
+               if (WARN_ON(dax_mapping(mapping)))
+                       return -EINVAL;
+
                if (shadowp)
                        *shadowp = p;
-               mapping->nrshadows--;
+               mapping->nrexceptional--;
                if (node)
                        workingset_node_shadows_dec(node);
        }
@@ -1245,9 +1252,9 @@ repeat:
                        if (radix_tree_deref_retry(page))
                                goto restart;
                        /*
-                        * A shadow entry of a recently evicted page,
-                        * or a swap entry from shmem/tmpfs.  Return
-                        * it without attempting to raise page count.
+                        * A shadow entry of a recently evicted page, a swap
+                        * entry from shmem/tmpfs or a DAX entry.  Return it
+                        * without attempting to raise page count.
                         */
                        goto export;
                }
@@ -1494,6 +1501,74 @@ repeat:
 }
 EXPORT_SYMBOL(find_get_pages_tag);
 
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:   the address_space to search
+ * @start:     the starting page cache index
+ * @tag:       the tag index
+ * @nr_entries:        the maximum number of entries
+ * @entries:   where the resulting entries are placed
+ * @indices:   the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+                       int tag, unsigned int nr_entries,
+                       struct page **entries, pgoff_t *indices)
+{
+       void **slot;
+       unsigned int ret = 0;
+       struct radix_tree_iter iter;
+
+       if (!nr_entries)
+               return 0;
+
+       rcu_read_lock();
+restart:
+       radix_tree_for_each_tagged(slot, &mapping->page_tree,
+                                  &iter, start, tag) {
+               struct page *page;
+repeat:
+               page = radix_tree_deref_slot(slot);
+               if (unlikely(!page))
+                       continue;
+               if (radix_tree_exception(page)) {
+                       if (radix_tree_deref_retry(page)) {
+                               /*
+                                * Transient condition which can only trigger
+                                * when entry at index 0 moves out of or back
+                                * to root: none yet gotten, safe to restart.
+                                */
+                               goto restart;
+                       }
+
+                       /*
+                        * A shadow entry of a recently evicted page, a swap
+                        * entry from shmem/tmpfs or a DAX entry.  Return it
+                        * without attempting to raise page count.
+                        */
+                       goto export;
+               }
+               if (!page_cache_get_speculative(page))
+                       goto repeat;
+
+               /* Has the page moved? */
+               if (unlikely(page != *slot)) {
+                       page_cache_release(page);
+                       goto repeat;
+               }
+export:
+               indices[ret] = iter.index;
+               entries[ret] = page;
+               if (++ret == nr_entries)
+                       break;
+       }
+       rcu_read_unlock();
+       return ret;
+}
+EXPORT_SYMBOL(find_get_entries_tag);
+
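
A hedged sketch of how a caller might batch through the new
find_get_entries_tag() above, e.g. when writing back tagged DAX entries;
PAGECACHE_TAG_TOWRITE is an existing radix-tree tag, the loop body is
illustrative, and the declaration is assumed to live in linux/pagemap.h
alongside its siblings:

    #include <linux/pagemap.h>

    #define NR_BATCH 16

    static void process_tagged_entries(struct address_space *mapping)
    {
            struct page *entries[NR_BATCH];
            pgoff_t indices[NR_BATCH];
            pgoff_t start = 0;
            unsigned int i, n;

            while ((n = find_get_entries_tag(mapping, start,
                                             PAGECACHE_TAG_TOWRITE,
                                             NR_BATCH, entries, indices))) {
                    for (i = 0; i < n; i++) {
                            /* entries[i] may be a page or an exceptional
                             * (shadow/swap/DAX) entry; handle and, for
                             * real pages, drop the speculative ref */
                    }
                    start = indices[n - 1] + 1;
            }
    }
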
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario:
@@ -1811,6 +1886,7 @@ EXPORT_SYMBOL(generic_file_read_iter);
  * page_cache_read - adds requested page to the page cache if not already there
  * @file:      file to read
  * @offset:    page index
+ * @gfp_mask:  memory allocation flags
  *
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
@@ -2684,11 +2760,11 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret > 0)
                ret = __generic_file_write_iter(iocb, from);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (ret > 0) {
                ssize_t err;
index b64a36175884e07604b0e216bc2d545a2892dcb7..7bf19ffa21999c13fa1f24dc01a6bda77217688c 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -430,10 +430,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
                         * Anon pages in shared mappings are surprising: now
                         * just reject it.
                         */
-                       if (!is_cow_mapping(vm_flags)) {
-                               WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
+                       if (!is_cow_mapping(vm_flags))
                                return -EFAULT;
-                       }
                }
        } else if (!(vm_flags & VM_READ)) {
                if (!(gup_flags & FOLL_FORCE))
index 8ad580273521046904ec13478e7b897839d955d7..e10a4fee88d2bcf1f4bba89285391cbadfb96098 100644 (file)
@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = {
        .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
 };
 
-static DEFINE_SPINLOCK(split_queue_lock);
-static LIST_HEAD(split_queue);
-static unsigned long split_queue_len;
 static struct shrinker deferred_split_shrinker;
 
 static void set_recommended_min_free_kbytes(void)
@@ -861,7 +858,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                return false;
        entry = mk_pmd(zero_page, vma->vm_page_prot);
        entry = pmd_mkhuge(entry);
-       pgtable_trans_huge_deposit(mm, pmd, pgtable);
+       if (pgtable)
+               pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, haddr, pmd, entry);
        atomic_long_inc(&mm->nr_ptes);
        return true;
@@ -1039,13 +1037,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        spinlock_t *dst_ptl, *src_ptl;
        struct page *src_page;
        pmd_t pmd;
-       pgtable_t pgtable;
+       pgtable_t pgtable = NULL;
        int ret;
 
-       ret = -ENOMEM;
-       pgtable = pte_alloc_one(dst_mm, addr);
-       if (unlikely(!pgtable))
-               goto out;
+       if (!vma_is_dax(vma)) {
+               ret = -ENOMEM;
+               pgtable = pte_alloc_one(dst_mm, addr);
+               if (unlikely(!pgtable))
+                       goto out;
+       }
 
        dst_ptl = pmd_lock(dst_mm, dst_pmd);
        src_ptl = pmd_lockptr(src_mm, src_pmd);
@@ -1076,7 +1076,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                goto out_unlock;
        }
 
-       if (pmd_trans_huge(pmd)) {
+       if (!vma_is_dax(vma)) {
                /* thp accounting separate from pmd_devmap accounting */
                src_page = pmd_page(pmd);
                VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
@@ -1560,7 +1560,8 @@ int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        struct mm_struct *mm = tlb->mm;
        int ret = 0;
 
-       if (!pmd_trans_huge_lock(pmd, vma, &ptl))
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (!ptl)
                goto out_unlocked;
 
        orig_pmd = *pmd;
@@ -1627,7 +1628,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        pmd_t orig_pmd;
        spinlock_t *ptl;
 
-       if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
+       ptl = __pmd_trans_huge_lock(pmd, vma);
+       if (!ptl)
                return 0;
        /*
         * For architectures like ppc64 we look at deposited pgtable
@@ -1690,14 +1692,16 @@ bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
         * We don't have to worry about the ordering of src and dst
         * ptlocks because exclusive mmap_sem prevents deadlock.
         */
-       if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
+       old_ptl = __pmd_trans_huge_lock(old_pmd, vma);
+       if (old_ptl) {
                new_ptl = pmd_lockptr(mm, new_pmd);
                if (new_ptl != old_ptl)
                        spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
                pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
                VM_BUG_ON(!pmd_none(*new_pmd));
 
-               if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
+               if (pmd_move_must_withdraw(new_ptl, old_ptl) &&
+                               vma_is_anonymous(vma)) {
                        pgtable_t pgtable;
                        pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
                        pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
@@ -1724,7 +1728,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
        spinlock_t *ptl;
        int ret = 0;
 
-       if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = __pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                pmd_t entry;
                bool preserve_write = prot_numa && pmd_write(*pmd);
                ret = 1;
@@ -1760,14 +1765,14 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
  * Note that if it returns true, this routine returns without unlocking page
  * table lock. So callers must unlock it.
  */
-bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
 {
-       *ptl = pmd_lock(vma->vm_mm, pmd);
+       spinlock_t *ptl;
+       ptl = pmd_lock(vma->vm_mm, pmd);
        if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
-               return true;
-       spin_unlock(*ptl);
-       return false;
+               return ptl;
+       spin_unlock(ptl);
+       return NULL;
 }
 
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
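
The API change above, returning the spinlock pointer (or NULL) rather than a
bool plus an out-parameter, keeps the lock's identity and lifetime visible at
every call site. The same lock-or-NULL style in a small pthreads sketch
(illustration only; the flag is a stand-in for the pmd_trans_huge() and
pmd_devmap() test):

    #include <pthread.h>
    #include <stdbool.h>

    struct object {
            pthread_mutex_t lock;
            bool is_huge;           /* condition the lock protects */
    };

    /* Lock and return the lock if the condition holds; otherwise unlock
     * and return NULL, mirroring __pmd_trans_huge_lock(). */
    static pthread_mutex_t *lock_if_huge(struct object *o)
    {
            pthread_mutex_lock(&o->lock);
            if (o->is_huge)
                    return &o->lock;
            pthread_mutex_unlock(&o->lock);
            return NULL;
    }

    /* caller:
     *      pthread_mutex_t *ptl = lock_if_huge(o);
     *      if (ptl) { ... work ...; pthread_mutex_unlock(ptl); }
     */
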
@@ -2068,7 +2073,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
        if (likely(writable)) {
                if (likely(referenced)) {
                        result = SCAN_SUCCEED;
-                       trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+                       trace_mm_collapse_huge_page_isolate(page, none_or_zero,
                                                            referenced, writable, result);
                        return 1;
                }
@@ -2078,7 +2083,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 out:
        release_pte_pages(pte, _pte);
-       trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+       trace_mm_collapse_huge_page_isolate(page, none_or_zero,
                                            referenced, writable, result);
        return 0;
 }
@@ -2576,7 +2581,7 @@ out_unmap:
                collapse_huge_page(mm, address, hpage, vma, node);
        }
 out:
-       trace_mm_khugepaged_scan_pmd(mm, page_to_pfn(page), writable, referenced,
+       trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
                                     none_or_zero, result);
        return ret;
 }
@@ -2831,6 +2836,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        pgtable_t pgtable;
        pmd_t _pmd;
        bool young, write, dirty;
+       unsigned long addr;
        int i;
 
        VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
@@ -2856,10 +2862,11 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        young = pmd_young(*pmd);
        dirty = pmd_dirty(*pmd);
 
+       pmdp_huge_split_prepare(vma, haddr, pmd);
        pgtable = pgtable_trans_huge_withdraw(mm, pmd);
        pmd_populate(mm, &_pmd, pgtable);
 
-       for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+       for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
                pte_t entry, *pte;
                /*
                 * Note that NUMA hinting access restrictions are not
@@ -2880,9 +2887,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                }
                if (dirty)
                        SetPageDirty(page + i);
-               pte = pte_offset_map(&_pmd, haddr);
+               pte = pte_offset_map(&_pmd, addr);
                BUG_ON(!pte_none(*pte));
-               set_pte_at(mm, haddr, pte, entry);
+               set_pte_at(mm, addr, pte, entry);
                atomic_inc(&page[i]._mapcount);
                pte_unmap(pte);
        }
@@ -2932,7 +2939,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        pmd_populate(mm, pmd, pgtable);
 
        if (freeze) {
-               for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+               for (i = 0; i < HPAGE_PMD_NR; i++) {
                        page_remove_rmap(page + i, false);
                        put_page(page + i);
                }
@@ -3354,6 +3361,7 @@ int total_mapcount(struct page *page)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
        struct page *head = compound_head(page);
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
        struct anon_vma *anon_vma;
        int count, mapcount, ret;
        bool mlocked;
@@ -3397,19 +3405,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                lru_add_drain();
 
        /* Prevent deferred_split_scan() touching ->_count */
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        count = page_count(head);
        mapcount = total_mapcount(head);
        if (!mapcount && count == 1) {
                if (!list_empty(page_deferred_list(head))) {
-                       split_queue_len--;
+                       pgdata->split_queue_len--;
                        list_del(page_deferred_list(head));
                }
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                __split_huge_page(page, list);
                ret = 0;
        } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                pr_alert("total_mapcount: %u, page_count(): %u\n",
                                mapcount, count);
                if (PageTail(page))
@@ -3417,7 +3425,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                dump_page(page, "total_mapcount(head) > 0");
                BUG();
        } else {
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                unfreeze_page(anon_vma, head);
                ret = -EBUSY;
        }
@@ -3432,64 +3440,65 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
        unsigned long flags;
 
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        if (!list_empty(page_deferred_list(page))) {
-               split_queue_len--;
+               pgdata->split_queue_len--;
                list_del(page_deferred_list(page));
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
        free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
        unsigned long flags;
 
        VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        if (list_empty(page_deferred_list(page))) {
-               list_add_tail(page_deferred_list(page), &split_queue);
-               split_queue_len++;
+               list_add_tail(page_deferred_list(page), &pgdata->split_queue);
+               pgdata->split_queue_len++;
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
                struct shrink_control *sc)
 {
-       /*
-        * Split a page from split_queue will free up at least one page,
-        * at most HPAGE_PMD_NR - 1. We don't track exact number.
-        * Let's use HPAGE_PMD_NR / 2 as ballpark.
-        */
-       return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+       struct pglist_data *pgdata = NODE_DATA(sc->nid);
+       return ACCESS_ONCE(pgdata->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
                struct shrink_control *sc)
 {
+       struct pglist_data *pgdata = NODE_DATA(sc->nid);
        unsigned long flags;
        LIST_HEAD(list), *pos, *next;
        struct page *page;
        int split = 0;
 
-       spin_lock_irqsave(&split_queue_lock, flags);
-       list_splice_init(&split_queue, &list);
-
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        /* Take pin on all head pages to avoid freeing them under us */
-       list_for_each_safe(pos, next, &list) {
+       list_for_each_safe(pos, next, &pgdata->split_queue) {
                page = list_entry((void *)pos, struct page, mapping);
                page = compound_head(page);
-               /* race with put_compound_page() */
-               if (!get_page_unless_zero(page)) {
+               if (get_page_unless_zero(page)) {
+                       list_move(page_deferred_list(page), &list);
+               } else {
+                       /* We lost race with put_compound_page() */
                        list_del_init(page_deferred_list(page));
-                       split_queue_len--;
+                       pgdata->split_queue_len--;
                }
+               if (!--sc->nr_to_scan)
+                       break;
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
        list_for_each_safe(pos, next, &list) {
                page = list_entry((void *)pos, struct page, mapping);
@@ -3501,17 +3510,24 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
                put_page(page);
        }
 
-       spin_lock_irqsave(&split_queue_lock, flags);
-       list_splice_tail(&list, &split_queue);
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       list_splice_tail(&list, &pgdata->split_queue);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
-       return split * HPAGE_PMD_NR / 2;
+       /*
+        * Stop shrinker if we didn't split any page, but the queue is empty.
+        * This can happen if pages were freed under us.
+        */
+       if (!split && list_empty(&pgdata->split_queue))
+               return SHRINK_STOP;
+       return split;
 }
 
 static struct shrinker deferred_split_shrinker = {
        .count_objects = deferred_split_count,
        .scan_objects = deferred_split_scan,
        .seeks = DEFAULT_SEEKS,
+       .flags = SHRINKER_NUMA_AWARE,
 };
 
 #ifdef CONFIG_DEBUG_FS
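
Editor's note: the SHRINKER_NUMA_AWARE flag added above is what makes the
per-node split_queue work: the shrinker core then calls ->count_objects()
and ->scan_objects() once per NUMA node, passing the node id in sc->nid,
so each node's queue is drained locally. A minimal sketch of the pattern
(demo_* names are hypothetical; this mirrors the registration above):

static unsigned long demo_count(struct shrinker *shrink,
		struct shrink_control *sc)
{
	/* report only this node's backlog */
	return NODE_DATA(sc->nid)->split_queue_len;
}

static unsigned long demo_scan(struct shrinker *shrink,
		struct shrink_control *sc)
{
	/* walk NODE_DATA(sc->nid)->split_queue, up to sc->nr_to_scan */
	return SHRINK_STOP;
}

static struct shrinker demo_shrinker = {
	.count_objects = demo_count,
	.scan_objects  = demo_scan,
	.seeks         = DEFAULT_SEEKS,
	.flags         = SHRINKER_NUMA_AWARE,
};
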
index 12908dcf58316afd3f4b14d379a8e139249cd396..01f2b48c8618a9f973eeb11f2162b75bb8cf67d4 100644 (file)
@@ -1001,7 +1001,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
                ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
                nr_nodes--)
 
-#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
+#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA))
 static void destroy_compound_gigantic_page(struct page *page,
                                        unsigned int order)
 {
@@ -1214,8 +1214,8 @@ void free_huge_page(struct page *page)
 
        set_page_private(page, 0);
        page->mapping = NULL;
-       BUG_ON(page_count(page));
-       BUG_ON(page_mapcount(page));
+       VM_BUG_ON_PAGE(page_count(page), page);
+       VM_BUG_ON_PAGE(page_mapcount(page), page);
        restore_reserve = PagePrivate(page);
        ClearPagePrivate(page);
 
@@ -1286,6 +1286,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned int order)
                set_page_count(p, 0);
                set_compound_head(p, page);
        }
+       atomic_set(compound_mapcount_ptr(page), -1);
 }
 
 /*
@@ -2629,8 +2630,10 @@ static int __init hugetlb_init(void)
                        hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
        }
        default_hstate_idx = hstate_index(size_to_hstate(default_hstate_size));
-       if (default_hstate_max_huge_pages)
-               default_hstate.max_huge_pages = default_hstate_max_huge_pages;
+       if (default_hstate_max_huge_pages) {
+               if (!default_hstate.max_huge_pages)
+                       default_hstate.max_huge_pages = default_hstate_max_huge_pages;
+       }
 
        hugetlb_init_hstates();
        gather_bootmem_prealloc();
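
Editor's note: the BUG_ON() -> VM_BUG_ON_PAGE() conversion above trades an
unconditional check for one that is compiled out on !CONFIG_DEBUG_VM
kernels and that dumps the offending page before crashing. Roughly (a
hedged recollection of include/linux/mmdebug.h, not part of this patch):

#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON_PAGE(cond, page)					\
	do {								\
		if (unlikely(cond)) {					\
			dump_page(page, "VM_BUG_ON_PAGE(" #cond ")");	\
			BUG();						\
		}							\
	} while (0)
#else
#define VM_BUG_ON_PAGE(cond, page) do { } while (0)
#endif
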
index ed8b5ffcf9b16fbfcf3ccba0d182957980ad45ab..a38a21ebddb454540fc1dcae83be516ffc7cdf26 100644 (file)
@@ -216,6 +216,37 @@ static inline bool is_cow_mapping(vm_flags_t flags)
        return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+/*
+ * These three helpers classify VMAs for virtual memory accounting.
+ */
+
+/*
+ * Executable code area - executable, not writable, not stack
+ */
+static inline bool is_exec_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
+}
+
+/*
+ * Stack area - automatically grows in one direction
+ *
+ * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
+ * do_mmap() forbids all other combinations.
+ */
+static inline bool is_stack_mapping(vm_flags_t flags)
+{
+       return (flags & VM_STACK) == VM_STACK;
+}
+
+/*
+ * Data area - private, writable, not stack
+ */
+static inline bool is_data_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
+}
+
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev, struct rb_node *rb_parent);
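
Editor's note: the three predicates partition a vma's vm_flags for the
exec_vm/stack_vm/data_vm counters consumed later by vm_stat_account() in
mm/mmap.c. A tiny hypothetical check of how they classify (not from the
patch itself):

	vm_flags_t f = VM_READ | VM_WRITE;	/* private, writable, no stack */

	/* data: writable, not shared, not a stack area */
	BUG_ON(!is_data_mapping(f));
	/* neither executable-only nor a grows-up/down stack */
	BUG_ON(is_exec_mapping(f) || is_stack_mapping(f));
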
index d2ed81e59a94c3f81674ead9e84d51ca8ebd9950..dd7989929f13ae16e265bff30fd958f1f13993cb 100644 (file)
@@ -1448,7 +1448,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
  * Remaining API functions
  */
 
-phys_addr_t __init memblock_phys_mem_size(void)
+phys_addr_t __init_memblock memblock_phys_mem_size(void)
 {
        return memblock.memory.total_size;
 }
index ca052f2a4a0b374eb9a2a6498b407be7da28ac48..d06cae2de783acf462162e27f8770ff7bf60f4ad 100644 (file)
@@ -4638,7 +4638,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
                        mc.precharge += HPAGE_PMD_NR;
                spin_unlock(ptl);
@@ -4826,7 +4827,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        union mc_target target;
        struct page *page;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (mc.precharge < HPAGE_PMD_NR) {
                        spin_unlock(ptl);
                        return 0;
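
Editor's note: these hunks track an API change: pmd_trans_huge_lock() now
returns the page-table lock itself, or NULL when *pmd is not a stable huge
pmd, instead of reporting success through an output parameter. The new
calling convention, sketched:

	spinlock_t *ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		/* *pmd is huge and stable while ptl is held */
		spin_unlock(ptl);
		return 0;
	}
	/* otherwise fall back to the pte-level walk */
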
index 30991f83d0bf54dc537f1927a10883aa5787dd97..8132787ae4d509d475ed6a77705076ddfee30630 100644 (file)
@@ -1591,10 +1591,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
         * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
         * without pte special, it would there be refcounted as a normal page.
         */
-       if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
+       if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
                struct page *page;
 
-               page = pfn_t_to_page(pfn);
+               /*
+                * At this point we are committed to insert_page()
+                * regardless of whether the caller specified flags that
+                * result in pfn_t_has_page() == false.
+                */
+               page = pfn_to_page(pfn_t_to_pfn(pfn));
                return insert_page(vma, addr, page, vma->vm_page_prot);
        }
        return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
@@ -2232,11 +2237,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
 
        page_cache_get(old_page);
 
-       /*
-        * Only catch write-faults on shared writable pages,
-        * read-only shared pages can get COWed by
-        * get_user_pages(.write=1, .force=1).
-        */
        if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
                int tmp;
 
@@ -3404,8 +3404,18 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (unlikely(pmd_none(*pmd)) &&
            unlikely(__pte_alloc(mm, vma, pmd, address)))
                return VM_FAULT_OOM;
-       /* if an huge pmd materialized from under us just retry later */
-       if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
+       /*
+        * If a huge pmd materialized under us just retry later.  Use
+        * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+        * didn't become pmd_trans_huge under us and then back to pmd_none, as
+        * a result of MADV_DONTNEED running immediately after a huge pmd fault
+        * in a different thread of this mm, in turn leading to a misleading
+        * pmd_trans_huge() retval.  All we have to ensure is that it is a
+        * regular pmd that we can walk with pte_offset_map() and we can do that
+        * through an atomic read in C, which is what pmd_trans_unstable()
+        * provides.
+        */
+       if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
                return 0;
        /*
         * A regular pmd is established and it can't morph into a huge pmd
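
Editor's note: the comment above is the heart of this fix. pmd_trans_huge()
alone can miss a pmd that went huge and then back to none (MADV_DONTNEED in
another thread of the same mm), while pmd_trans_unstable() reads the pmd
once and reports any state that cannot be walked safely. The retry pattern
it enables, sketched:

	if (unlikely(pmd_trans_unstable(pmd)))
		return 0;	/* let the caller re-fault; the pmd settles */
	pte = pte_offset_map(pmd, address);	/* now a regular page table */
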
index 27d135408a22057a5b166e7eb2bf2e080bbd33f2..4c4187c0e1deeb25bdbf5eb383132d27feb9be90 100644 (file)
@@ -548,8 +548,7 @@ retry:
                        goto retry;
                }
 
-               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-                       migrate_page_add(page, qp->pagelist, flags);
+               migrate_page_add(page, qp->pagelist, flags);
        }
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
@@ -625,7 +624,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
        unsigned long endvma = vma->vm_end;
        unsigned long flags = qp->flags;
 
-       if (vma->vm_flags & VM_PFNMAP)
+       if (!vma_migratable(vma))
                return 1;
 
        if (endvma > end)
@@ -644,16 +643,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 
        if (flags & MPOL_MF_LAZY) {
                /* Similar to task_numa_work, skip inaccessible VMAs */
-               if (vma_migratable(vma) &&
-                       vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
+               if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
                        change_prot_numa(vma, start, endvma);
                return 1;
        }
 
-       if ((flags & MPOL_MF_STRICT) ||
-           ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-            vma_migratable(vma)))
-               /* queue pages from current vma */
+       /* queue pages from current vma */
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
                return 0;
        return 1;
 }
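
Editor's note: in the pagewalk API, ->test_walk() returning 1 skips the vma
and 0 walks its page tables, so the rewrite above moves the migratability
filter to the front and lets migrate_page_add() honor the MPOL_MF_MOVE*
flags itself. The return-code convention, sketched with a hypothetical
callback:

static int demo_test_walk(unsigned long start, unsigned long end,
		struct mm_walk *walk)
{
	/* 0 = walk this vma, 1 = skip it, <0 = abort the walk */
	return vma_migratable(walk->vma) ? 0 : 1;
}
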
index b1034f9c77e7d5a9bdbe60692396e5584c6991fc..3ad0fea5c4387c5b6ff1d8fb513b73488b9180f2 100644 (file)
@@ -1582,7 +1582,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
                                         (GFP_HIGHUSER_MOVABLE |
                                          __GFP_THISNODE | __GFP_NOMEMALLOC |
                                          __GFP_NORETRY | __GFP_NOWARN) &
-                                        ~(__GFP_IO | __GFP_FS), 0);
+                                        ~__GFP_RECLAIM, 0);
 
        return newpage;
 }
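
Editor's note: in this kernel series (an assumption based on the 4.4-era
gfp rework) __GFP_RECLAIM is the union of __GFP_DIRECT_RECLAIM and
__GFP_KSWAPD_RECLAIM, so masking it off forbids both entering direct
reclaim and waking kswapd - stricter than the old ~(__GFP_IO | __GFP_FS),
which merely limited what reclaim could do once entered:

	gfp_t gfp = (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOWARN)
			& ~__GFP_RECLAIM;	/* never sleep for memory */
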
index 2a565ed8bb4907398a0d3d2619fd4939df777970..563f320454902df04e782f73aa8d0473656034c9 100644 (file)
@@ -117,7 +117,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        unsigned char *vec = walk->private;
        int nr = (end - addr) >> PAGE_SHIFT;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                memset(vec, 1, nr);
                spin_unlock(ptl);
                goto out;
index e1e2b1207bf2ee00604468d027b3879381e1b089..96f00104192861aec6358d9fae5f9b39fecb43a4 100644 (file)
@@ -175,7 +175,7 @@ static void __munlock_isolation_failed(struct page *page)
  */
 unsigned int munlock_vma_page(struct page *page)
 {
-       unsigned int nr_pages;
+       int nr_pages;
        struct zone *zone = page_zone(page);
 
        /* For try_to_munlock() and to serialize with page migration */
index 84b12624ceb01d83762172634179825b086961fd..76d1ec29149bf25f5e6ffe3e56e86467efccc641 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -42,6 +42,7 @@
 #include <linux/memory.h>
 #include <linux/printk.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/moduleparam.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -69,6 +70,8 @@ const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
 int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 #endif
 
+static bool ignore_rlimit_data = true;
+core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
 static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
@@ -387,8 +390,9 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_DEBUG_VM_RB
-static int browse_rb(struct rb_root *root)
+static int browse_rb(struct mm_struct *mm)
 {
+       struct rb_root *root = &mm->mm_rb;
        int i = 0, j, bug = 0;
        struct rb_node *nd, *pn = NULL;
        unsigned long prev = 0, pend = 0;
@@ -411,12 +415,14 @@ static int browse_rb(struct rb_root *root)
                                  vma->vm_start, vma->vm_end);
                        bug = 1;
                }
+               spin_lock(&mm->page_table_lock);
                if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
                        pr_emerg("free gap %lx, correct %lx\n",
                               vma->rb_subtree_gap,
                               vma_compute_subtree_gap(vma));
                        bug = 1;
                }
+               spin_unlock(&mm->page_table_lock);
                i++;
                pn = nd;
                prev = vma->vm_start;
@@ -453,12 +459,16 @@ static void validate_mm(struct mm_struct *mm)
        struct vm_area_struct *vma = mm->mmap;
 
        while (vma) {
+               struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
 
-               vma_lock_anon_vma(vma);
-               list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                       anon_vma_interval_tree_verify(avc);
-               vma_unlock_anon_vma(vma);
+               if (anon_vma) {
+                       anon_vma_lock_read(anon_vma);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
+                       anon_vma_unlock_read(anon_vma);
+               }
+
                highest_address = vma->vm_end;
                vma = vma->vm_next;
                i++;
@@ -472,7 +482,7 @@ static void validate_mm(struct mm_struct *mm)
                          mm->highest_vm_end, highest_address);
                bug = 1;
        }
-       i = browse_rb(&mm->mm_rb);
+       i = browse_rb(mm);
        if (i != mm->map_count) {
                if (i != -1)
                        pr_emerg("map_count %d rb %d\n", mm->map_count, i);
@@ -2139,32 +2149,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
-       int error;
+       int error = 0;
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
+       /* Guard against wrapping around to address 0. */
+       if (address < PAGE_ALIGN(address+4))
+               address = PAGE_ALIGN(address+4);
+       else
+               return -ENOMEM;
+
+       /* We must make sure the anon_vma is allocated. */
        if (unlikely(anon_vma_prepare(vma)))
                return -ENOMEM;
-       vma_lock_anon_vma(vma);
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
-        * Also guard against wrapping around to address 0.
         */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else {
-               vma_unlock_anon_vma(vma);
-               return -ENOMEM;
-       }
-       error = 0;
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address > vma->vm_end) {
@@ -2182,7 +2187,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
@@ -2205,7 +2210,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
@@ -2221,25 +2226,21 @@ int expand_downwards(struct vm_area_struct *vma,
        struct mm_struct *mm = vma->vm_mm;
        int error;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
-       if (unlikely(anon_vma_prepare(vma)))
-               return -ENOMEM;
-
        address &= PAGE_MASK;
        error = security_mmap_addr(address);
        if (error)
                return error;
 
-       vma_lock_anon_vma(vma);
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address < vma->vm_start) {
@@ -2257,7 +2258,7 @@ int expand_downwards(struct vm_area_struct *vma,
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
@@ -2278,7 +2279,7 @@ int expand_downwards(struct vm_area_struct *vma,
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
@@ -2663,12 +2664,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        if (!vma || !(vma->vm_flags & VM_SHARED))
                goto out;
 
-       if (start < vma->vm_start || start + size > vma->vm_end)
+       if (start < vma->vm_start)
                goto out;
 
-       if (pgoff == linear_page_index(vma, start)) {
-               ret = 0;
-               goto out;
+       if (start + size > vma->vm_end) {
+               struct vm_area_struct *next;
+
+               for (next = vma->vm_next; next; next = next->vm_next) {
+                       /* hole between vmas? */
+                       if (next->vm_start != next->vm_prev->vm_end)
+                               goto out;
+
+                       if (next->vm_file != vma->vm_file)
+                               goto out;
+
+                       if (next->vm_flags != vma->vm_flags)
+                               goto out;
+
+                       if (start + size <= next->vm_end)
+                               break;
+               }
+
+               if (!next)
+                       goto out;
        }
 
        prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
@@ -2678,9 +2696,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        flags &= MAP_NONBLOCK;
        flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
        if (vma->vm_flags & VM_LOCKED) {
+               struct vm_area_struct *tmp;
                flags |= MAP_LOCKED;
+
                /* drop PG_Mlocked flag for over-mapped range */
-               munlock_vma_pages_range(vma, start, start + size);
+               for (tmp = vma; tmp->vm_start >= start + size;
+                               tmp = tmp->vm_next) {
+                       munlock_vma_pages_range(tmp,
+                                       max(tmp->vm_start, start),
+                                       min(tmp->vm_end, start + size));
+               }
        }
 
        file = get_file(vma->vm_file);
@@ -2982,9 +3007,17 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
        if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
                return false;
 
-       if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS &
-                               (VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE)
-               return mm->data_vm + npages <= rlimit(RLIMIT_DATA);
+       if (is_data_mapping(flags) &&
+           mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
+               if (ignore_rlimit_data)
+                       pr_warn_once("%s (%d): VmData %lu exceeds data ulimit "
+                                    "%lu. Will be forbidden soon.\n",
+                                    current->comm, current->pid,
+                                    (mm->data_vm + npages) << PAGE_SHIFT,
+                                    rlimit(RLIMIT_DATA));
+               else
+                       return false;
+       }
 
        return true;
 }
@@ -2993,11 +3026,11 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
 {
        mm->total_vm += npages;
 
-       if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC)
+       if (is_exec_mapping(flags))
                mm->exec_vm += npages;
-       else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)))
+       else if (is_stack_mapping(flags))
                mm->stack_vm += npages;
-       else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
+       else if (is_data_mapping(flags))
                mm->data_vm += npages;
 }
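
Editor's note: RLIMIT_DATA is expressed in bytes while data_vm counts
pages, hence the >> PAGE_SHIFT on the limit and the << PAGE_SHIFT in the
warning text. The check added above, reduced to its core (a sketch, not
the verbatim code):

	unsigned long limit_pages = rlimit(RLIMIT_DATA) >> PAGE_SHIFT;

	if (is_data_mapping(flags) && mm->data_vm + npages > limit_pages) {
		if (!ignore_rlimit_data)
			return false;	/* enforce the ulimit */
		/* otherwise warn once and allow, for now */
	}
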
 
index 8eb7bb40dc40b6e8e89d05fbb7e05f8c836e05da..f7cb3d4d9c2eb55d74738e374faa3bfa37251686 100644 (file)
@@ -160,9 +160,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                }
 
                if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
-                       if (next - addr != HPAGE_PMD_SIZE)
+                       if (next - addr != HPAGE_PMD_SIZE) {
                                split_huge_pmd(vma, pmd, addr);
-                       else {
+                               if (pmd_none(*pmd))
+                                       continue;
+                       } else {
                                int nr_ptes = change_huge_pmd(vma, pmd, addr,
                                                newprot, prot_numa);
 
index d77946a997f798ecc25ccaee2886139ff9d9a587..8eeba02fc99137b5a47996750159de8a455e2c4d 100644 (file)
@@ -210,6 +210,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                                }
                        }
                        split_huge_pmd(vma, old_pmd, old_addr);
+                       if (pmd_none(*old_pmd))
+                               continue;
                        VM_BUG_ON(pmd_trans_huge(*old_pmd));
                }
                if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma,
index 63358d9f9aa98eff0848879b0503b5d80b9ea8d0..838ca8bb64f7376062fc6670c759a27d7f59c7e3 100644 (file)
@@ -5209,6 +5209,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
        spin_lock_init(&pgdat->numabalancing_migrate_lock);
        pgdat->numabalancing_migrate_nr_pages = 0;
        pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spin_lock_init(&pgdat->split_queue_lock);
+       INIT_LIST_HEAD(&pgdat->split_queue);
+       pgdat->split_queue_len = 0;
 #endif
        init_waitqueue_head(&pgdat->kswapd_wait);
        init_waitqueue_head(&pgdat->pfmemalloc_wait);
@@ -6615,7 +6620,7 @@ bool is_pageblock_removable_nolock(struct page *page)
        return !has_unmovable_pages(zone, page, 0, true);
 }
 
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 
 static unsigned long pfn_max_align_down(unsigned long pfn)
 {
index 8a943b97a053a6aef9c939b6b06854fedea01b7b..998607adf6eb840a6ae30371484839277eaaab6b 100644 (file)
@@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size)
 /**
  * pcpu_mem_free - free memory
  * @ptr: memory to free
- * @size: size of the area
  *
  * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
-static void pcpu_mem_free(void *ptr, size_t size)
+static void pcpu_mem_free(void *ptr)
 {
-       if (size <= PAGE_SIZE)
-               kfree(ptr);
-       else
-               vfree(ptr);
+       kvfree(ptr);
 }
 
 /**
@@ -463,8 +459,8 @@ out_unlock:
         * pcpu_mem_free() might end up calling vfree() which uses
         * IRQ-unsafe lock and thus can't be called under pcpu_lock.
         */
-       pcpu_mem_free(old, old_size);
-       pcpu_mem_free(new, new_size);
+       pcpu_mem_free(old);
+       pcpu_mem_free(new);
 
        return 0;
 }
@@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
        chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
                                                sizeof(chunk->map[0]));
        if (!chunk->map) {
-               pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+               pcpu_mem_free(chunk);
                return NULL;
        }
 
@@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
 {
        if (!chunk)
                return;
-       pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
-       pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+       pcpu_mem_free(chunk->map);
+       pcpu_mem_free(chunk);
 }
 
 /**
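
Editor's note: kvfree() frees a pointer that came from either kmalloc() or
vmalloc(), so callers no longer need to carry the size threshold used at
allocation time. The allocate/free pairing it enables, sketched:

static void *demo_zalloc(size_t size)
{
	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_KERNEL);
	return vzalloc(size);
}

	/* ... later, correct for either branch: */
	kvfree(ptr);
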
index 9d4767698a1cd6988d4f71b37ef3f384eff5b3b1..06a005b979a763cb4afbf0e57fe9c7f70d2c0c92 100644 (file)
@@ -90,9 +90,9 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
  * ARCHes with special requirements for evicting THP backing TLB entries can
  * implement this. Otherwise also, it can help optimize normal TLB flush in
  * THP regime. stock flush_tlb_range() typically has optimization to nuke the
- * entire TLB TLB if flush span is greater than a threshhold, which will
+ * entire TLB if flush span is greater than a threshold, which will
  * likely be true for a single huge page. Thus a single thp flush will
- * invalidate the entire TLB which is not desitable.
+ * invalidate the entire TLB which is not desirable.
  * e.g. see arch/arc: flush_pmd_tlb_range
  */
 #define flush_pmd_tlb_range(vma, addr, end)    flush_tlb_range(vma, addr, end)
@@ -195,7 +195,9 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
        VM_BUG_ON(pmd_trans_huge(*pmdp));
        pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
-       flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+       /* collapse entails shooting down ptes not pmd */
+       flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
        return pmd;
 }
 #endif
index fa2ceb2d2655dbd8e06ecd4e02384df5ad5b5891..440e2a7e6c1c2ca706376eb72d719fb8b83af05a 100644 (file)
@@ -701,8 +701,7 @@ static void shmem_evict_inode(struct inode *inode)
                        list_del_init(&info->swaplist);
                        mutex_unlock(&shmem_swaplist_mutex);
                }
-       } else
-               kfree(info->symlink);
+       }
 
        simple_xattrs_free(&info->xattrs);
        WARN_ON(inode->i_blocks);
@@ -1902,7 +1901,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
        if (whence != SEEK_DATA && whence != SEEK_HOLE)
                return generic_file_llseek_size(file, offset, whence,
                                        MAX_LFS_FILESIZE, i_size_read(inode));
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* We're holding i_mutex so we can access i_size directly */
 
        if (offset < 0)
@@ -1926,7 +1925,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 
        if (offset >= 0)
                offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -2091,7 +2090,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
        if (seals & ~(unsigned int)F_ALL_SEALS)
                return -EINVAL;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (info->seals & F_SEAL_SEAL) {
                error = -EPERM;
@@ -2114,7 +2113,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
        error = 0;
 
 unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(shmem_add_seals);
@@ -2164,7 +2163,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                struct address_space *mapping = file->f_mapping;
@@ -2277,7 +2276,7 @@ undone:
        inode->i_private = NULL;
        spin_unlock(&inode->i_lock);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 
@@ -2549,13 +2548,12 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
        info = SHMEM_I(inode);
        inode->i_size = len-1;
        if (len <= SHORT_SYMLINK_LEN) {
-               info->symlink = kmemdup(symname, len, GFP_KERNEL);
-               if (!info->symlink) {
+               inode->i_link = kmemdup(symname, len, GFP_KERNEL);
+               if (!inode->i_link) {
                        iput(inode);
                        return -ENOMEM;
                }
                inode->i_op = &shmem_short_symlink_operations;
-               inode->i_link = info->symlink;
        } else {
                inode_nohighmem(inode);
                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -3132,6 +3130,7 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
 static void shmem_destroy_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
+       kfree(inode->i_link);
        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
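
Editor's note: the mutex_lock(&inode->i_mutex) -> inode_lock(inode)
conversions here and in mm/swapfile.c below are mechanical. In this kernel
series (an assumption matching the 4.5-era VFS helpers) the wrappers are
trivial:

static inline void inode_lock(struct inode *inode)
{
	mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
	mutex_unlock(&inode->i_mutex);
}
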
 
index 6ecc697a8bc4670a3910fdcdc155196ee71a539a..621fbcb35a366abf0d979cf48c5476cff1e609f5 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2275,7 +2275,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 
        err = setup_cpu_cache(cachep, gfp);
        if (err) {
-               __kmem_cache_shutdown(cachep);
+               __kmem_cache_release(cachep);
                return err;
        }
 
@@ -2413,13 +2413,14 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
 }
 
 int __kmem_cache_shutdown(struct kmem_cache *cachep)
+{
+       return __kmem_cache_shrink(cachep, false);
+}
+
+void __kmem_cache_release(struct kmem_cache *cachep)
 {
        int i;
        struct kmem_cache_node *n;
-       int rc = __kmem_cache_shrink(cachep, false);
-
-       if (rc)
-               return rc;
 
        free_percpu(cachep->cpu_cache);
 
@@ -2430,7 +2431,6 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
                kfree(n);
                cachep->node[i] = NULL;
        }
-       return 0;
 }
 
 /*
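
Editor's note: this series splits cache destruction into two phases:
__kmem_cache_shutdown() only empties and verifies the cache (and may
fail), while the new __kmem_cache_release() frees the per-cpu and per-node
state once nothing - including sysfs - can reach the cache anymore. The
resulting call shape, sketched (not the actual slab_common.c code):

	if (__kmem_cache_shutdown(s))
		return -EBUSY;		/* objects remain; cache stays alive */
	/* ... unlink from cache list, remove sysfs entries, wait RCU ... */
	__kmem_cache_release(s);	/* point of no return */
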
index 834ad240c0bb13980fbe10fb29aba166dcff3f28..2eedacea439de698bdf2103f54ae936fe40de48d 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -140,6 +140,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
+void __kmem_cache_release(struct kmem_cache *);
 int __kmem_cache_shrink(struct kmem_cache *, bool);
 void slab_kmem_cache_release(struct kmem_cache *);
 
index b50aef01ccf7ea97aa621dc63cfd66e111ee3dae..065b7bdabdc30c5b763b368d84e2057fcde3eb5a 100644 (file)
@@ -693,6 +693,7 @@ static inline int shutdown_memcg_caches(struct kmem_cache *s,
 
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
+       __kmem_cache_release(s);
        destroy_memcg_params(s);
        kfree_const(s->name);
        kmem_cache_free(kmem_cache, s);
index 17e8f8cc7c534adca165f6d0e9b546e2f8484148..5ec158054ffe0b2ddf20c01276403f1984108dcb 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -630,6 +630,10 @@ int __kmem_cache_shutdown(struct kmem_cache *c)
        return 0;
 }
 
+void __kmem_cache_release(struct kmem_cache *c)
+{
+}
+
 int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate)
 {
        return 0;
index 2e1355ac056b02a51778b2b1eef770b276626309..d8fbd4a6ed599882489143665ca750eb7613fd65 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1592,18 +1592,12 @@ static inline void add_partial(struct kmem_cache_node *n,
        __add_partial(n, page, tail);
 }
 
-static inline void
-__remove_partial(struct kmem_cache_node *n, struct page *page)
-{
-       list_del(&page->lru);
-       n->nr_partial--;
-}
-
 static inline void remove_partial(struct kmem_cache_node *n,
                                        struct page *page)
 {
        lockdep_assert_held(&n->list_lock);
-       __remove_partial(n, page);
+       list_del(&page->lru);
+       n->nr_partial--;
 }
 
 /*
@@ -3184,6 +3178,12 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
        }
 }
 
+void __kmem_cache_release(struct kmem_cache *s)
+{
+       free_percpu(s->cpu_slab);
+       free_kmem_cache_nodes(s);
+}
+
 static int init_kmem_cache_nodes(struct kmem_cache *s)
 {
        int node;
@@ -3443,28 +3443,31 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
 
 /*
  * Attempt to free all partial slabs on a node.
- * This is called from kmem_cache_close(). We must be the last thread
- * using the cache and therefore we do not need to lock anymore.
+ * This is called from __kmem_cache_shutdown(). We must take list_lock
+ * because a sysfs file might still access the partial list after shutdown.
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
        struct page *page, *h;
 
+       BUG_ON(irqs_disabled());
+       spin_lock_irq(&n->list_lock);
        list_for_each_entry_safe(page, h, &n->partial, lru) {
                if (!page->inuse) {
-                       __remove_partial(n, page);
+                       remove_partial(n, page);
                        discard_slab(s, page);
                } else {
                        list_slab_objects(s, page,
-                       "Objects remaining in %s on kmem_cache_close()");
+                       "Objects remaining in %s on __kmem_cache_shutdown()");
                }
        }
+       spin_unlock_irq(&n->list_lock);
 }
 
 /*
  * Release all resources used by a slab cache.
  */
-static inline int kmem_cache_close(struct kmem_cache *s)
+int __kmem_cache_shutdown(struct kmem_cache *s)
 {
        int node;
        struct kmem_cache_node *n;
@@ -3476,16 +3479,9 @@ static inline int kmem_cache_close(struct kmem_cache *s)
                if (n->nr_partial || slabs_node(s, node))
                        return 1;
        }
-       free_percpu(s->cpu_slab);
-       free_kmem_cache_nodes(s);
        return 0;
 }
 
-int __kmem_cache_shutdown(struct kmem_cache *s)
-{
-       return kmem_cache_close(s);
-}
-
 /********************************************************************
  *             Kmalloc subsystem
  *******************************************************************/
@@ -3980,7 +3976,7 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
        memcg_propagate_slab_attrs(s);
        err = sysfs_slab_add(s);
        if (err)
-               kmem_cache_close(s);
+               __kmem_cache_release(s);
 
        return err;
 }
index c43f654a7b645474592de879f0664e4698d9dfbd..d2c37365e2d6e429cf0f7cdbb62308f5e76aea93 100644 (file)
@@ -1956,9 +1956,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                set_blocksize(bdev, old_block_size);
                blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
        } else {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                inode->i_flags &= ~S_SWAPFILE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        filp_close(swap_file, NULL);
 
@@ -2183,7 +2183,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
                p->flags |= SWP_BLKDEV;
        } else if (S_ISREG(inode->i_mode)) {
                p->bdev = inode->i_sb->s_bdev;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (IS_SWAPFILE(inode))
                        return -EBUSY;
        } else
@@ -2416,7 +2416,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        mapping = swap_file->f_mapping;
        inode = mapping->host;
 
-       /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
+       /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */
        error = claim_swapfile(p, inode);
        if (unlikely(error))
                goto bad_swap;
@@ -2561,7 +2561,7 @@ bad_swap:
        vfree(cluster_info);
        if (swap_file) {
                if (inode && S_ISREG(inode->i_mode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        inode = NULL;
                }
                filp_close(swap_file, NULL);
@@ -2574,7 +2574,7 @@ out:
        if (name)
                putname(name);
        if (inode && S_ISREG(inode->i_mode))
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        return error;
 }
 
index 76e35ad971025ce5eb3781543537d1bf3b947b8d..e3ee0e27cd17f20d1d35f1acdeab18136ccc1e29 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/backing-dev.h>
+#include <linux/dax.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping,
                return;
 
        spin_lock_irq(&mapping->tree_lock);
-       /*
-        * Regular page slots are stabilized by the page lock even
-        * without the tree itself locked.  These unlocked entries
-        * need verification under the tree lock.
-        */
-       if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
-               goto unlock;
-       if (*slot != entry)
-               goto unlock;
-       radix_tree_replace_slot(slot, NULL);
-       mapping->nrshadows--;
-       if (!node)
-               goto unlock;
-       workingset_node_shadows_dec(node);
-       /*
-        * Don't track node without shadow entries.
-        *
-        * Avoid acquiring the list_lru lock if already untracked.
-        * The list_empty() test is safe as node->private_list is
-        * protected by mapping->tree_lock.
-        */
-       if (!workingset_node_shadows(node) &&
-           !list_empty(&node->private_list))
-               list_lru_del(&workingset_shadow_nodes, &node->private_list);
-       __radix_tree_delete_node(&mapping->page_tree, node);
+
+       if (dax_mapping(mapping)) {
+               if (radix_tree_delete_item(&mapping->page_tree, index, entry))
+                       mapping->nrexceptional--;
+       } else {
+               /*
+                * Regular page slots are stabilized by the page lock even
+                * without the tree itself locked.  These unlocked entries
+                * need verification under the tree lock.
+                */
+               if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
+                                       &slot))
+                       goto unlock;
+               if (*slot != entry)
+                       goto unlock;
+               radix_tree_replace_slot(slot, NULL);
+               mapping->nrexceptional--;
+               if (!node)
+                       goto unlock;
+               workingset_node_shadows_dec(node);
+               /*
+                * Don't track node without shadow entries.
+                *
+                * Avoid acquiring the list_lru lock if already untracked.
+                * The list_empty() test is safe as node->private_list is
+                * protected by mapping->tree_lock.
+                */
+               if (!workingset_node_shadows(node) &&
+                   !list_empty(&node->private_list))
+                       list_lru_del(&workingset_shadow_nodes,
+                                       &node->private_list);
+               __radix_tree_delete_node(&mapping->page_tree, node);
+       }
 unlock:
        spin_unlock_irq(&mapping->tree_lock);
 }
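
Editor's note: shadow (workingset) entries and DAX entries are both stored
as exceptional radix-tree entries in the same page_tree, which is why the
nrshadows counter becomes the more general nrexceptional below. Only the
non-DAX branch needs the node shadow bookkeeping; a DAX entry is simply
deleted. Sketched:

	if (radix_tree_exceptional_entry(entry)) {
		if (dax_mapping(mapping))
			/* DAX entry: delete, adjust nrexceptional only */;
		else
			/* workingset shadow: also maintain node counts */;
	}
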
@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
        int             i;
 
        cleancache_invalidate_inode(mapping);
-       if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+       if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
                return;
 
        /* Offsets within partial pages */
@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
  */
 void truncate_inode_pages_final(struct address_space *mapping)
 {
-       unsigned long nrshadows;
+       unsigned long nrexceptional;
        unsigned long nrpages;
 
        /*
@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping)
 
        /*
         * When reclaim installs eviction entries, it increases
-        * nrshadows first, then decreases nrpages.  Make sure we see
+        * nrexceptional first, then decreases nrpages.  Make sure we see
         * this in the right order or we might miss an entry.
         */
        nrpages = mapping->nrpages;
        smp_rmb();
-       nrshadows = mapping->nrshadows;
+       nrexceptional = mapping->nrexceptional;
 
-       if (nrpages || nrshadows) {
+       if (nrpages || nrexceptional) {
                /*
                 * As truncation uses a lockless tree lookup, cycle
                 * the tree lock to make sure any ongoing tree
index c108a6542d05d3e1c880c1f6ab540ced62156e54..4fb14ca5a41967696a6f769189eb5a59c6a91c0f 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -230,36 +230,11 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /* Check if the vma is being used as a stack by this task */
-static int vm_is_stack_for_task(struct task_struct *t,
-                               struct vm_area_struct *vma)
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t)
 {
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
-/*
- * Check if the vma is being used as a stack.
- * If is_group is non-zero, check in the entire thread group or else
- * just check in the current task. Returns the task_struct of the task
- * that the vma is stack for. Must be called under rcu_read_lock().
- */
-struct task_struct *task_of_stack(struct task_struct *task,
-                               struct vm_area_struct *vma, bool in_group)
-{
-       if (vm_is_stack_for_task(task, vma))
-               return task;
-
-       if (in_group) {
-               struct task_struct *t;
-
-               for_each_thread(task, t) {
-                       if (vm_is_stack_for_task(t, vma))
-                               return t;
-               }
-       }
-
-       return NULL;
-}
-
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
index 9a6c0704211c856c300f67057fc202e25c9657ab..149fdf6c5c56f927f3613538c61b3c5831c80af9 100644 (file)
@@ -248,9 +248,8 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 
        if (tree) {
                spin_lock(&vmpr->sr_lock);
-               vmpr->tree_scanned += scanned;
+               scanned = vmpr->tree_scanned += scanned;
                vmpr->tree_reclaimed += reclaimed;
-               scanned = vmpr->scanned;
                spin_unlock(&vmpr->sr_lock);
 
                if (scanned < vmpressure_win)
index bd620b65db52680fc8a6e221fe90189c22dca2f1..71b1c29948dba30aab0a894ddc7c84eb62acde2b 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/oom.h>
 #include <linux/prefetch.h>
 #include <linux/printk.h>
+#include <linux/dax.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 * inode reclaim needs to empty out the radix tree or
                 * the nodes are lost.  Don't plant shadows behind its
                 * back.
+                *
+                * We also don't store shadows for DAX mappings because the
+                * only page cache pages found in these are zero pages
+                * covering holes, and because we don't want to mix DAX
+                * exceptional entries and shadow exceptional entries in the
+                * same page_tree.
                 */
                if (reclaimed && page_is_file_cache(page) &&
-                   !mapping_exiting(mapping))
+                   !mapping_exiting(mapping) && !dax_mapping(mapping))
                        shadow = workingset_eviction(mapping, page);
                __delete_from_page_cache(page, shadow, memcg);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -1436,7 +1443,7 @@ int isolate_lru_page(struct page *page)
        int ret = -EBUSY;
 
        VM_BUG_ON_PAGE(!page_count(page), page);
-       VM_BUG_ON_PAGE(PageTail(page), page);
+       WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
        if (PageLRU(page)) {
                struct zone *zone = page_zone(page);
index 64bd0aa13f75cc25247609542f0949c18995c8c1..084c6725b3734430483e7ea4fcf74e2ab67f7bfa 100644 (file)
@@ -1396,10 +1396,15 @@ static void vmstat_update(struct work_struct *w)
                 * Counters were updated so we expect more updates
                 * to occur in the future. Keep on running the
                 * update worker thread.
+                * If we were marked on cpu_stat_off, clear the flag
+                * so that vmstat_shepherd doesn't schedule us again.
                 */
-               queue_delayed_work_on(smp_processor_id(), vmstat_wq,
-                       this_cpu_ptr(&vmstat_work),
-                       round_jiffies_relative(sysctl_stat_interval));
+               if (!cpumask_test_and_clear_cpu(smp_processor_id(),
+                                               cpu_stat_off)) {
+                       queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+                               this_cpu_ptr(&vmstat_work),
+                               round_jiffies_relative(sysctl_stat_interval));
+               }
        } else {
                /*
                 * We did not update any counters so the app may be in
@@ -1408,17 +1413,7 @@ static void vmstat_update(struct work_struct *w)
                 * Defer the checking for differentials to the
                 * shepherd thread on a different processor.
                 */
-               int r;
-               /*
-                * Shepherd work thread does not race since it never
-                * changes the bit if its zero but the cpu
-                * online / off line code may race if
-                * worker threads are still allowed during
-                * shutdown / startup.
-                */
-               r = cpumask_test_and_set_cpu(smp_processor_id(),
-                       cpu_stat_off);
-               VM_BUG_ON(r);
+               cpumask_set_cpu(smp_processor_id(), cpu_stat_off);
        }
 }
 
@@ -1427,18 +1422,6 @@ static void vmstat_update(struct work_struct *w)
  * until the diffs stay at zero. The function is used by NOHZ and can only be
  * invoked when tick processing is not active.
  */
-void quiet_vmstat(void)
-{
-       if (system_state != SYSTEM_RUNNING)
-               return;
-
-       do {
-               if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
-                       cancel_delayed_work(this_cpu_ptr(&vmstat_work));
-
-       } while (refresh_cpu_vm_stats(false));
-}
-
 /*
  * Check if the diffs for a certain cpu indicate that
  * an update is needed.
@@ -1462,6 +1445,30 @@ static bool need_update(int cpu)
        return false;
 }
 
+void quiet_vmstat(void)
+{
+       if (system_state != SYSTEM_RUNNING)
+               return;
+
+       /*
+        * If we are already in the hands of the shepherd then there
+        * is nothing for us to do here.
+        */
+       if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
+               return;
+
+       if (!need_update(smp_processor_id()))
+               return;
+
+       /*
+        * Just refresh counters and do not care about the pending delayed
+        * vmstat_update. It doesn't fire often enough to matter and canceling
+        * it would be too expensive from this path.
+        * vmstat_shepherd will take care about that for us.
+        */
+       refresh_cpu_vm_stats(false);
+}
+
 
 /*
  * Shepherd worker thread that checks the
@@ -1479,18 +1486,25 @@ static void vmstat_shepherd(struct work_struct *w)
 
        get_online_cpus();
        /* Check processors whose vmstat worker threads have been disabled */
-       for_each_cpu(cpu, cpu_stat_off)
-               if (need_update(cpu) &&
-                       cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
-
-                       queue_delayed_work_on(cpu, vmstat_wq,
-                               &per_cpu(vmstat_work, cpu), 0);
+       for_each_cpu(cpu, cpu_stat_off) {
+               struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
+               if (need_update(cpu)) {
+                       if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+                               queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+               } else {
+                       /*
+                        * Cancel the work if quiet_vmstat has put this
+                        * cpu on cpu_stat_off because the work item might
+                        * still be scheduled
+                        */
+                       cancel_delayed_work(dw);
+               }
+       }
        put_online_cpus();
 
        schedule_delayed_work(&shepherd,
                round_jiffies_relative(sysctl_stat_interval));
-
 }
 
 static void __init start_shepherd_timer(void)
@@ -1498,7 +1512,7 @@ static void __init start_shepherd_timer(void)
        int cpu;
 
        for_each_possible_cpu(cpu)
-               INIT_DELAYED_WORK(per_cpu_ptr(&vmstat_work, cpu),
+               INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
                        vmstat_update);
 
        if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
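
Editor's note: switching the per-cpu work item to INIT_DEFERRABLE_WORK
means its timer will not force an idle, tick-stopped CPU to wake just to
run vmstat_update; the work runs at the CPU's next natural wakeup instead.
The declaration-time equivalent, sketched with a hypothetical handler:

static void demo_fn(struct work_struct *w);

static DECLARE_DEFERRABLE_WORK(demo_work, demo_fn);
/* vs. DECLARE_DELAYED_WORK(demo_work, demo_fn), whose timer may wake
 * the CPU purely to run the work */
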
index aa017133744b227bed7592ea6cc32f360c3e142c..61ead9e5549df171f43fa53936a4b8cbd86a21a5 100644 (file)
@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
                        node->slots[i] = NULL;
                        BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
                        node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
-                       BUG_ON(!mapping->nrshadows);
-                       mapping->nrshadows--;
+                       BUG_ON(!mapping->nrexceptional);
+                       mapping->nrexceptional--;
                }
        }
        BUG_ON(node->count);
index bced8c074c1280c6ff175576cb035bf048f18ddd..7bc2208b6cc4c445286c20e01d9911ac9e75b9b2 100644 (file)
@@ -108,9 +108,7 @@ struct p9_poll_wait {
  * @unsent_req_list: accounting for requests that haven't been sent
  * @req: current request being processed (if any)
  * @tmp_buf: temporary buffer to read in header
- * @rsize: amount to read for current frame
- * @rpos: read position in current frame
- * @rbuf: current read buffer
+ * @rc: temporary fcall for reading current frame
  * @wpos: write position for current frame
  * @wsize: amount of data to write for current frame
  * @wbuf: current write buffer
@@ -131,9 +129,7 @@ struct p9_conn {
        struct list_head unsent_req_list;
        struct p9_req_t *req;
        char tmp_buf[7];
-       int rsize;
-       int rpos;
-       char *rbuf;
+       struct p9_fcall rc;
        int wpos;
        int wsize;
        char *wbuf;
@@ -305,69 +301,77 @@ static void p9_read_work(struct work_struct *work)
        if (m->err < 0)
                return;
 
-       p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
+       p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
 
-       if (!m->rbuf) {
-               m->rbuf = m->tmp_buf;
-               m->rpos = 0;
-               m->rsize = 7; /* start by reading header */
+       if (!m->rc.sdata) {
+               m->rc.sdata = m->tmp_buf;
+               m->rc.offset = 0;
+               m->rc.capacity = 7; /* start by reading header */
        }
 
        clear_bit(Rpending, &m->wsched);
-       p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n",
-                m, m->rpos, m->rsize, m->rsize-m->rpos);
-       err = p9_fd_read(m->client, m->rbuf + m->rpos,
-                                               m->rsize - m->rpos);
+       p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
+                m, m->rc.offset, m->rc.capacity,
+                m->rc.capacity - m->rc.offset);
+       err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
+                        m->rc.capacity - m->rc.offset);
        p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
-       if (err == -EAGAIN) {
+       if (err == -EAGAIN)
                goto end_clear;
-       }
 
        if (err <= 0)
                goto error;
 
-       m->rpos += err;
+       m->rc.offset += err;
 
-       if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
-               u16 tag;
+       /* header read in */
+       if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
                p9_debug(P9_DEBUG_TRANS, "got new header\n");
 
-               n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
-               if (n >= m->client->msize) {
+               err = p9_parse_header(&m->rc, NULL, NULL, NULL, 0);
+               if (err) {
+                       p9_debug(P9_DEBUG_ERROR,
+                                "error parsing header: %d\n", err);
+                       goto error;
+               }
+
+               if (m->rc.size >= m->client->msize) {
                        p9_debug(P9_DEBUG_ERROR,
-                                "requested packet size too big: %d\n", n);
+                                "requested packet size too big: %d\n",
+                                m->rc.size);
                        err = -EIO;
                        goto error;
                }
 
-               tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
                p9_debug(P9_DEBUG_TRANS,
-                        "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
+                        "mux %p pkt: size: %d bytes tag: %d\n",
+                        m, m->rc.size, m->rc.tag);
 
-               m->req = p9_tag_lookup(m->client, tag);
+               m->req = p9_tag_lookup(m->client, m->rc.tag);
                if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
                        p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
-                                tag);
+                                m->rc.tag);
                        err = -EIO;
                        goto error;
                }
 
                if (m->req->rc == NULL) {
-                       m->req->rc = kmalloc(sizeof(struct p9_fcall) +
-                                               m->client->msize, GFP_NOFS);
-                       if (!m->req->rc) {
-                               m->req = NULL;
-                               err = -ENOMEM;
-                               goto error;
-                       }
+                       p9_debug(P9_DEBUG_ERROR,
+                                "No recv fcall for tag %d (req %p), disconnecting!\n",
+                                m->rc.tag, m->req);
+                       m->req = NULL;
+                       err = -EIO;
+                       goto error;
                }
-               m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
-               memcpy(m->rbuf, m->tmp_buf, m->rsize);
-               m->rsize = n;
+               m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+               memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
+               m->rc.capacity = m->rc.size;
        }
 
-       /* not an else because some packets (like clunk) have no payload */
-       if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+       /* packet is read in
+        * not an else because some packets (like clunk) have no payload
+        */
+       if ((m->req) && (m->rc.offset == m->rc.capacity)) {
                p9_debug(P9_DEBUG_TRANS, "got new packet\n");
                spin_lock(&m->client->lock);
                if (m->req->status != REQ_STATUS_ERROR)
@@ -375,9 +379,9 @@ static void p9_read_work(struct work_struct *work)
                list_del(&m->req->req_list);
                spin_unlock(&m->client->lock);
                p9_client_cb(m->client, m->req, status);
-               m->rbuf = NULL;
-               m->rpos = 0;
-               m->rsize = 0;
+               m->rc.sdata = NULL;
+               m->rc.offset = 0;
+               m->rc.capacity = 0;
                m->req = NULL;
        }
 
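
The 9p hunks above fold the ad-hoc rbuf/rpos/rsize read state into a struct p9_fcall (sdata/offset/capacity), so header parsing can reuse p9_parse_header(). A minimal userspace sketch of the same two-phase read cursor, with illustrative names and assuming a little-endian host for the 9P size[4] field:

    #include <stdint.h>
    #include <string.h>

    struct frame_cursor {
            char   *sdata;    /* current read buffer */
            size_t  offset;   /* bytes read so far */
            size_t  capacity; /* bytes expected for this frame */
    };

    /* Feed bytes in; returns 1 once a full frame is buffered, 0 if more
     * input is needed, -1 on a bogus size field. */
    static int cursor_feed(struct frame_cursor *c, char hdr[7],
                           const char **in, size_t *n,
                           char *body, size_t bodymax)
    {
            if (!c->sdata) {                /* start by reading the header */
                    c->sdata = hdr;
                    c->offset = 0;
                    c->capacity = 7;
            }
            while (*n > 0 && c->offset < c->capacity) {
                    c->sdata[c->offset++] = *(*in)++;
                    (*n)--;
            }
            if (c->sdata == hdr && c->offset == 7) {
                    uint32_t size;          /* size[4]; LE host assumed */
                    memcpy(&size, hdr, sizeof(size));
                    if (size < 7 || size > bodymax)
                            return -1;
                    memcpy(body, hdr, 7);   /* switch to full-frame buffer */
                    c->sdata = body;
                    c->capacity = size;
            }
            return c->offset == c->capacity;
    }
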
index 199bc76202d2552d23fe964f377bbf69eaf518b9..4acb1d5417aaf980bc7797c817eb9a9a350ecbf5 100644 (file)
@@ -658,7 +658,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
        mutex_unlock(&virtio_9p_lock);
 
        if (!found) {
-               pr_err("no channels available\n");
+               pr_err("no channels available for device %s\n", devname);
                return ret;
        }
 
index d5871ac493eb10c984b9e74f2beee3049bc79645..f066781be3c856b8ae192c6b74d614597297a872 100644 (file)
@@ -1625,7 +1625,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
                rt = atrtr_find(&at_hint);
        }
-       err = ENETUNREACH;
+       err = -ENETUNREACH;
        if (!rt)
                goto out;
 
index e6c8382c79ba86dfea5078a37f692f74ebffb01c..ccf70bed0d0cc0c8636d21ba7f8508bbbf9cd4e7 100644 (file)
@@ -527,11 +527,12 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
                 * gets dereferenced.
                 */
                spin_lock_bh(&bat_priv->gw.list_lock);
-               hlist_del_init_rcu(&gw_node->list);
+               if (!hlist_unhashed(&gw_node->list)) {
+                       hlist_del_init_rcu(&gw_node->list);
+                       batadv_gw_node_free_ref(gw_node);
+               }
                spin_unlock_bh(&bat_priv->gw.list_lock);
 
-               batadv_gw_node_free_ref(gw_node);
-
                curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
                if (gw_node == curr_gw)
                        batadv_gw_reselect(bat_priv);
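
The gw.c hunk above closes a race where two paths could both unlink and unref the same node. A kernel-style fragment of the guarded-removal idiom (my_node and my_node_put() are illustrative, not batman-adv API):

    static void node_release(struct my_node *node, spinlock_t *list_lock)
    {
            spin_lock_bh(list_lock);
            if (!hlist_unhashed(&node->list)) {
                    /* hlist_del_init_rcu() re-inits the node, so a racing
                     * second caller sees it unhashed and skips the put. */
                    hlist_del_init_rcu(&node->list);
                    my_node_put(node);      /* drop the list's reference once */
            }
            spin_unlock_bh(list_lock);
    }
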
index 01acccc4d2185806ae6dd37b2f0f091d9a0b92a0..57f7107169f58c5e772887a6260d070b1eb9fc1e 100644 (file)
@@ -75,6 +75,28 @@ out:
        return hard_iface;
 }
 
+/**
+ * batadv_mutual_parents - check if two devices are each other's parent
+ * @dev1: 1st net_device
+ * @dev2: 2nd net_device
+ *
+ * veth devices come in pairs and each is the parent of the other!
+ *
+ * Return: true if the devices are each other's parent, otherwise false
+ */
+static bool batadv_mutual_parents(const struct net_device *dev1,
+                                 const struct net_device *dev2)
+{
+       int dev1_parent_iflink = dev_get_iflink(dev1);
+       int dev2_parent_iflink = dev_get_iflink(dev2);
+
+       if (!dev1_parent_iflink || !dev2_parent_iflink)
+               return false;
+
+       return (dev1_parent_iflink == dev2->ifindex) &&
+              (dev2_parent_iflink == dev1->ifindex);
+}
+
 /**
  * batadv_is_on_batman_iface - check if a device is a batman iface descendant
  * @net_dev: the device to check
@@ -108,6 +130,9 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
        if (WARN(!parent_dev, "Cannot find parent device"))
                return false;
 
+       if (batadv_mutual_parents(net_dev, parent_dev))
+               return false;
+
        ret = batadv_is_on_batman_iface(parent_dev);
 
        return ret;
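
A userspace sketch of the mutual-parents test, using plain ints for ifindex/iflink; veth pairs report each other as parent, which would otherwise recurse forever in batadv_is_on_batman_iface():

    #include <stdbool.h>

    struct iface { int ifindex; int iflink; };

    static bool mutual_parents(const struct iface *a, const struct iface *b)
    {
            if (!a->iflink || !b->iflink)
                    return false;           /* no parent reported */
            return a->iflink == b->ifindex && b->iflink == a->ifindex;
    }
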
index cdfc85fa2743c78d4e0e3e269085ec5d6f1fb22f..0e80fd1461ab9062fed09a11373ea0f9a2817752 100644 (file)
@@ -303,9 +303,11 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
 
        if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
                spin_lock_bh(&orig_node->vlan_list_lock);
-               hlist_del_init_rcu(&vlan->list);
+               if (!hlist_unhashed(&vlan->list)) {
+                       hlist_del_init_rcu(&vlan->list);
+                       batadv_orig_node_vlan_free_ref(vlan);
+               }
                spin_unlock_bh(&orig_node->vlan_list_lock);
-               batadv_orig_node_vlan_free_ref(vlan);
        }
 
        batadv_orig_node_vlan_free_ref(vlan);
index d040365ba98e7f7cafcbedd20ac3b4c632603414..8a4cc2f7f0db2a277e4281d5be62d76c7446a716 100644 (file)
@@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
        /* check that it's our buffer */
        if (lowpan_is_ipv6(*skb_network_header(skb))) {
+               /* Pull off the 1-byte of 6lowpan header. */
+               skb_pull(skb, 1);
+
                /* Copy the packet so that the IPv6 header is
                 * properly aligned.
                 */
@@ -317,6 +320,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
                local_skb->protocol = htons(ETH_P_IPV6);
                local_skb->pkt_type = PACKET_HOST;
+               local_skb->dev = dev;
 
                skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
 
@@ -335,6 +339,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
                if (!local_skb)
                        goto drop;
 
+               local_skb->dev = dev;
+
                ret = iphc_decompress(local_skb, dev, chan);
                if (ret < 0) {
                        kfree_skb(local_skb);
@@ -343,7 +349,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
                local_skb->protocol = htons(ETH_P_IPV6);
                local_skb->pkt_type = PACKET_HOST;
-               local_skb->dev = dev;
 
                if (give_skb_to_upper(local_skb, dev)
                                != NET_RX_SUCCESS) {
index 47bcef7547967ce544ff0cf5ea7dabecdd080d90..883c821a9e784851a89c00e3874b7d9da9628a69 100644 (file)
@@ -4112,8 +4112,10 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
                        break;
                }
 
-               *req_complete = bt_cb(skb)->hci.req_complete;
-               *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+               if (bt_cb(skb)->hci.req_flags & HCI_REQ_SKB)
+                       *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+               else
+                       *req_complete = bt_cb(skb)->hci.req_complete;
                kfree_skb(skb);
        }
        spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
index 41b5f3813f02f1ed73ffecdc7d685645ded8c265..c78ee2dc93237475eb002ae61a737e0137c387f9 100644 (file)
@@ -688,21 +688,29 @@ static u8 update_white_list(struct hci_request *req)
         * command to remove it from the controller.
         */
        list_for_each_entry(b, &hdev->le_white_list, list) {
-               struct hci_cp_le_del_from_white_list cp;
+               /* If the device is neither in pend_le_conns nor
+                * pend_le_reports then remove it from the whitelist.
+                */
+               if (!hci_pend_le_action_lookup(&hdev->pend_le_conns,
+                                              &b->bdaddr, b->bdaddr_type) &&
+                   !hci_pend_le_action_lookup(&hdev->pend_le_reports,
+                                              &b->bdaddr, b->bdaddr_type)) {
+                       struct hci_cp_le_del_from_white_list cp;
+
+                       cp.bdaddr_type = b->bdaddr_type;
+                       bacpy(&cp.bdaddr, &b->bdaddr);
 
-               if (hci_pend_le_action_lookup(&hdev->pend_le_conns,
-                                             &b->bdaddr, b->bdaddr_type) ||
-                   hci_pend_le_action_lookup(&hdev->pend_le_reports,
-                                             &b->bdaddr, b->bdaddr_type)) {
-                       white_list_entries++;
+                       hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
+                                   sizeof(cp), &cp);
                        continue;
                }
 
-               cp.bdaddr_type = b->bdaddr_type;
-               bacpy(&cp.bdaddr, &b->bdaddr);
+               if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
+                       /* White list can not be used with RPAs */
+                       return 0x00;
+               }
 
-               hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
-                           sizeof(cp), &cp);
+               white_list_entries++;
        }
 
        /* Since all no longer valid white list entries have been
index 39a5149f301085d45d37a94361e2b86486d852bd..eb4f5f24cbe36a1d4f524978d52d8558f4dbde80 100644 (file)
@@ -197,10 +197,20 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm)
                chan->sport = psm;
                err = 0;
        } else {
-               u16 p;
+               u16 p, start, end, incr;
+
+               if (chan->src_type == BDADDR_BREDR) {
+                       start = L2CAP_PSM_DYN_START;
+                       end = L2CAP_PSM_AUTO_END;
+                       incr = 2;
+               } else {
+                       start = L2CAP_PSM_LE_DYN_START;
+                       end = L2CAP_PSM_LE_DYN_END;
+                       incr = 1;
+               }
 
                err = -EINVAL;
-               for (p = 0x1001; p < 0x1100; p += 2)
+               for (p = start; p <= end; p += incr)
                        if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
                                chan->psm   = cpu_to_le16(p);
                                chan->sport = cpu_to_le16(p);
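
A userspace sketch of the dynamic PSM scan after this change; the range constants are assumed from the matching l2cap.h hunk, which is not shown here (BR/EDR dynamic PSMs are the odd values 0x1001..0x10ff, LE dynamic PSMs 0x0080..0x00ff):

    #include <stdint.h>
    #include <stdbool.h>

    static int pick_psm(bool bredr, bool (*in_use)(uint16_t psm))
    {
            uint16_t p, start, end, incr;

            if (bredr) {
                    start = 0x1001; end = 0x10ff; incr = 2;   /* odd PSMs */
            } else {
                    start = 0x0080; end = 0x00ff; incr = 1;
            }
            for (p = start; p <= end; p += incr)
                    if (!in_use(p))
                            return p;
            return -1;      /* -EINVAL in the kernel path */
    }
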
index 1bb5515270449e8115a0c169ea5b6380594c40a4..e4cae72895a72bebc4c95b430d688e83cfebfada 100644 (file)
@@ -58,7 +58,7 @@ static int l2cap_validate_bredr_psm(u16 psm)
                return -EINVAL;
 
        /* Restrict usage of well-known PSMs */
-       if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE))
+       if (psm < L2CAP_PSM_DYN_START && !capable(CAP_NET_BIND_SERVICE))
                return -EACCES;
 
        return 0;
@@ -67,11 +67,11 @@ static int l2cap_validate_bredr_psm(u16 psm)
 static int l2cap_validate_le_psm(u16 psm)
 {
        /* Valid LE_PSM ranges are defined only until 0x00ff */
-       if (psm > 0x00ff)
+       if (psm > L2CAP_PSM_LE_DYN_END)
                return -EINVAL;
 
        /* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */
-       if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE))
+       if (psm < L2CAP_PSM_LE_DYN_START && !capable(CAP_NET_BIND_SERVICE))
                return -EACCES;
 
        return 0;
@@ -125,6 +125,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
                        goto done;
        }
 
+       bacpy(&chan->src, &la.l2_bdaddr);
+       chan->src_type = la.l2_bdaddr_type;
+
        if (la.l2_cid)
                err = l2cap_add_scid(chan, __le16_to_cpu(la.l2_cid));
        else
@@ -156,9 +159,6 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
                break;
        }
 
-       bacpy(&chan->src, &la.l2_bdaddr);
-       chan->src_type = la.l2_bdaddr_type;
-
        if (chan->psm && bdaddr_type_is_le(chan->src_type))
                chan->mode = L2CAP_MODE_LE_FLOWCTL;
 
index ffed8a1d4f27634866c93d22b4ceb059b956cc91..4b175df35184b08b5695d966f66b129148c6c011 100644 (file)
@@ -1072,22 +1072,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
                        hcon->dst_type = smp->remote_irk->addr_type;
                        queue_work(hdev->workqueue, &conn->id_addr_update_work);
                }
-
-               /* When receiving an indentity resolving key for
-                * a remote device that does not use a resolvable
-                * private address, just remove the key so that
-                * it is possible to use the controller white
-                * list for scanning.
-                *
-                * Userspace will have been told to not store
-                * this key at this point. So it is safe to
-                * just remove it.
-                */
-               if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
-                       list_del_rcu(&smp->remote_irk->list);
-                       kfree_rcu(smp->remote_irk, rcu);
-                       smp->remote_irk = NULL;
-               }
        }
 
        if (smp->csrk) {
index a1abe4936fe15299b7643b8944023050c5f938c1..3addc05b9a16676b07ca13bc6faf0cba62b3acfd 100644 (file)
@@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = {
        .notifier_call = br_device_event
 };
 
+/* called with RTNL */
 static int br_switchdev_event(struct notifier_block *unused,
                              unsigned long event, void *ptr)
 {
@@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused,
        struct switchdev_notifier_fdb_info *fdb_info;
        int err = NOTIFY_DONE;
 
-       rtnl_lock();
        p = br_port_get_rtnl(dev);
        if (!p)
                goto out;
@@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused,
        }
 
 out:
-       rtnl_unlock();
        return err;
 }
 
index 30e105f57f0d9a59e1ce03de8531b90718ac8e29..74c278e0022545729641c5741bbd3d07cd6b36b5 100644 (file)
@@ -425,8 +425,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
        mp = br_mdb_ip_get(mdb, group);
        if (!mp) {
                mp = br_multicast_new_group(br, port, group);
-               err = PTR_ERR(mp);
-               if (IS_ERR(mp))
+               err = PTR_ERR_OR_ZERO(mp);
+               if (err)
                        return err;
        }
 
index 61d7617d924912347325a5db483172794e6df33a..b82440e1fcb4f3fe8a25e121416360cc23f8fada 100644 (file)
@@ -159,7 +159,7 @@ static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
                tmppkt = NULL;
 
                /* Verify that length is correct */
-               err = EPROTO;
+               err = -EPROTO;
                if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
                        goto out;
        }
index 10d87753ed8737329c244b1ae7718e95c8abd118..9e43a315e6622028ba2b61fd8d36ab20305d611b 100644 (file)
@@ -152,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
        void *ticket_buf = NULL;
        void *tp, *tpend;
        void **ptp;
-       struct ceph_timespec new_validity;
        struct ceph_crypto_key new_session_key;
        struct ceph_buffer *new_ticket_blob;
        unsigned long new_expires, new_renew_after;
@@ -193,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
        if (ret)
                goto out;
 
-       ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
-       ceph_decode_timespec(&validity, &new_validity);
+       ceph_decode_timespec(&validity, dp);
+       dp += sizeof(struct ceph_timespec);
        new_expires = get_seconds() + validity.tv_sec;
        new_renew_after = new_expires - (validity.tv_sec / 4);
        dout(" expires=%lu renew_after=%lu\n", new_expires,
@@ -233,10 +232,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
                ceph_buffer_put(th->ticket_blob);
        th->session_key = new_session_key;
        th->ticket_blob = new_ticket_blob;
-       th->validity = new_validity;
        th->secret_id = new_secret_id;
        th->expires = new_expires;
        th->renew_after = new_renew_after;
+       th->have_key = true;
        dout(" got ticket service %d (%s) secret_id %lld len %d\n",
             type, ceph_entity_type_name(type), th->secret_id,
             (int)th->ticket_blob->vec.iov_len);
@@ -384,6 +383,24 @@ bad:
        return -ERANGE;
 }
 
+static bool need_key(struct ceph_x_ticket_handler *th)
+{
+       if (!th->have_key)
+               return true;
+
+       return get_seconds() >= th->renew_after;
+}
+
+static bool have_key(struct ceph_x_ticket_handler *th)
+{
+       if (th->have_key) {
+               if (get_seconds() >= th->expires)
+                       th->have_key = false;
+       }
+
+       return th->have_key;
+}
+
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
 {
        int want = ac->want_keys;
@@ -402,20 +419,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
                        continue;
 
                th = get_ticket_handler(ac, service);
-
                if (IS_ERR(th)) {
                        *pneed |= service;
                        continue;
                }
 
-               if (get_seconds() >= th->renew_after)
+               if (need_key(th))
                        *pneed |= service;
-               if (get_seconds() >= th->expires)
+               if (!have_key(th))
                        xi->have_keys &= ~service;
        }
 }
 
-
 static int ceph_x_build_request(struct ceph_auth_client *ac,
                                void *buf, void *end)
 {
@@ -667,14 +682,26 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
        ac->private = NULL;
 }
 
-static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
-                                  int peer_type)
+static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
 {
        struct ceph_x_ticket_handler *th;
 
        th = get_ticket_handler(ac, peer_type);
        if (!IS_ERR(th))
-               memset(&th->validity, 0, sizeof(th->validity));
+               th->have_key = false;
+}
+
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+                                        int peer_type)
+{
+       /*
+        * We are invalidating a service ticket in the hope of getting
+        * a new, valid one.  But we won't get it unless our AUTH
+        * ticket is good, so invalidate the AUTH ticket as well, just
+        * in case.
+        */
+       invalidate_ticket(ac, peer_type);
+       invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
 }
 
 static int calcu_signature(struct ceph_x_authorizer *au,
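
A userspace sketch of the ticket lifecycle the new have_key flag encodes: renewal is requested once renew_after passes, and the key is lazily dropped once expires passes:

    #include <stdbool.h>
    #include <time.h>

    struct ticket { bool have_key; time_t renew_after, expires; };

    static bool ticket_needs_key(const struct ticket *t)
    {
            return !t->have_key || time(NULL) >= t->renew_after;
    }

    static bool ticket_has_key(struct ticket *t)
    {
            if (t->have_key && time(NULL) >= t->expires)
                    t->have_key = false;    /* lazily invalidate on expiry */
            return t->have_key;
    }
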
index e8b7c6917d472f69d356252415efcb76a4afd3cf..40b1a3cf7397352e4673becccdbbb083c88f04d8 100644 (file)
@@ -16,7 +16,7 @@ struct ceph_x_ticket_handler {
        unsigned int service;
 
        struct ceph_crypto_key session_key;
-       struct ceph_timespec validity;
+       bool have_key;
 
        u64 secret_id;
        struct ceph_buffer *ticket_blob;
index 393bfb22d5bbafd83c20f5953b98c6709b637452..5fcfb98f309efb5018f84628902cd1711fc36705 100644 (file)
@@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
  * @local_retries: localized retries
  * @local_fallback_retries: localized fallback retries
  * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @stable: stable mode starts rep=0 in the recursive call for all replicas
  * @vary_r: pass r to recursive calls
  * @out2: second output vector for leaf items (if @recurse_to_leaf)
  * @parent_r: r value passed from the parent
@@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                               unsigned int local_fallback_retries,
                               int recurse_to_leaf,
                               unsigned int vary_r,
+                              unsigned int stable,
                               int *out2,
                               int parent_r)
 {
@@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
        int collide, reject;
        int count = out_size;
 
-       dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+       dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
                recurse_to_leaf ? "_LEAF" : "",
                bucket->id, x, outpos, numrep,
                tries, recurse_tries, local_retries, local_fallback_retries,
-               parent_r);
+               parent_r, stable);
 
-       for (rep = outpos; rep < numrep && count > 0 ; rep++) {
+       for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
                /* keep trying until we get a non-out, non-colliding item */
                ftotal = 0;
                skip_rep = 0;
@@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
                                                if (crush_choose_firstn(map,
                                                         map->buckets[-1-item],
                                                         weight, weight_max,
-                                                        x, outpos+1, 0,
+                                                        x, stable ? 1 : outpos+1, 0,
                                                         out2, outpos, count,
                                                         recurse_tries, 0,
                                                         local_retries,
                                                         local_fallback_retries,
                                                         0,
                                                         vary_r,
+                                                        stable,
                                                         NULL,
                                                         sub_r) <= outpos)
                                                        /* didn't get leaf */
@@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
        int choose_local_fallback_retries = map->choose_local_fallback_tries;
 
        int vary_r = map->chooseleaf_vary_r;
+       int stable = map->chooseleaf_stable;
 
        if ((__u32)ruleno >= map->max_rules) {
                dprintk(" bad ruleno %d\n", ruleno);
@@ -835,7 +839,8 @@ int crush_do_rule(const struct crush_map *map,
                case CRUSH_RULE_TAKE:
                        if ((curstep->arg1 >= 0 &&
                             curstep->arg1 < map->max_devices) ||
-                           (-1-curstep->arg1 < map->max_buckets &&
+                           (-1-curstep->arg1 >= 0 &&
+                            -1-curstep->arg1 < map->max_buckets &&
                             map->buckets[-1-curstep->arg1])) {
                                w[0] = curstep->arg1;
                                wsize = 1;
@@ -869,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
                                vary_r = curstep->arg1;
                        break;
 
+               case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
+                       if (curstep->arg1 >= 0)
+                               stable = curstep->arg1;
+                       break;
+
                case CRUSH_RULE_CHOOSELEAF_FIRSTN:
                case CRUSH_RULE_CHOOSE_FIRSTN:
                        firstn = 1;
@@ -888,6 +898,7 @@ int crush_do_rule(const struct crush_map *map,
                        osize = 0;
 
                        for (i = 0; i < wsize; i++) {
+                               int bno;
                                /*
                                 * see CRUSH_N, CRUSH_N_MINUS macros.
                                 * basically, numrep <= 0 means relative to
@@ -900,6 +911,13 @@ int crush_do_rule(const struct crush_map *map,
                                                continue;
                                }
                                j = 0;
+                               /* make sure bucket id is valid */
+                               bno = -1 - w[i];
+                               if (bno < 0 || bno >= map->max_buckets) {
+                                       /* w[i] is probably CRUSH_ITEM_NONE */
+                                       dprintk("  bad w[i] %d\n", w[i]);
+                                       continue;
+                               }
                                if (firstn) {
                                        int recurse_tries;
                                        if (choose_leaf_tries)
@@ -911,7 +929,7 @@ int crush_do_rule(const struct crush_map *map,
                                                recurse_tries = choose_tries;
                                        osize += crush_choose_firstn(
                                                map,
-                                               map->buckets[-1-w[i]],
+                                               map->buckets[bno],
                                                weight, weight_max,
                                                x, numrep,
                                                curstep->arg2,
@@ -923,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
                                                choose_local_fallback_retries,
                                                recurse_to_leaf,
                                                vary_r,
+                                               stable,
                                                c+osize,
                                                0);
                                } else {
@@ -930,7 +949,7 @@ int crush_do_rule(const struct crush_map *map,
                                                    numrep : (result_max-osize));
                                        crush_choose_indep(
                                                map,
-                                               map->buckets[-1-w[i]],
+                                               map->buckets[bno],
                                                weight, weight_max,
                                                x, out_size, numrep,
                                                curstep->arg2,
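
A fragment isolating the bucket-id check added above; CRUSH encodes bucket ids as negative numbers, and the view struct here is an illustrative subset of struct crush_map:

    struct crush_bucket;                    /* opaque here */

    struct crush_map_view {                 /* illustrative subset */
            int max_buckets;
            struct crush_bucket **buckets;
    };

    static struct crush_bucket *lookup_bucket(const struct crush_map_view *map,
                                              int id)
    {
            int bno = -1 - id;              /* bucket ids are negative */

            if (bno < 0 || bno >= map->max_buckets)
                    return NULL;            /* e.g. CRUSH_ITEM_NONE */
            return map->buckets[bno];
    }
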
index 9981039ef4ffcca29081b83a5e06a81ce6871f20..9382619a405b8da854c82b041a137a670399567f 100644 (file)
@@ -23,9 +23,6 @@
 #include <linux/ceph/pagelist.h>
 #include <linux/export.h>
 
-#define list_entry_next(pos, member)                                   \
-       list_entry(pos->member.next, typeof(*pos), member)
-
 /*
  * Ceph uses the messenger to exchange ceph_msg messages with other
  * hosts in the system.  The messenger provides ordered and reliable
@@ -672,6 +669,8 @@ static void reset_connection(struct ceph_connection *con)
        }
        con->in_seq = 0;
        con->in_seq_acked = 0;
+
+       con->out_skip = 0;
 }
 
 /*
@@ -771,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
 
 static void con_out_kvec_reset(struct ceph_connection *con)
 {
+       BUG_ON(con->out_skip);
+
        con->out_kvec_left = 0;
        con->out_kvec_bytes = 0;
        con->out_kvec_cur = &con->out_kvec[0];
@@ -779,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
 static void con_out_kvec_add(struct ceph_connection *con,
                                size_t size, void *data)
 {
-       int index;
+       int index = con->out_kvec_left;
 
-       index = con->out_kvec_left;
+       BUG_ON(con->out_skip);
        BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
 
        con->out_kvec[index].iov_len = size;
@@ -790,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
        con->out_kvec_bytes += size;
 }
 
+/*
+ * Chop off a kvec from the end.  Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+       int off = con->out_kvec_cur - con->out_kvec;
+       int skip = 0;
+
+       if (con->out_kvec_bytes > 0) {
+               skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+               BUG_ON(con->out_kvec_bytes < skip);
+               BUG_ON(!con->out_kvec_left);
+               con->out_kvec_bytes -= skip;
+               con->out_kvec_left--;
+       }
+
+       return skip;
+}
+
 #ifdef CONFIG_BLOCK
 
 /*
@@ -1042,7 +1064,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
        /* Move on to the next page */
 
        BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
-       cursor->page = list_entry_next(cursor->page, lru);
+       cursor->page = list_next_entry(cursor->page, lru);
        cursor->last_piece = cursor->resid <= PAGE_SIZE;
 
        return true;
@@ -1166,7 +1188,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
        if (!cursor->resid && cursor->total_resid) {
                WARN_ON(!cursor->last_piece);
                BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
-               cursor->data = list_entry_next(cursor->data, links);
+               cursor->data = list_next_entry(cursor->data, links);
                __ceph_msg_data_cursor_init(cursor);
                new_piece = true;
        }
@@ -1175,6 +1197,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
        return new_piece;
 }
 
+static size_t sizeof_footer(struct ceph_connection *con)
+{
+       return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
+           sizeof(struct ceph_msg_footer) :
+           sizeof(struct ceph_msg_footer_old);
+}
+
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
        BUG_ON(!msg);
@@ -1197,7 +1226,6 @@ static void prepare_write_message_footer(struct ceph_connection *con)
        m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
 
        dout("prepare_write_message_footer %p\n", con);
-       con->out_kvec_is_msg = true;
        con->out_kvec[v].iov_base = &m->footer;
        if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
                if (con->ops->sign_message)
@@ -1225,7 +1253,6 @@ static void prepare_write_message(struct ceph_connection *con)
        u32 crc;
 
        con_out_kvec_reset(con);
-       con->out_kvec_is_msg = true;
        con->out_msg_done = false;
 
        /* Sneak an ack in there first?  If we can get it into the same
@@ -1265,18 +1292,19 @@ static void prepare_write_message(struct ceph_connection *con)
 
        /* tag + hdr + front + middle */
        con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
-       con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+       con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
        con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
 
        if (m->middle)
                con_out_kvec_add(con, m->middle->vec.iov_len,
                        m->middle->vec.iov_base);
 
-       /* fill in crc (except data pages), footer */
+       /* fill in hdr crc and finalize hdr */
        crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
        con->out_msg->hdr.crc = cpu_to_le32(crc);
-       con->out_msg->footer.flags = 0;
+       memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
 
+       /* fill in front and middle crc, footer */
        crc = crc32c(0, m->front.iov_base, m->front.iov_len);
        con->out_msg->footer.front_crc = cpu_to_le32(crc);
        if (m->middle) {
@@ -1288,6 +1316,7 @@ static void prepare_write_message(struct ceph_connection *con)
        dout("%s front_crc %u middle_crc %u\n", __func__,
             le32_to_cpu(con->out_msg->footer.front_crc),
             le32_to_cpu(con->out_msg->footer.middle_crc));
+       con->out_msg->footer.flags = 0;
 
        /* is there a data payload? */
        con->out_msg->footer.data_crc = 0;
@@ -1492,7 +1521,6 @@ static int write_partial_kvec(struct ceph_connection *con)
                }
        }
        con->out_kvec_left = 0;
-       con->out_kvec_is_msg = false;
        ret = 1;
 out:
        dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
@@ -1584,6 +1612,7 @@ static int write_partial_skip(struct ceph_connection *con)
 {
        int ret;
 
+       dout("%s %p %d left\n", __func__, con, con->out_skip);
        while (con->out_skip > 0) {
                size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
 
@@ -2313,9 +2342,9 @@ static int read_partial_message(struct ceph_connection *con)
                        ceph_pr_addr(&con->peer_addr.in_addr),
                        seq, con->in_seq + 1);
                con->in_base_pos = -front_len - middle_len - data_len -
-                       sizeof(m->footer);
+                       sizeof_footer(con);
                con->in_tag = CEPH_MSGR_TAG_READY;
-               return 0;
+               return 1;
        } else if ((s64)seq - (s64)con->in_seq > 1) {
                pr_err("read_partial_message bad seq %lld expected %lld\n",
                       seq, con->in_seq + 1);
@@ -2338,10 +2367,10 @@ static int read_partial_message(struct ceph_connection *con)
                        /* skip this message */
                        dout("alloc_msg said skip message\n");
                        con->in_base_pos = -front_len - middle_len - data_len -
-                               sizeof(m->footer);
+                               sizeof_footer(con);
                        con->in_tag = CEPH_MSGR_TAG_READY;
                        con->in_seq++;
-                       return 0;
+                       return 1;
                }
 
                BUG_ON(!con->in_msg);
@@ -2506,13 +2535,13 @@ more:
 
 more_kvec:
        /* kvec data queued? */
-       if (con->out_skip) {
-               ret = write_partial_skip(con);
+       if (con->out_kvec_left) {
+               ret = write_partial_kvec(con);
                if (ret <= 0)
                        goto out;
        }
-       if (con->out_kvec_left) {
-               ret = write_partial_kvec(con);
+       if (con->out_skip) {
+               ret = write_partial_skip(con);
                if (ret <= 0)
                        goto out;
        }
@@ -2805,13 +2834,17 @@ static bool con_backoff(struct ceph_connection *con)
 
 static void con_fault_finish(struct ceph_connection *con)
 {
+       dout("%s %p\n", __func__, con);
+
        /*
         * in case we faulted due to authentication, invalidate our
         * current tickets so that we can get new ones.
         */
-       if (con->auth_retry && con->ops->invalidate_authorizer) {
-               dout("calling invalidate_authorizer()\n");
-               con->ops->invalidate_authorizer(con);
+       if (con->auth_retry) {
+               dout("auth_retry %d, invalidating\n", con->auth_retry);
+               if (con->ops->invalidate_authorizer)
+                       con->ops->invalidate_authorizer(con);
+               con->auth_retry = 0;
        }
 
        if (con->ops->fault)
@@ -3050,16 +3083,31 @@ void ceph_msg_revoke(struct ceph_msg *msg)
                ceph_msg_put(msg);
        }
        if (con->out_msg == msg) {
-               dout("%s %p msg %p - was sending\n", __func__, con, msg);
-               con->out_msg = NULL;
-               if (con->out_kvec_is_msg) {
-                       con->out_skip = con->out_kvec_bytes;
-                       con->out_kvec_is_msg = false;
+               BUG_ON(con->out_skip);
+               /* footer */
+               if (con->out_msg_done) {
+                       con->out_skip += con_out_kvec_skip(con);
+               } else {
+                       BUG_ON(!msg->data_length);
+                       if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+                               con->out_skip += sizeof(msg->footer);
+                       else
+                               con->out_skip += sizeof(msg->old_footer);
                }
+               /* data, middle, front */
+               if (msg->data_length)
+                       con->out_skip += msg->cursor.total_resid;
+               if (msg->middle)
+                       con->out_skip += con_out_kvec_skip(con);
+               con->out_skip += con_out_kvec_skip(con);
+
+               dout("%s %p msg %p - was sending, will write %d skip %d\n",
+                    __func__, con, msg, con->out_kvec_bytes, con->out_skip);
                msg->hdr.seq = 0;
-
+               con->out_msg = NULL;
                ceph_msg_put(msg);
        }
+
        mutex_unlock(&con->mutex);
 }
 
@@ -3361,9 +3409,7 @@ static void ceph_msg_free(struct ceph_msg *m)
 static void ceph_msg_release(struct kref *kref)
 {
        struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
-       LIST_HEAD(data);
-       struct list_head *links;
-       struct list_head *next;
+       struct ceph_msg_data *data, *next;
 
        dout("%s %p\n", __func__, m);
        WARN_ON(!list_empty(&m->list_head));
@@ -3376,12 +3422,8 @@ static void ceph_msg_release(struct kref *kref)
                m->middle = NULL;
        }
 
-       list_splice_init(&m->data, &data);
-       list_for_each_safe(links, next, &data) {
-               struct ceph_msg_data *data;
-
-               data = list_entry(links, struct ceph_msg_data, links);
-               list_del_init(links);
+       list_for_each_entry_safe(data, next, &m->data, links) {
+               list_del_init(&data->links);
                ceph_msg_data_destroy(data);
        }
        m->data_length = 0;
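
A userspace sketch of the con_out_kvec_skip() bookkeeping, simplified to ignore the kernel's cursor into the array: drop the last queued vector and report the bytes it would have written, so the caller can move them into out_skip:

    #include <sys/uio.h>

    struct out_queue {
            struct iovec vec[8];
            int left;       /* entries queued */
            int bytes;      /* total bytes queued */
    };

    static int out_queue_skip_last(struct out_queue *q)
    {
            int skip = 0;

            if (q->bytes > 0 && q->left > 0) {
                    skip = q->vec[q->left - 1].iov_len;
                    q->bytes -= skip;       /* residual, now accounted */
                    q->left--;              /* elsewhere as skip bytes */
            }
            return skip;
    }
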
index edda01626a459efbfdaeebd46fd6a0b13a037879..de85dddc3dc08cfeee81435c4475a6d975c4cd96 100644 (file)
@@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc)
        return monc->client->have_fsid && monc->auth->global_id > 0;
 }
 
-/*
- * The monitor responds with mount ack indicate mount success.  The
- * included client ticket allows the client to talk to MDSs and OSDs.
- */
 static void ceph_monc_handle_map(struct ceph_mon_client *monc,
                                 struct ceph_msg *msg)
 {
index f8f235930d887adf1df94314b18c719638328df0..5bc053778feddd0e35fe73480317be25345da61c 100644 (file)
@@ -1770,6 +1770,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
        u32 osdmap_epoch;
        int already_completed;
        u32 bytes;
+       u8 decode_redir;
        unsigned int i;
 
        tid = le64_to_cpu(msg->hdr.tid);
@@ -1841,6 +1842,15 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
                p += 8 + 4; /* skip replay_version */
                p += 8; /* skip user_version */
 
+               if (le16_to_cpu(msg->hdr.version) >= 7)
+                       ceph_decode_8_safe(&p, end, decode_redir, bad_put);
+               else
+                       decode_redir = 1;
+       } else {
+               decode_redir = 0;
+       }
+
+       if (decode_redir) {
                err = ceph_redirect_decode(&p, end, &redir);
                if (err)
                        goto bad_put;
@@ -2843,8 +2853,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
        mutex_lock(&osdc->request_mutex);
        req = __lookup_request(osdc, tid);
        if (!req) {
-               pr_warn("%s osd%d tid %llu unknown, skipping\n",
-                       __func__, osd->o_osd, tid);
+               dout("%s osd%d tid %llu unknown, skipping\n", __func__,
+                    osd->o_osd, tid);
                m = NULL;
                *skip = 1;
                goto out;
index 7d8f581d9f1f7987b8d7051160c34f42ad2f5e73..243574c8cf33807fcaf9374530358f1e44080764 100644 (file)
@@ -342,23 +342,32 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
         c->choose_local_tries = ceph_decode_32(p);
         c->choose_local_fallback_tries =  ceph_decode_32(p);
         c->choose_total_tries = ceph_decode_32(p);
-        dout("crush decode tunable choose_local_tries = %d",
+        dout("crush decode tunable choose_local_tries = %d\n",
              c->choose_local_tries);
-        dout("crush decode tunable choose_local_fallback_tries = %d",
+        dout("crush decode tunable choose_local_fallback_tries = %d\n",
              c->choose_local_fallback_tries);
-        dout("crush decode tunable choose_total_tries = %d",
+        dout("crush decode tunable choose_total_tries = %d\n",
              c->choose_total_tries);
 
        ceph_decode_need(p, end, sizeof(u32), done);
        c->chooseleaf_descend_once = ceph_decode_32(p);
-       dout("crush decode tunable chooseleaf_descend_once = %d",
+       dout("crush decode tunable chooseleaf_descend_once = %d\n",
             c->chooseleaf_descend_once);
 
        ceph_decode_need(p, end, sizeof(u8), done);
        c->chooseleaf_vary_r = ceph_decode_8(p);
-       dout("crush decode tunable chooseleaf_vary_r = %d",
+       dout("crush decode tunable chooseleaf_vary_r = %d\n",
             c->chooseleaf_vary_r);
 
+       /* skip straw_calc_version, allowed_bucket_algs */
+       ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
+       *p += sizeof(u8) + sizeof(u32);
+
+       ceph_decode_need(p, end, sizeof(u8), done);
+       c->chooseleaf_stable = ceph_decode_8(p);
+       dout("crush decode tunable chooseleaf_stable = %d\n",
+            c->chooseleaf_stable);
+
 done:
        dout("crush_decode success\n");
        return c;
index cc9e3652cf93a6306e6f614f966e0fe7cf17a10e..0ef061b2badcd1334d2146735c30e32b7b39998c 100644 (file)
@@ -4351,6 +4351,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                diffs |= p->vlan_tci ^ skb->vlan_tci;
+               diffs |= skb_metadata_dst_cmp(p, skb);
                if (maclen == ETH_HLEN)
                        diffs |= compare_ether_header(skb_mac_header(p),
                                                      skb_mac_header(skb));
@@ -4548,10 +4549,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
                break;
 
        case GRO_MERGED_FREE:
-               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
+                       skb_dst_drop(skb);
                        kmem_cache_free(skbuff_head_cache, skb);
-               else
+               } else {
                        __kfree_skb(skb);
+               }
                break;
 
        case GRO_HELD:
@@ -5376,12 +5379,12 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
 {
        struct netdev_adjacent *lower;
 
-       lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+       lower = list_entry(*iter, struct netdev_adjacent, list);
 
        if (&lower->list == &dev->adj_list.lower)
                return NULL;
 
-       *iter = &lower->list;
+       *iter = lower->list.next;
 
        return lower->dev;
 }
@@ -7419,8 +7422,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
        setup(dev);
 
-       if (!dev->tx_queue_len)
+       if (!dev->tx_queue_len) {
                dev->priv_flags |= IFF_NO_QUEUE;
+               dev->tx_queue_len = 1;
+       }
 
        dev->num_tx_queues = txqs;
        dev->real_num_tx_queues = txqs;
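
A userspace sketch of the iterator fix above, with a circular list standing in for list_head: the cursor now points at the element still to be returned, and is advanced only after that element is read:

    #include <stddef.h>

    struct node { struct node *next; int val; };

    /* head is a sentinel; *iter starts at head->next */
    static struct node *iter_next(struct node *head, struct node **iter)
    {
            struct node *n = *iter;

            if (n == head)
                    return NULL;    /* wrapped around: done */
            *iter = n->next;        /* advance after reading */
            return n;
    }
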
index d79699c9d1b9eb9f250254e360b2d5a7b4ff6e34..12e7003320107dbb2b86f4fa1cfd20a8d59e8ed5 100644 (file)
@@ -208,7 +208,6 @@ ip:
        case htons(ETH_P_IPV6): {
                const struct ipv6hdr *iph;
                struct ipv6hdr _iph;
-               __be32 flow_label;
 
 ipv6:
                iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
@@ -230,8 +229,12 @@ ipv6:
                        key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                }
 
-               flow_label = ip6_flowlabel(iph);
-               if (flow_label) {
+               if ((dissector_uses_key(flow_dissector,
+                                       FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
+                    (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
+                   ip6_flowlabel(iph)) {
+                       __be32 flow_label = ip6_flowlabel(iph);
+
                        if (dissector_uses_key(flow_dissector,
                                               FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
                                key_tags = skb_flow_dissector_target(flow_dissector,
@@ -396,6 +399,13 @@ ip_proto_again:
                                goto out_bad;
                        proto = eth->h_proto;
                        nhoff += sizeof(*eth);
+
+                       /* Cap headers that we access via pointers at the
+                        * end of the Ethernet header as our maximum alignment
+                        * at that point is only 2 bytes.
+                        */
+                       if (NET_IP_ALIGN)
+                               hlen = nhoff;
                }
 
                key_control->flags |= FLOW_DIS_ENCAPSULATION;
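
A userspace sketch of the bounded header access that the NET_IP_ALIGN cap above forces: shrinking the in-place limit makes the dissector fall back to a copy, so it never dereferences a potentially misaligned header directly:

    #include <string.h>

    static const void *header_pointer(const char *pkt, size_t pkt_len,
                                      size_t hlen,  /* in-place limit */
                                      size_t off, size_t len, void *buf)
    {
            if (off + len > pkt_len)
                    return NULL;            /* truncated packet */
            if (off + len <= hlen)
                    return pkt + off;       /* safe to use in place */
            memcpy(buf, pkt + off, len);    /* maybe unaligned: copy */
            return buf;
    }
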
index 14596fb3717270d62fa70544b7ec2496de96e1ce..2696aefdc148887138d46f98dc0a678ce2699512 100644 (file)
@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
                *fplp = fpl;
                fpl->count = 0;
                fpl->max = SCM_MAX_FD;
+               fpl->user = NULL;
        }
        fpp = &fpl->fp[fpl->count];
 
@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
                *fpp++ = file;
                fpl->count++;
        }
+
+       if (!fpl->user)
+               fpl->user = get_uid(current_user());
+
        return num;
 }
 
@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm)
                scm->fp = NULL;
                for (i=fpl->count-1; i>=0; i--)
                        fput(fpl->fp[i]);
+               free_uid(fpl->user);
                kfree(fpl);
        }
 }
@@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
                for (i = 0; i < fpl->count; i++)
                        get_file(fpl->fp[i]);
                new_fpl->max = new_fpl->count;
+               new_fpl->user = get_uid(fpl->user);
        }
        return new_fpl;
 }
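
A kernel-style fragment of the accounting added here (fd_list is illustrative; get_uid(), current_user() and free_uid() are the real helpers): the list pins a reference to the sending user, so in-flight fd limits are charged to the right user even after the sender exits:

    struct fd_list {
            int count;
            struct user_struct *user;   /* charged for in-flight fds */
    };

    static void fd_list_charge(struct fd_list *fpl)
    {
            if (!fpl->user)
                    fpl->user = get_uid(current_user());  /* take a ref */
    }

    static void fd_list_release(struct fd_list *fpl)
    {
            free_uid(fpl->user);        /* free_uid(NULL) is a no-op */
            fpl->user = NULL;
    }
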
index b2df375ec9c2173a8132b8efa1c3062f0510284b..5bf88f58bee7405ff65f80487a64339b92a91bcb 100644 (file)
@@ -79,6 +79,8 @@
 
 struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
+EXPORT_SYMBOL(sysctl_max_skb_frags);
 
 /**
  *     skb_panic - private function for out-of-line support
index 1df98c55744029de02522e34ed00c3cd2ca03af1..e92b759d906c1bbcad5ff3ecc977d6393df90361 100644 (file)
@@ -93,10 +93,17 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
  *  @sk2: Socket belonging to the existing reuseport group.
  *  May return ENOMEM and not add socket to group under memory pressure.
  */
-int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
 {
        struct sock_reuseport *reuse;
 
+       if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+               int err = reuseport_alloc(sk2);
+
+               if (err)
+                       return err;
+       }
+
        spin_lock_bh(&reuseport_lock);
        reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock)),
index 95b6139d710c46825d1e43f825188d81fcb70f60..a6beb7b6ae556dff501413d6661d9c4655502e36 100644 (file)
@@ -26,6 +26,7 @@ static int zero = 0;
 static int one = 1;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
+static int max_skb_frags = MAX_SKB_FRAGS;
 
 static int net_msg_warn;       /* Unused, but still a sysctl */
 
@@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
+       {
+               .procname       = "max_skb_frags",
+               .data           = &sysctl_max_skb_frags,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
+               .extra2         = &max_skb_frags,
+       },
        { }
 };
 
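
Once applied, the new knob is an ordinary procfs integer clamped to [1, MAX_SKB_FRAGS]. A minimal reader, assuming the usual /proc mount (the default is 17 with 4 KiB pages):

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/net/core/max_skb_frags", "r");
            int v;

            if (f && fscanf(f, "%d", &v) == 1)
                    printf("max_skb_frags = %d\n", v);
            if (f)
                    fclose(f);
            return 0;
    }
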
index 5684e14932bd47e97b9d547307bfc50230e7be7d..902d606324a04ff81adbdc301a1ed58baa3f61f9 100644 (file)
@@ -824,26 +824,26 @@ lookup:
 
        if (sk->sk_state == DCCP_NEW_SYN_RECV) {
                struct request_sock *req = inet_reqsk(sk);
-               struct sock *nsk = NULL;
+               struct sock *nsk;
 
                sk = req->rsk_listener;
-               if (likely(sk->sk_state == DCCP_LISTEN)) {
-                       nsk = dccp_check_req(sk, skb, req);
-               } else {
+               if (unlikely(sk->sk_state != DCCP_LISTEN)) {
                        inet_csk_reqsk_queue_drop_and_put(sk, req);
                        goto lookup;
                }
+               sock_hold(sk);
+               nsk = dccp_check_req(sk, skb, req);
                if (!nsk) {
                        reqsk_put(req);
-                       goto discard_it;
+                       goto discard_and_relse;
                }
                if (nsk == sk) {
-                       sock_hold(sk);
                        reqsk_put(req);
                } else if (dccp_child_process(sk, nsk, skb)) {
                        dccp_v4_ctl_send_reset(sk, skb);
-                       goto discard_it;
+                       goto discard_and_relse;
                } else {
+                       sock_put(sk);
                        return 0;
                }
        }
index 9c6d0508e63a2ab7f13105cd91ea8c027c4a7557..b8608b71a66d34894722c17121754c403c74d946 100644 (file)
@@ -691,26 +691,26 @@ lookup:
 
        if (sk->sk_state == DCCP_NEW_SYN_RECV) {
                struct request_sock *req = inet_reqsk(sk);
-               struct sock *nsk = NULL;
+               struct sock *nsk;
 
                sk = req->rsk_listener;
-               if (likely(sk->sk_state == DCCP_LISTEN)) {
-                       nsk = dccp_check_req(sk, skb, req);
-               } else {
+               if (unlikely(sk->sk_state != DCCP_LISTEN)) {
                        inet_csk_reqsk_queue_drop_and_put(sk, req);
                        goto lookup;
                }
+               sock_hold(sk);
+               nsk = dccp_check_req(sk, skb, req);
                if (!nsk) {
                        reqsk_put(req);
-                       goto discard_it;
+                       goto discard_and_relse;
                }
                if (nsk == sk) {
-                       sock_hold(sk);
                        reqsk_put(req);
                } else if (dccp_child_process(sk, nsk, skb)) {
                        dccp_v6_ctl_send_reset(sk, skb);
-                       goto discard_it;
+                       goto discard_and_relse;
                } else {
+                       sock_put(sk);
                        return 0;
                }
        }
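
A userspace sketch of the refcount discipline both dccp hunks enforce: the hold taken on the listener before dccp_check_req() must be dropped on every exit, hence the new discard_and_relse-style paths:

    #include <assert.h>

    struct obj { int refs; };

    static void hold(struct obj *o) { o->refs++; }
    static void put(struct obj *o)  { assert(o->refs > 0); o->refs--; }

    /* Every exit is balanced: failure paths drop the hold immediately
     * (discard_and_relse), the success path drops it after the child
     * has been fully processed. */
    static int process(struct obj *listener, int nsk_ok, int child_ok)
    {
            hold(listener);             /* pin across the check */
            if (!nsk_ok || !child_ok) {
                    put(listener);
                    return -1;
            }
            put(listener);
            return 0;
    }
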
index 40b9ca72aae3da05c924b2d7adaa0825acfd398d..ab24521beb4d52ceb4bf09e846983954d54a67f0 100644 (file)
@@ -1194,7 +1194,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
        if (ret) {
                netdev_err(master, "error %d registering interface %s\n",
                           ret, slave_dev->name);
-               phy_disconnect(p->phy);
                ds->ports[port] = NULL;
                free_netdev(slave_dev);
                return ret;
@@ -1205,6 +1204,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
        ret = dsa_slave_phy_setup(p, slave_dev);
        if (ret) {
                netdev_err(master, "error %d setting up slave phy\n", ret);
+               unregister_netdev(slave_dev);
                free_netdev(slave_dev);
                return ret;
        }
index c22920525e5d844bd7e4210b233440b883c2681e..775824720b6b57d68460fc8e05915e479f221414 100644 (file)
@@ -353,6 +353,7 @@ config INET_ESP
        select CRYPTO_CBC
        select CRYPTO_SHA1
        select CRYPTO_DES
+       select CRYPTO_ECHAINIV
        ---help---
          Support for IPsec ESP.
 
index cebd9d31e65a4a7539cab0bef71887736bc188f7..f6303b17546b3535d035b7f662cf43e2b2d59280 100644 (file)
@@ -1847,7 +1847,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
        if (err < 0)
                goto errout;
 
-       err = EINVAL;
+       err = -EINVAL;
        if (!tb[NETCONFA_IFINDEX])
                goto errout;
 
index 744e5936c10d7ec555d1ca621f5bd4be57f1c72b..d07fc076bea0a4bc96f68075fb3bb79b95007e63 100644 (file)
@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head)
 
        if (!n->tn_bits)
                kmem_cache_free(trie_leaf_kmem, n);
-       else if (n->tn_bits <= TNODE_KMALLOC_MAX)
-               kfree(n);
        else
-               vfree(n);
+               kvfree(n);
 }
 
 #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
@@ -1396,9 +1394,10 @@ found:
                struct fib_info *fi = fa->fa_info;
                int nhsel, err;
 
-               if ((index >= (1ul << fa->fa_slen)) &&
-                   ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
-                       continue;
+               if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
+                       if (index >= (1ul << fa->fa_slen))
+                               continue;
+               }
                if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
                        continue;
                if (fi->fib_dead)
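
A kernel-style fragment showing why the branch collapses: kvfree() already dispatches on the address, calling vfree() for vmalloc space and kfree() otherwise, so mixed allocators can share one free path (node_alloc is illustrative):

    static void *node_alloc(size_t size)
    {
            if (size <= KMALLOC_MAX_SIZE) {
                    void *p = kmalloc(size, GFP_KERNEL);
                    if (p)
                            return p;
            }
            return vmalloc(size);   /* large, or kmalloc fallback */
    }

    static void node_free(void *p)
    {
            kvfree(p);              /* right destructor picked at runtime */
    }
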
index 46b9c887bede0568ec378d3b809dda8fa463a166..64148914803a8443ecc0de2a45c141ae72cc0258 100644 (file)
@@ -789,14 +789,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
        reqsk_put(req);
 }
 
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
-                             struct sock *child)
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+                                     struct request_sock *req,
+                                     struct sock *child)
 {
        struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 
        spin_lock(&queue->rskq_lock);
        if (unlikely(sk->sk_state != TCP_LISTEN)) {
                inet_child_forget(sk, req, child);
+               child = NULL;
        } else {
                req->sk = child;
                req->dl_next = NULL;
@@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
                sk_acceptq_added(sk);
        }
        spin_unlock(&queue->rskq_lock);
+       return child;
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
 
@@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
        if (own_req) {
                inet_csk_reqsk_queue_drop(sk, req);
                reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-               inet_csk_reqsk_queue_add(sk, req, child);
-               /* Warning: caller must not call reqsk_put(req);
-                * child stole last reference on it.
-                */
-               return child;
+               if (inet_csk_reqsk_queue_add(sk, req, child))
+                       return child;
        }
        /* Too bad, another child took ownership of the request, undo. */
        bh_unlock_sock(child);
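
inet_csk_reqsk_queue_add() now returns the child socket on success and NULL when the listener already left TCP_LISTEN, in which case inet_child_forget() has disposed of the child, so inet_csk_complete_hashdance() only hands the child back to its caller when it was really queued and otherwise falls through to the undo path. A sketch of that transfer-or-NULL contract (hypothetical names):

#include <stdio.h>
#include <stdlib.h>

struct child { int id; };
struct listener { int live; struct child *queued; };

static void child_destroy(struct child *c) { free(c); }

/* Transfer-or-NULL: the callee either takes ownership of 'c' and
 * returns it, or disposes of it and returns NULL; the caller may
 * only keep the pointer when it gets it back. */
static struct child *queue_add(struct listener *l, struct child *c)
{
        if (!l->live) {
                child_destroy(c);       /* callee disposed of it */
                return NULL;
        }
        l->queued = c;                  /* queue of one, for brevity */
        return c;
}

int main(void)
{
        struct listener l = { .live = 0, .queued = NULL };
        struct child *c = calloc(1, sizeof(*c));

        if (!queue_add(&l, c))
                puts("not queued, child already freed");
        return 0;
}
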
index 8bb8e7ad85483234d8a852782515ca11c93af68d..6029157a19ed1632fbf642416c0199829bf6d7ff 100644 (file)
@@ -361,13 +361,20 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
                                 req->id.idiag_dport, req->id.idiag_src[0],
                                 req->id.idiag_sport, req->id.idiag_if);
 #if IS_ENABLED(CONFIG_IPV6)
-       else if (req->sdiag_family == AF_INET6)
-               sk = inet6_lookup(net, hashinfo,
-                                 (struct in6_addr *)req->id.idiag_dst,
-                                 req->id.idiag_dport,
-                                 (struct in6_addr *)req->id.idiag_src,
-                                 req->id.idiag_sport,
-                                 req->id.idiag_if);
+       else if (req->sdiag_family == AF_INET6) {
+               if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+                   ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+                       sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+                                        req->id.idiag_dport, req->id.idiag_src[3],
+                                        req->id.idiag_sport, req->id.idiag_if);
+               else
+                       sk = inet6_lookup(net, hashinfo,
+                                         (struct in6_addr *)req->id.idiag_dst,
+                                         req->id.idiag_dport,
+                                         (struct in6_addr *)req->id.idiag_src,
+                                         req->id.idiag_sport,
+                                         req->id.idiag_if);
+       }
 #endif
        else
                return ERR_PTR(-EINVAL);
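
inet_diag_find_one_icsk() learns that an AF_INET6 request whose endpoints are both IPv4-mapped (::ffff:a.b.c.d) must be looked up in the IPv4 hash, with the inner address taken from the last 32-bit word of the in6_addr (idiag_dst[3] above). The same test in plain userspace C:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        struct in6_addr a;
        struct in_addr v4;

        inet_pton(AF_INET6, "::ffff:192.0.2.1", &a);

        if (IN6_IS_ADDR_V4MAPPED(&a)) {
                /* the embedded IPv4 address is the last 4 bytes */
                memcpy(&v4, &a.s6_addr[12], sizeof(v4));
                printf("v4-mapped, inner address %s\n", inet_ntoa(v4));
        }
        return 0;
}
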
index 3f00810b7288991f83ba147a65ac4f78e68ae594..187c6fcc3027779ba31d0721eb4a7389addaaf8a 100644 (file)
@@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
        struct ipq *qp;
 
        IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+       skb_orphan(skb);
 
        /* Lookup (or create) queue header */
        qp = ip_find(net, ip_hdr(skb), user, vif);
index 7c51c4e1661f9aafc63829cf58b3b5ec28ed859e..41ba68de46d890c8fabc490bb07e7d45c26b7ed6 100644 (file)
@@ -1054,8 +1054,9 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 static void ipgre_tap_setup(struct net_device *dev)
 {
        ether_setup(dev);
-       dev->netdev_ops         = &gre_tap_netdev_ops;
-       dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
+       dev->netdev_ops = &gre_tap_netdev_ops;
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
        ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
@@ -1240,6 +1241,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
        err = ipgre_newlink(net, dev, tb, NULL);
        if (err < 0)
                goto out;
+
+       /* openvswitch users expect packet sizes to be unrestricted,
+        * so set the largest MTU we can.
+        */
+       err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+       if (err)
+               goto out;
+
        return dev;
 out:
        free_netdev(dev);
index b1209b63381f6f9ae81cf258ad3724e1b6a6b9d8..d77eb0c3b6842bc5605a882f9ad46c609dcdeb50 100644 (file)
@@ -316,7 +316,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
        const struct iphdr *iph = ip_hdr(skb);
        struct rtable *rt;
 
-       if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) {
+       if (sysctl_ip_early_demux &&
+           !skb_dst(skb) &&
+           !skb->sk &&
+           !ip_is_fragment(iph)) {
                const struct net_protocol *ipprot;
                int protocol = iph->protocol;
 
index 5f73a7c03e27d334c771f144825c4a2f718d71ba..a50124260f5a4aaa98a3e4a582dbcbdbc236e370 100644 (file)
@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
                switch (cmsg->cmsg_type) {
                case IP_RETOPTS:
                        err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+                       /* Our caller is responsible for freeing ipc->opt */
                        err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
                                             err < 40 ? err : 40);
                        if (err)
index c7bd72e9b544848d10a490b010e0a30c4d5e4c21..89e8861e05fcb1d371c371c1784b89499b69d9df 100644 (file)
@@ -943,17 +943,31 @@ done:
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
 
-int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+       int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
 
-       if (new_mtu < 68 ||
-           new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
+       if (new_mtu < 68)
                return -EINVAL;
+
+       if (new_mtu > max_mtu) {
+               if (strict)
+                       return -EINVAL;
+
+               new_mtu = max_mtu;
+       }
+
        dev->mtu = new_mtu;
        return 0;
 }
+EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+       return __ip_tunnel_change_mtu(dev, new_mtu, true);
+}
 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
 
 static void ip_tunnel_dev_free(struct net_device *dev)
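
__ip_tunnel_change_mtu() separates validation from clamping: strict callers keep the old -EINVAL behaviour above the tunnel's headroom-derived maximum, while non-strict callers, such as the gretap fallback device above asking for IP_MAX_MTU, are clamped to the largest legal value. The control flow, reduced to a standalone sketch:

#include <stdbool.h>
#include <stdio.h>

#define MIN_MTU 68

/* Returns the accepted MTU, or -1 for invalid input. Non-strict
 * callers are clamped to max_mtu instead of being rejected. */
static int change_mtu(int new_mtu, int max_mtu, bool strict)
{
        if (new_mtu < MIN_MTU)
                return -1;
        if (new_mtu > max_mtu) {
                if (strict)
                        return -1;
                new_mtu = max_mtu;      /* clamp instead of fail */
        }
        return new_mtu;
}

int main(void)
{
        printf("%d\n", change_mtu(65535, 1480, false)); /* -> 1480 */
        printf("%d\n", change_mtu(65535, 1480, true));  /* -> -1   */
        return 0;
}
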
index 67f7c9de0b16689dd1e7d075882b9477ac5e2bf9..2ed9dd2b5f2f5a6ea3ea52208e5ec1424a501fc5 100644 (file)
@@ -143,7 +143,11 @@ static char dhcp_client_identifier[253] __initdata;
 
 /* Persistent data: */
 
+#ifdef IPCONFIG_DYNAMIC
 static int ic_proto_used;                      /* Protocol used, if any */
+#else
+#define ic_proto_used 0
+#endif
 static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
 static u8 ic_domain[64];               /* DNS (not NIS) domain name */
 
index 6fb869f646bf7a15b2f012cc1982c2a9f3d5935f..a04dee536b8ef8b5f4c1a085563d5d37b6fa118b 100644 (file)
@@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
 {
        int err;
 
-       skb_orphan(skb);
-
        local_bh_disable();
        err = ip_defrag(net, skb, user);
        local_bh_enable();
index c117b21b937de778dffa484d589141ea31e1b6bc..d3a27165f9cca0f20f68616295cfbf683fb4adc0 100644 (file)
@@ -746,8 +746,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        if (msg->msg_controllen) {
                err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
-               if (err)
+               if (unlikely(err)) {
+                       kfree(ipc.opt);
                        return err;
+               }
                if (ipc.opt)
                        free = 1;
        }
index bc35f1842512bef8e4d87e76542d7bf11f8946fa..7113bae4e6a0c02726e0e11c33415b6779b7d04b 100644 (file)
@@ -547,8 +547,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        if (msg->msg_controllen) {
                err = ip_cmsg_send(net, msg, &ipc, false);
-               if (err)
+               if (unlikely(err)) {
+                       kfree(ipc.opt);
                        goto out;
+               }
                if (ipc.opt)
                        free = 1;
        }
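
The comment added in ip_cmsg_send() states the ownership rule the ping, raw and (below) udp hunks enforce: IP_RETOPTS can allocate ipc->opt before a later control message fails, so the caller owns the buffer even when the function returns an error and must kfree() it. In miniature (hypothetical names):

#include <stdlib.h>

struct opts { int len; };

/* May allocate *opt and then still fail on a later message; the
 * caller frees *opt on both the success and the error path. */
static int parse_cmsgs(struct opts **opt, int fail_after_alloc)
{
        *opt = calloc(1, sizeof(**opt));
        if (!*opt)
                return -1;
        if (fail_after_alloc)
                return -1;      /* *opt stays allocated: caller frees */
        return 0;
}

int main(void)
{
        struct opts *opt = NULL;

        if (parse_cmsgs(&opt, 1) < 0) {
                free(opt);      /* the kfree(ipc.opt) the patches add */
                return 1;
        }
        free(opt);
        return 0;
}
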
index 85f184e429c63c8c426f58c779dc3544cf2e2a54..02c62299d717b9f6c38a5227e3b3ae376e0015b6 100644 (file)
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly  = 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly                = 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly      = 256;
 
+static int ip_rt_gc_timeout __read_mostly      = RT_GC_TIMEOUT;
 /*
  *     Interface to generic destination cache.
  */
@@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
                                struct fib_nh *nh = &FIB_RES_NH(res);
 
                                update_or_create_fnhe(nh, fl4->daddr, new_gw,
-                                                     0, 0);
+                                               0, jiffies + ip_rt_gc_timeout);
                        }
                        if (kill_route)
                                rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(struct net_device *dev,
 #endif
 }
 
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+       struct fnhe_hash_bucket *hash;
+       struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+       u32 hval = fnhe_hashfun(daddr);
+
+       spin_lock_bh(&fnhe_lock);
+
+       hash = rcu_dereference_protected(nh->nh_exceptions,
+                                        lockdep_is_held(&fnhe_lock));
+       hash += hval;
+
+       fnhe_p = &hash->chain;
+       fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+       while (fnhe) {
+               if (fnhe->fnhe_daddr == daddr) {
+                       rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+                               fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+                       fnhe_flush_routes(fnhe);
+                       kfree_rcu(fnhe, rcu);
+                       break;
+               }
+               fnhe_p = &fnhe->fnhe_next;
+               fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+                                                lockdep_is_held(&fnhe_lock));
+       }
+
+       spin_unlock_bh(&fnhe_lock);
+}
+
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
                           const struct fib_result *res,
@@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buff *skb,
 
        fnhe = find_exception(&FIB_RES_NH(*res), daddr);
        if (do_cache) {
-               if (fnhe)
+               if (fnhe) {
                        rth = rcu_dereference(fnhe->fnhe_rth_input);
-               else
-                       rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+                       if (rth && rth->dst.expires &&
+                           time_after(jiffies, rth->dst.expires)) {
+                               ip_del_fnhe(&FIB_RES_NH(*res), daddr);
+                               fnhe = NULL;
+                       } else {
+                               goto rt_cache;
+                       }
+               }
+
+               rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
 
+rt_cache:
                if (rt_cache_valid(rth)) {
                        skb_dst_set_noref(skb, &rth->dst);
                        goto out;
@@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
                struct fib_nh *nh = &FIB_RES_NH(*res);
 
                fnhe = find_exception(nh, fl4->daddr);
-               if (fnhe)
+               if (fnhe) {
                        prth = &fnhe->fnhe_rth_output;
-               else {
-                       if (unlikely(fl4->flowi4_flags &
-                                    FLOWI_FLAG_KNOWN_NH &&
-                                    !(nh->nh_gw &&
-                                      nh->nh_scope == RT_SCOPE_LINK))) {
-                               do_cache = false;
-                               goto add;
+                       rth = rcu_dereference(*prth);
+                       if (rth && rth->dst.expires &&
+                           time_after(jiffies, rth->dst.expires)) {
+                               ip_del_fnhe(nh, fl4->daddr);
+                               fnhe = NULL;
+                       } else {
+                               goto rt_cache;
                        }
-                       prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
                }
+
+               if (unlikely(fl4->flowi4_flags &
+                            FLOWI_FLAG_KNOWN_NH &&
+                            !(nh->nh_gw &&
+                              nh->nh_scope == RT_SCOPE_LINK))) {
+                       do_cache = false;
+                       goto add;
+               }
+               prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
                rth = rcu_dereference(*prth);
+
+rt_cache:
                if (rt_cache_valid(rth)) {
                        dst_hold(&rth->dst);
                        return rth;
@@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 }
 
 #ifdef CONFIG_SYSCTL
-static int ip_rt_gc_timeout __read_mostly      = RT_GC_TIMEOUT;
 static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
 static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
 static int ip_rt_gc_elasticity __read_mostly   = 8;
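
ip_del_fnhe() above unlinks an expired exception from its singly linked chain with the pointer-to-pointer idiom: walk the address of each next link and splice the victim out with a single store, so no separate "previous node" bookkeeping is needed. The same idiom without the RCU and lockdep wrapping:

#include <stdio.h>
#include <stdlib.h>

struct node { int key; struct node *next; };

/* Walk the address of each 'next' link; on a match, redirect that
 * link past the node being removed. */
static void unlink_key(struct node **head, int key)
{
        struct node **pp = head;
        struct node *n;

        while ((n = *pp) != NULL) {
                if (n->key == key) {
                        *pp = n->next;
                        free(n);
                        return;
                }
                pp = &n->next;
        }
}

int main(void)
{
        struct node *b = malloc(sizeof(*b));
        struct node *a = malloc(sizeof(*a));

        a->key = 1; a->next = b;
        b->key = 2; b->next = NULL;
        unlink_key(&a, 2);
        printf("%d -> %p\n", a->key, (void *)a->next);
        free(a);
        return 0;
}
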
index fd17eec9352517f68b86fcd08bf5b215bfa6fa46..483ffdf5aa4dd4ad41809ef73ca2ec815df131c7 100644 (file)
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
+#include <asm/unaligned.h>
 #include <net/busy_poll.h>
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
@@ -939,7 +940,7 @@ new_segment:
 
                i = skb_shinfo(skb)->nr_frags;
                can_coalesce = skb_can_coalesce(skb, i, page, offset);
-               if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+               if (!can_coalesce && i >= sysctl_max_skb_frags) {
                        tcp_mark_push(tp, skb);
                        goto new_segment;
                }
@@ -1212,7 +1213,7 @@ new_segment:
 
                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
-                               if (i == MAX_SKB_FRAGS || !sg) {
+                               if (i == sysctl_max_skb_frags || !sg) {
                                        tcp_mark_push(tp, skb);
                                        goto new_segment;
                                }
@@ -2638,6 +2639,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp;
        unsigned int start;
+       u64 rate64;
        u32 rate;
 
        memset(info, 0, sizeof(*info));
@@ -2703,15 +2705,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_total_retrans = tp->total_retrans;
 
        rate = READ_ONCE(sk->sk_pacing_rate);
-       info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       put_unaligned(rate64, &info->tcpi_pacing_rate);
 
        rate = READ_ONCE(sk->sk_max_pacing_rate);
-       info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       put_unaligned(rate64, &info->tcpi_max_pacing_rate);
 
        do {
                start = u64_stats_fetch_begin_irq(&tp->syncp);
-               info->tcpi_bytes_acked = tp->bytes_acked;
-               info->tcpi_bytes_received = tp->bytes_received;
+               put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+               put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
        } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
        info->tcpi_segs_out = tp->segs_out;
        info->tcpi_segs_in = tp->segs_in;
@@ -2946,7 +2950,7 @@ static void __tcp_alloc_md5sig_pool(void)
                        struct crypto_hash *hash;
 
                        hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
-                       if (IS_ERR_OR_NULL(hash))
+                       if (IS_ERR(hash))
                                return;
                        per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
                }
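
The tcp_get_info() hunks stop storing 64-bit values through struct fields directly: on some 32-bit ABIs the u64 members of struct tcp_info are only 4-byte aligned, and a native 8-byte store there can fault, hence <asm/unaligned.h> and put_unaligned(). The portable userspace equivalent is a memcpy():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Store a u64 at an address with no alignment guarantee; the
 * compiler lowers the memcpy to whatever the CPU allows. */
static void put_unaligned_u64(uint64_t val, void *p)
{
        memcpy(p, &val, sizeof(val));
}

int main(void)
{
        unsigned char buf[12];

        put_unaligned_u64(0x1122334455667788ULL, buf + 1); /* odd offset */
        printf("%02x\n", buf[1]);
        return 0;
}
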
index 0003d409fec5beec010050ef5ccafd58bf2f1501..3b2c8e90a475d91bc30ca5fd8b75f23285eac07d 100644 (file)
@@ -2164,8 +2164,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
-       int cnt, oldcnt;
-       int err;
+       int cnt, oldcnt, lost;
        unsigned int mss;
        /* Use SACK to deduce losses of new sequences sent during recovery */
        const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
@@ -2205,9 +2204,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
                                break;
 
                        mss = tcp_skb_mss(skb);
-                       err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
-                                          mss, GFP_ATOMIC);
-                       if (err < 0)
+                       /* If needed, chop off the prefix to mark as lost. */
+                       lost = (packets - oldcnt) * mss;
+                       if (lost < skb->len &&
+                           tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
                                break;
                        cnt = packets;
                }
@@ -2366,8 +2366,6 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
                        tp->snd_ssthresh = tp->prior_ssthresh;
                        tcp_ecn_withdraw_cwr(tp);
                }
-       } else {
-               tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
        }
        tp->snd_cwnd_stamp = tcp_time_stamp;
        tp->undo_marker = 0;
@@ -2898,7 +2896,10 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
 {
        const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
        struct rtt_meas *m = tcp_sk(sk)->rtt_min;
-       struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+       struct rtt_meas rttm = {
+               .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
+               .ts = now,
+       };
        u32 elapsed;
 
        /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
index 5ced3e4013e3c2119f43b1674e69ff18e281e664..487ac67059e237bab05d2291dd8e0a82f26e2bb0 100644 (file)
@@ -311,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
 
 
 /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
-void tcp_req_err(struct sock *sk, u32 seq)
+void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 {
        struct request_sock *req = inet_reqsk(sk);
        struct net *net = sock_net(sk);
@@ -323,7 +323,7 @@ void tcp_req_err(struct sock *sk, u32 seq)
 
        if (seq != tcp_rsk(req)->snt_isn) {
                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-       } else {
+       } else if (abort) {
                /*
                 * Still in SYN_RECV, just remove it silently.
                 * There is no good way to pass the error to the newly
@@ -383,7 +383,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
        }
        seq = ntohl(th->seq);
        if (sk->sk_state == TCP_NEW_SYN_RECV)
-               return tcp_req_err(sk, seq);
+               return tcp_req_err(sk, seq,
+                                 type == ICMP_PARAMETERPROB ||
+                                 type == ICMP_TIME_EXCEEDED ||
+                                 (type == ICMP_DEST_UNREACH &&
+                                  (code == ICMP_NET_UNREACH ||
+                                   code == ICMP_HOST_UNREACH)));
 
        bh_lock_sock(sk);
        /* If too many ICMPs get dropped on busy
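
tcp_v4_err() now tells tcp_req_err() whether the ICMP error is fatal, so a request socket still in SYN_RECV is only aborted for the hard errors listed above; soft errors leave it alone to retransmit. The condition, restated as a standalone predicate (userspace ICMP headers, hypothetical function name):

#include <netinet/ip_icmp.h>
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the argument passed to tcp_req_err() above: only these
 * ICMP errors abort a SYN_RECV request socket. */
static bool icmp_aborts_req(int type, int code)
{
        return type == ICMP_PARAMETERPROB ||
               type == ICMP_TIME_EXCEEDED ||
               (type == ICMP_DEST_UNREACH &&
                (code == ICMP_NET_UNREACH || code == ICMP_HOST_UNREACH));
}

int main(void)
{
        printf("%d\n", icmp_aborts_req(ICMP_DEST_UNREACH, ICMP_PORT_UNREACH));
        return 0;
}
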
@@ -707,7 +712,8 @@ release_sk1:
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+                           struct sk_buff *skb, u32 seq, u32 ack,
                            u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key,
                            int reply_flags, u8 tos)
@@ -722,7 +728,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
                        ];
        } rep;
        struct ip_reply_arg arg;
-       struct net *net = dev_net(skb_dst(skb)->dev);
 
        memset(&rep.th, 0, sizeof(struct tcphdr));
        memset(&arg, 0, sizeof(arg));
@@ -784,7 +789,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-       tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+       tcp_v4_send_ack(sock_net(sk), skb,
+                       tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent,
@@ -803,8 +809,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
-       tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
-                       tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+       u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+                                            tcp_sk(sk)->snd_nxt;
+
+       tcp_v4_send_ack(sock_net(sk), skb, seq,
                        tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
                        tcp_time_stamp,
                        req->ts_recent,
@@ -1589,28 +1597,30 @@ process:
 
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                struct request_sock *req = inet_reqsk(sk);
-               struct sock *nsk = NULL;
+               struct sock *nsk;
 
                sk = req->rsk_listener;
-               if (tcp_v4_inbound_md5_hash(sk, skb))
-                       goto discard_and_relse;
-               if (likely(sk->sk_state == TCP_LISTEN)) {
-                       nsk = tcp_check_req(sk, skb, req, false);
-               } else {
+               if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+                       reqsk_put(req);
+                       goto discard_it;
+               }
+               if (unlikely(sk->sk_state != TCP_LISTEN)) {
                        inet_csk_reqsk_queue_drop_and_put(sk, req);
                        goto lookup;
                }
+               sock_hold(sk);
+               nsk = tcp_check_req(sk, skb, req, false);
                if (!nsk) {
                        reqsk_put(req);
-                       goto discard_it;
+                       goto discard_and_relse;
                }
                if (nsk == sk) {
-                       sock_hold(sk);
                        reqsk_put(req);
                } else if (tcp_child_process(sk, nsk, skb)) {
                        tcp_v4_send_reset(nsk, skb);
-                       goto discard_it;
+                       goto discard_and_relse;
                } else {
+                       sock_put(sk);
                        return 0;
                }
        }
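
The reworked TCP_NEW_SYN_RECV receive path takes an explicit sock_hold() on the listener before tcp_check_req() and drops that reference on every exit: the failure branches converge on discard_and_relse, and the success branch does its own sock_put(). The discipline, boiled down:

#include <stdbool.h>
#include <stdio.h>

struct obj { int refs; };

static void get_ref(struct obj *o) { o->refs++; }
static void put_ref(struct obj *o) { o->refs--; }

/* Every branch that leaves the function drops the reference it
 * took; no exit may keep holding it implicitly. */
static int process(struct obj *listener, bool ok)
{
        get_ref(listener);              /* pin across validation */
        if (!ok) {
                put_ref(listener);      /* the discard_and_relse leg */
                return -1;
        }
        put_ref(listener);              /* the sock_put() on success */
        return 0;
}

int main(void)
{
        struct obj l = { .refs = 1 };

        process(&l, false);
        printf("refs=%d\n", l.refs);    /* back to 1 */
        return 0;
}
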
index dc45b538e237b908f4d6c6337b4c533a4236735a..95d2f198017ef1a6db687ddfe405a13830a37f3f 100644 (file)
@@ -499,6 +499,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
        struct sock *sk, *result;
        struct hlist_nulls_node *node;
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
 begin:
@@ -512,14 +513,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
-                                                           sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
+                                                       sizeof(struct udphdr));
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
@@ -563,6 +568,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
        unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
        struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
        rcu_read_lock();
@@ -601,14 +607,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
@@ -1038,8 +1048,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        if (msg->msg_controllen) {
                err = ip_cmsg_send(sock_net(sk), msg, &ipc,
                                   sk->sk_family == AF_INET6);
-               if (err)
+               if (unlikely(err)) {
+                       kfree(ipc.opt);
                        return err;
+               }
                if (ipc.opt)
                        free = 1;
                connected = 0;
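
Both UDP lookup hunks gate the SO_REUSEPORT fast path behind a select_ok flag: the lockless hlist_nulls walk can restart from begin:, and once reuseport_select_sock() has produced a socket the restarted pass must fall back to ordinary scoring instead of selecting again, which could otherwise loop. One-shot selection around a retry loop, in miniature:

#include <stdbool.h>
#include <stdio.h>

static int passes;

static int fast_select(void) { return 42; }   /* reuseport pick */
static int slow_score(void)  { return 7;  }   /* ordinary scoring */

/* Models the recheck that can force the whole walk to restart. */
static bool validate(int v) { (void)v; return ++passes > 1; }

static int lookup(void)
{
        bool select_ok = true;
        int result;
begin:
        if (select_ok) {
                result = fast_select();
                select_ok = false;      /* one shot: never re-select */
        } else {
                result = slow_score();
        }
        if (!validate(result))
                goto begin;             /* restart skips the fast path */
        return result;
}

int main(void)
{
        printf("%d\n", lookup());       /* second pass: slow path */
        return 0;
}
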
index bb7dabe2ebbf8aabc624af02c00dbc2b33ad544a..40c897515ddc44d57c434dcd761c2ddc5e7fad50 100644 (file)
@@ -69,6 +69,7 @@ config INET6_ESP
        select CRYPTO_CBC
        select CRYPTO_SHA1
        select CRYPTO_DES
+       select CRYPTO_ECHAINIV
        ---help---
          Support for IPsec ESP.
 
index 38eeddedfc21baa843b315d720cc94a28b6af605..bdd7eac4307a0802fb767e843c92dd81aa730d3a 100644 (file)
@@ -583,7 +583,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
        if (err < 0)
                goto errout;
 
-       err = EINVAL;
+       err = -EINVAL;
        if (!tb[NETCONFA_IFINDEX])
                goto errout;
 
@@ -3538,6 +3538,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 {
        struct inet6_dev *idev = ifp->idev;
        struct net_device *dev = idev->dev;
+       bool notify = false;
 
        addrconf_join_solict(dev, &ifp->addr);
 
@@ -3583,7 +3584,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
                        /* Because optimistic nodes can use this address,
                         * notify listeners. If DAD fails, RTM_DELADDR is sent.
                         */
-                       ipv6_ifa_notify(RTM_NEWADDR, ifp);
+                       notify = true;
                }
        }
 
@@ -3591,6 +3592,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 out:
        spin_unlock(&ifp->lock);
        read_unlock_bh(&idev->lock);
+       if (notify)
+               ipv6_ifa_notify(RTM_NEWADDR, ifp);
 }
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp)
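
addrconf_dad_begin() stops calling ipv6_ifa_notify() with ifp->lock held and the idev lock read-held: the hunk records the decision in a local notify flag and fires the notifier only after both locks are released, so the callout cannot deadlock against them. The pattern in miniature:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int state;

static void notify(void) { printf("state=%d\n", state); }

/* Decide under the lock, act after dropping it: the callout may
 * sleep or take other locks, so it must not run under 'lock'. */
static void update(int v)
{
        bool do_notify = false;

        pthread_mutex_lock(&lock);
        if (v != state) {
                state = v;
                do_notify = true;       /* record, don't call */
        }
        pthread_mutex_unlock(&lock);

        if (do_notify)
                notify();               /* safe: lock released */
}

int main(void)
{
        update(1);
        return 0;
}
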
index 517c55b01ba84b55a0004ce9505d14c4a3951cfc..428162155280ca2af782ea9fd9fa26e0d1666d89 100644 (file)
@@ -162,6 +162,9 @@ ipv4_connected:
        fl6.fl6_dport = inet->inet_dport;
        fl6.fl6_sport = inet->inet_sport;
 
+       if (!fl6.flowi6_oif)
+               fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
        if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
                fl6.flowi6_oif = np->mcast_oif;
 
index 1f9ebe3cbb4ac042edd0b05754d1fc03cdfe73cd..dc2db4f7b182c4ebc1a8a51487a3d2e893955df9 100644 (file)
@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
                }
                spin_lock_bh(&ip6_sk_fl_lock);
                for (sflp = &np->ipv6_fl_list;
-                    (sfl = rcu_dereference(*sflp)) != NULL;
+                    (sfl = rcu_dereference_protected(*sflp,
+                                                     lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
                     sflp = &sfl->next) {
                        if (sfl->fl->label == freq.flr_label) {
                                if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
                                        np->flow_label &= ~IPV6_FLOWLABEL_MASK;
-                               *sflp = rcu_dereference(sfl->next);
+                               *sflp = sfl->next;
                                spin_unlock_bh(&ip6_sk_fl_lock);
                                fl_release(sfl->fl);
                                kfree_rcu(sfl, rcu);
index f37f18b6b40c9547e24902f4af88bdf6f154f394..a69aad1e29d1ebb2429650cc083f0e4dd2bcaf86 100644 (file)
@@ -1512,6 +1512,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
        dev->destructor = ip6gre_dev_free;
 
        dev->features |= NETIF_F_NETNS_LOCAL;
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 }
 
 static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
index 23de98f976d5b7c846e42e9260acbac3b1632c17..a163102f1803e5f9eb4a60c501bd4adcc27bd62b 100644 (file)
@@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
        struct rt6_info *rt;
 #endif
        int err;
+       int flags = 0;
 
        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
@@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                        dst_release(*dst);
                        *dst = NULL;
                }
+
+               if (fl6->flowi6_oif)
+                       flags |= RT6_LOOKUP_F_IFACE;
        }
 
        if (!*dst)
-               *dst = ip6_route_output(net, sk, fl6);
+               *dst = ip6_route_output_flags(net, sk, fl6, flags);
 
        err = (*dst)->error;
        if (err)
index 31ba7ca19757083a1a83f8388b87be5695b1ba98..051b6a6bfff6ca7c0dd42ceb43a417260a11bd23 100644 (file)
 #include <net/ipv6.h>
 #include <net/netfilter/ipv6/nf_nat_masquerade.h>
 
+#define MAX_WORK_COUNT 16
+
+static atomic_t v6_worker_count;
+
 unsigned int
 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
                       const struct net_device *out)
@@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = {
        .notifier_call  = masq_device_event,
 };
 
+struct masq_dev_work {
+       struct work_struct work;
+       struct net *net;
+       int ifindex;
+};
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+       struct masq_dev_work *w;
+       long index;
+
+       w = container_of(work, struct masq_dev_work, work);
+
+       index = w->ifindex;
+       nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+
+       put_net(w->net);
+       kfree(w);
+       atomic_dec(&v6_worker_count);
+       module_put(THIS_MODULE);
+}
+
+/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
+ * schedule.
+ *
+ * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * time if there are lots of conntracks and the system
+ * handles high softirq load, so it frequently calls cond_resched
+ * while iterating the conntrack table.
+ *
+ * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on amount
+ * of ipv6 addresses being deleted), we also need to add an upper
+ * limit to the number of queued work items.
+ */
 static int masq_inet_event(struct notifier_block *this,
                           unsigned long event, void *ptr)
 {
        struct inet6_ifaddr *ifa = ptr;
-       struct netdev_notifier_info info;
+       const struct net_device *dev;
+       struct masq_dev_work *w;
+       struct net *net;
+
+       if (event != NETDEV_DOWN ||
+           atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
+               return NOTIFY_DONE;
+
+       dev = ifa->idev->dev;
+       net = maybe_get_net(dev_net(dev));
+       if (!net)
+               return NOTIFY_DONE;
 
-       netdev_notifier_info_init(&info, ifa->idev->dev);
-       return masq_device_event(this, event, &info);
+       if (!try_module_get(THIS_MODULE))
+               goto err_module;
+
+       w = kmalloc(sizeof(*w), GFP_ATOMIC);
+       if (w) {
+               atomic_inc(&v6_worker_count);
+
+               INIT_WORK(&w->work, iterate_cleanup_work);
+               w->ifindex = dev->ifindex;
+               w->net = net;
+               schedule_work(&w->work);
+
+               return NOTIFY_DONE;
+       }
+
+       module_put(THIS_MODULE);
+ err_module:
+       put_net(net);
+       return NOTIFY_DONE;
 }
 
 static struct notifier_block masq_inet_notifier = {
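
The comment block above explains the shape of the fix: the inet6addr notifier runs in atomic context, but nf_ct_iterate_cleanup() sleeps (see the cond_resched() hunks below), so the flush is handed to the system workqueue, capped at MAX_WORK_COUNT in-flight items and pinned by a module reference. A userspace analogue of just the cap-and-defer part, with detached pthreads standing in for the workqueue (names hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_WORK 16

static atomic_int worker_count;

static void *worker(void *arg)
{
        /* the long-running cleanup would go here */
        atomic_fetch_sub(&worker_count, 1);
        return arg;
}

/* Called from a context that must not block: either hand the job
 * to a worker, or drop it when too many are already in flight. */
static int defer_cleanup(void)
{
        pthread_t t;

        if (atomic_fetch_add(&worker_count, 1) >= MAX_WORK) {
                atomic_fetch_sub(&worker_count, 1);
                return -1;              /* over the cap: drop */
        }
        if (pthread_create(&t, NULL, worker, NULL) != 0) {
                atomic_fetch_sub(&worker_count, 1);
                return -1;
        }
        pthread_detach(t);
        return 0;
}

int main(void)
{
        printf("%d\n", defer_cleanup());
        return 0;
}
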
index 3c8834bc822d2b64b8d7f4b33a9042dd279c584a..ed446639219c3ae8832a19a1f944e4dd6ddc6302 100644 (file)
@@ -1183,11 +1183,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 }
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-                                   struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+                                        struct flowi6 *fl6, int flags)
 {
        struct dst_entry *dst;
-       int flags = 0;
        bool any_src;
 
        dst = l3mdev_rt6_dst_by_oif(net, fl6);
@@ -1208,7 +1207,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
 
        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 }
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
index e794ef66a40135f33b5731a1059a6f45d0fc90b7..2066d1c25a11bb18bbe75caf4abe38eeccaa4b61 100644 (file)
@@ -201,14 +201,14 @@ static int ipip6_tunnel_create(struct net_device *dev)
        if ((__force u16)t->parms.i_flags & SIT_ISATAP)
                dev->priv_flags |= IFF_ISATAP;
 
+       dev->rtnl_link_ops = &sit_link_ops;
+
        err = register_netdevice(dev);
        if (err < 0)
                goto out;
 
        ipip6_tunnel_clone_6rd(dev, sitn);
 
-       dev->rtnl_link_ops = &sit_link_ops;
-
        dev_hold(dev);
 
        ipip6_tunnel_link(sitn, t);
index 006396e31cb0dcedf359c5aaa068ab3d28f7501c..5c8c842730286ea4212222769d124f9e3d611333 100644 (file)
@@ -327,6 +327,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
+       bool fatal;
        int err;
 
        sk = __inet6_lookup_established(net, &tcp_hashinfo,
@@ -345,8 +346,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                return;
        }
        seq = ntohl(th->seq);
+       fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV)
-               return tcp_req_err(sk, seq);
+               return tcp_req_err(sk, seq, fatal);
 
        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -400,7 +402,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                goto out;
        }
 
-       icmpv6_err_convert(type, code, &err);
 
        /* Might be for an request_sock */
        switch (sk->sk_state) {
@@ -1386,7 +1387,7 @@ process:
 
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                struct request_sock *req = inet_reqsk(sk);
-               struct sock *nsk = NULL;
+               struct sock *nsk;
 
                sk = req->rsk_listener;
                tcp_v6_fill_cb(skb, hdr, th);
@@ -1394,24 +1395,24 @@ process:
                        reqsk_put(req);
                        goto discard_it;
                }
-               if (likely(sk->sk_state == TCP_LISTEN)) {
-                       nsk = tcp_check_req(sk, skb, req, false);
-               } else {
+               if (unlikely(sk->sk_state != TCP_LISTEN)) {
                        inet_csk_reqsk_queue_drop_and_put(sk, req);
                        goto lookup;
                }
+               sock_hold(sk);
+               nsk = tcp_check_req(sk, skb, req, false);
                if (!nsk) {
                        reqsk_put(req);
-                       goto discard_it;
+                       goto discard_and_relse;
                }
                if (nsk == sk) {
-                       sock_hold(sk);
                        reqsk_put(req);
                        tcp_v6_restore_cb(skb);
                } else if (tcp_child_process(sk, nsk, skb)) {
                        tcp_v6_send_reset(nsk, skb);
-                       goto discard_it;
+                       goto discard_and_relse;
                } else {
+                       sock_put(sk);
                        return 0;
                }
        }
index 5d2c2afffe7b2c0790bc00505c518d21eb4b589b..22e28a44e3c88c6e52ff59ea1c7dd8a3e964e701 100644 (file)
@@ -257,6 +257,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
        struct sock *sk, *result;
        struct hlist_nulls_node *node;
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
 begin:
@@ -270,14 +271,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
-                                                           sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
+                                                       sizeof(struct udphdr));
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
@@ -321,6 +326,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
        unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
        struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
        rcu_read_lock();
@@ -358,14 +364,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
index 3c4caa60c9265dd95f62d7a3aba84d40cdba0492..5728e76ca6d51adabef2cde3e06b0b9184c2d07e 100644 (file)
@@ -134,11 +134,10 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
                return -1;
        }
        skb_put(skb, count);
+       pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
 
        spin_unlock_irqrestore(&self->spinlock, flags);
 
-       pr_debug("%s(), skb->len=%d\n", __func__ , skb->len);
-
        if (flush) {
                /* ircomm_tty_do_softint will take care of the rest */
                schedule_work(&self->tqueue);
index ef50a94d3eb726f75d4202a7a15c7dd1f812a3aa..fc3598a922b071503514af87deb603a154e2b33b 100644 (file)
@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
        if (!addr || addr->sa_family != AF_IUCV)
                return -EINVAL;
 
+       if (addr_len < sizeof(struct sockaddr_iucv))
+               return -EINVAL;
+
        lock_sock(sk);
        if (sk->sk_state != IUCV_OPEN) {
                err = -EBADFD;
index f93c5be612a7cb43611708ebb66c25b4db0d27cf..2caaa84ce92dac811c7813ebbb233c9596d03ca0 100644 (file)
@@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family,
        ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
                                  NLM_F_ACK, tunnel, cmd);
 
-       if (ret >= 0)
-               return genlmsg_multicast_allns(family, msg, 0,  0, GFP_ATOMIC);
+       if (ret >= 0) {
+               ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+               /* We don't care if no one is listening */
+               if (ret == -ESRCH)
+                       ret = 0;
+               return ret;
+       }
 
        nlmsg_free(msg);
 
@@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family,
        ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
                                   NLM_F_ACK, session, cmd);
 
-       if (ret >= 0)
-               return genlmsg_multicast_allns(family, msg, 0,  0, GFP_ATOMIC);
+       if (ret >= 0) {
+               ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+               /* We don't care if no one is listening */
+               if (ret == -ESRCH)
+                       ret = 0;
+               return ret;
+       }
 
        nlmsg_free(msg);
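
Both l2tp hunks treat -ESRCH from genlmsg_multicast_allns() as success: the notification was built and sent, there simply was no listener on the multicast group, and that should not fail the triggering netlink operation. As a tiny predicate (hypothetical name):

#include <errno.h>
#include <stdio.h>

/* Treat "nobody is subscribed to the multicast group" as success;
 * every other send error still fails the operation. */
static int notify_status(int ret)
{
        return ret == -ESRCH ? 0 : ret;
}

int main(void)
{
        printf("%d %d\n", notify_status(-ESRCH), notify_status(-ENOMEM));
        return 0;
}
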
 
index f7fc0e00497fd73f858833e2cb54ec3058b177fe..978d3bc31df727468602a7b234d10b5b1e9ba35c 100644 (file)
@@ -1733,7 +1733,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
                if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
                        continue;
                sdata->u.ibss.last_scan_completed = jiffies;
-               ieee80211_queue_work(&local->hw, &sdata->work);
        }
        mutex_unlock(&local->iflist_mtx);
 }
index 6bcf0faa4a89dbf38ee125099ad8c328f378331f..8190bf27ebfff8b0a594b7c029130dc9b1b99957 100644 (file)
@@ -248,6 +248,7 @@ static void ieee80211_restart_work(struct work_struct *work)
 
        /* wait for scan work complete */
        flush_workqueue(local->workqueue);
+       flush_work(&local->sched_scan_stopped_work);
 
        WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
             "%s called with hardware scan in progress\n", __func__);
@@ -256,6 +257,11 @@ static void ieee80211_restart_work(struct work_struct *work)
        list_for_each_entry(sdata, &local->interfaces, list)
                flush_delayed_work(&sdata->dec_tailroom_needed_wk);
        ieee80211_scan_cancel(local);
+
+       /* make sure any new ROC will consider local->in_reconfig */
+       flush_delayed_work(&local->roc_work);
+       flush_work(&local->hw_roc_done);
+
        ieee80211_reconfig(local);
        rtnl_unlock();
 }
index fa28500f28fd9fd8d452602b0407741589f12e36..6f85b6ab8e51015ab9a0520822fc4daff6af92b0 100644 (file)
@@ -1370,17 +1370,6 @@ out:
        sdata_unlock(sdata);
 }
 
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
-       struct ieee80211_sub_if_data *sdata;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(sdata, &local->interfaces, list)
-               if (ieee80211_vif_is_mesh(&sdata->vif) &&
-                   ieee80211_sdata_running(sdata))
-                       ieee80211_queue_work(&local->hw, &sdata->work);
-       rcu_read_unlock();
-}
 
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 {
index a1596344c3ba1c5fe590767f3c0483310d63121a..4a8019f79fb2cba035a316b0a8d56a7e8f25d614 100644 (file)
@@ -362,14 +362,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
        return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
 }
 
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
 void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
 void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
 void ieee80211s_stop(void);
 #else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
 static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
 { return false; }
 static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
index 1c342e2592c4ab8af62871a4e44da0f3029140b0..bfbb1acafdd1f004ff4960b2238fbc5343302f38 100644 (file)
@@ -4005,8 +4005,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
                if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
                        ieee80211_queue_work(&sdata->local->hw,
                                             &sdata->u.mgd.monitor_work);
-               /* and do all the other regular work too */
-               ieee80211_queue_work(&sdata->local->hw, &sdata->work);
        }
 }
 
index 8b2f4eaac2ba65f8e25ae47dc316bf288ef8e0e9..55a9c5b94ce128d06ffb3154173fa2ea62a53f9b 100644 (file)
@@ -252,14 +252,11 @@ static bool ieee80211_recalc_sw_work(struct ieee80211_local *local,
 static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
                                         unsigned long start_time)
 {
-       struct ieee80211_local *local = roc->sdata->local;
-
        if (WARN_ON(roc->notified))
                return;
 
        roc->start_time = start_time;
        roc->started = true;
-       roc->hw_begun = true;
 
        if (roc->mgmt_tx_cookie) {
                if (!WARN_ON(!roc->frame)) {
@@ -274,9 +271,6 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
        }
 
        roc->notified = true;
-
-       if (!local->ops->remain_on_channel)
-               ieee80211_recalc_sw_work(local, start_time);
 }
 
 static void ieee80211_hw_roc_start(struct work_struct *work)
@@ -291,6 +285,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
                if (!roc->started)
                        break;
 
+               roc->hw_begun = true;
                ieee80211_handle_roc_started(roc, local->hw_roc_start_time);
        }
 
@@ -413,6 +408,10 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
                return;
        }
 
+       /* defer roc if driver is not started (i.e. during reconfig) */
+       if (local->in_reconfig)
+               return;
+
        roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
                               list);
 
@@ -534,8 +533,10 @@ ieee80211_coalesce_hw_started_roc(struct ieee80211_local *local,
         * begin, otherwise they'll both be marked properly by the work
         * struct that runs once the driver notifies us of the beginning
         */
-       if (cur_roc->hw_begun)
+       if (cur_roc->hw_begun) {
+               new_roc->hw_begun = true;
                ieee80211_handle_roc_started(new_roc, now);
+       }
 
        return true;
 }
@@ -658,6 +659,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
                        queued = true;
                        roc->on_channel = tmp->on_channel;
                        ieee80211_handle_roc_started(roc, now);
+                       ieee80211_recalc_sw_work(local, now);
                        break;
                }
 
index a413e52f7691418116d928f684a0fc27b308c2d6..ae980ce8daff6023bd8e5f113dee6a57b13408cc 100644 (file)
@@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
        bool was_scanning = local->scanning;
        struct cfg80211_scan_request *scan_req;
        struct ieee80211_sub_if_data *scan_sdata;
+       struct ieee80211_sub_if_data *sdata;
 
        lockdep_assert_held(&local->mtx);
 
@@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 
        ieee80211_mlme_notify_scan_completed(local);
        ieee80211_ibss_notify_scan_completed(local);
-       ieee80211_mesh_notify_scan_completed(local);
+
+       /* Requeue all the work that might have been ignored while
+        * the scan was in progress; if there was none this will
+        * just be a no-op for the particular interface.
+        */
+       list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+               if (ieee80211_sdata_running(sdata))
+                       ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+       }
+
        if (was_scanning)
                ieee80211_start_next_roc(local);
 }
@@ -1213,6 +1223,14 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
 
        trace_api_sched_scan_stopped(local);
 
+       /*
+        * this shouldn't really happen, so for simplicity
+        * simply ignore it, and let mac80211 reconfigure
+        * the sched scan later on.
+        */
+       if (local->in_reconfig)
+               return;
+
        schedule_work(&local->sched_scan_stopped_work);
 }
 EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
index 4402ad5b27d1530bf4cc9a0d674c6538f73cb568..a4a4f89d3ba01138c033a13a7d023a04a99e3e6d 100644 (file)
@@ -1453,7 +1453,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
 
        more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids);
 
-       if (reason == IEEE80211_FRAME_RELEASE_PSPOLL)
+       if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL)
                driver_release_tids =
                        BIT(find_highest_prio_tid(driver_release_tids));
 
index 5bad05e9af90fc76201f4bd9a54b931ffd5c61bc..6101deb805a830a80acccbe05499ca4f4fc33988 100644 (file)
@@ -51,6 +51,11 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
        struct ieee80211_hdr *hdr = (void *)skb->data;
        int ac;
 
+       if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
+               ieee80211_free_txskb(&local->hw, skb);
+               return;
+       }
+
        /*
         * This skb 'survived' a round-trip through the driver, and
         * hopefully the driver didn't mangle it too badly. However,
index 3943d4bf289c29b436765d82dd0f241c8b7f5530..58f58bd5202ff5ad99630bc7584a8afeb8c2cf26 100644 (file)
@@ -2043,16 +2043,26 @@ int ieee80211_reconfig(struct ieee80211_local *local)
                 */
                if (sched_scan_req->n_scan_plans > 1 ||
                    __ieee80211_request_sched_scan_start(sched_scan_sdata,
-                                                        sched_scan_req))
+                                                        sched_scan_req)) {
+                       RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
+                       RCU_INIT_POINTER(local->sched_scan_req, NULL);
                        sched_scan_stopped = true;
+               }
        mutex_unlock(&local->mtx);
 
        if (sched_scan_stopped)
                cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
 
  wake_up:
-       local->in_reconfig = false;
-       barrier();
+       if (local->in_reconfig) {
+               local->in_reconfig = false;
+               barrier();
+
+               /* Restart deferred ROCs */
+               mutex_lock(&local->mtx);
+               ieee80211_start_next_roc(local);
+               mutex_unlock(&local->mtx);
+       }
 
        if (local->monitors == local->open_count && local->monitors > 0)
                ieee80211_add_virtual_monitor(local);
index 8c067e6663a14adf43212eda165c33240bb87985..95e757c377f9598429f136ebd3c951f1bb9da11b 100644 (file)
@@ -891,7 +891,7 @@ config NETFILTER_XT_TARGET_TEE
        depends on IPV6 || IPV6=n
        depends on !NF_CONNTRACK || NF_CONNTRACK
        select NF_DUP_IPV4
-       select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
+       select NF_DUP_IPV6 if IPV6
        ---help---
        This option adds a "TEE" target with which a packet can be cloned and
        this clone be rerouted to another nexthop.
index 43d8c9896fa3e66ea7f76ee1b7934c3129a46fd3..f0f688db6213930ad568b645080eb171bb30f345 100644 (file)
@@ -164,8 +164,6 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
        };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-       if (e.cidr == 0)
-               return -EINVAL;
        if (adt == IPSET_TEST)
                e.cidr = HOST_MASK;
 
@@ -377,8 +375,6 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
        };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-       if (e.cidr == 0)
-               return -EINVAL;
        if (adt == IPSET_TEST)
                e.cidr = HOST_MASK;
 
index 3cb3cb831591ef79515b4bde66d5db825a732afb..f60b4fdeeb8cc4fc600506ecc58e2bdc4cad1654 100644 (file)
@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
+static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly bool nf_conntrack_locks_all;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+       spin_lock(lock);
+       while (unlikely(nf_conntrack_locks_all)) {
+               spin_unlock(lock);
+               spin_lock(&nf_conntrack_locks_all_lock);
+               spin_unlock(&nf_conntrack_locks_all_lock);
+               spin_lock(lock);
+       }
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
 {
        h1 %= CONNTRACK_LOCKS;
@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
        h1 %= CONNTRACK_LOCKS;
        h2 %= CONNTRACK_LOCKS;
        if (h1 <= h2) {
-               spin_lock(&nf_conntrack_locks[h1]);
+               nf_conntrack_lock(&nf_conntrack_locks[h1]);
                if (h1 != h2)
                        spin_lock_nested(&nf_conntrack_locks[h2],
                                         SINGLE_DEPTH_NESTING);
        } else {
-               spin_lock(&nf_conntrack_locks[h2]);
+               nf_conntrack_lock(&nf_conntrack_locks[h2]);
                spin_lock_nested(&nf_conntrack_locks[h1],
                                 SINGLE_DEPTH_NESTING);
        }
@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void)
 {
        int i;
 
-       for (i = 0; i < CONNTRACK_LOCKS; i++)
-               spin_lock_nested(&nf_conntrack_locks[i], i);
+       spin_lock(&nf_conntrack_locks_all_lock);
+       nf_conntrack_locks_all = true;
+
+       for (i = 0; i < CONNTRACK_LOCKS; i++) {
+               spin_lock(&nf_conntrack_locks[i]);
+               spin_unlock(&nf_conntrack_locks[i]);
+       }
 }
 
 static void nf_conntrack_all_unlock(void)
 {
-       int i;
-
-       for (i = 0; i < CONNTRACK_LOCKS; i++)
-               spin_unlock(&nf_conntrack_locks[i]);
+       nf_conntrack_locks_all = false;
+       spin_unlock(&nf_conntrack_locks_all_lock);
 }
 
 unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +775,7 @@ restart:
        hash = hash_bucket(_hash, net);
        for (; i < net->ct.htable_size; i++) {
                lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (read_seqcount_retry(&net->ct.generation, sequence)) {
                        spin_unlock(lockp);
                        goto restart;
@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
        for (; *bucket < net->ct.htable_size; (*bucket)++) {
                lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
                local_bh_disable();
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (*bucket < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
                                if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -1394,6 +1412,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
                }
                spin_unlock(lockp);
                local_bh_enable();
+               cond_resched();
        }
 
        for_each_possible_cpu(cpu) {
@@ -1406,6 +1425,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
                                set_bit(IPS_DYING_BIT, &ct->status);
                }
                spin_unlock_bh(&pcpu->lock);
+               cond_resched();
        }
        return NULL;
 found:
@@ -1422,6 +1442,8 @@ void nf_ct_iterate_cleanup(struct net *net,
        struct nf_conn *ct;
        unsigned int bucket = 0;
 
+       might_sleep();
+
        while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
                /* Time to push up daisies... */
                if (del_timer(&ct->timeout))
@@ -1430,6 +1452,7 @@ void nf_ct_iterate_cleanup(struct net *net,
                /* ... else the timer will get him soon. */
 
                nf_ct_put(ct);
+               cond_resched();
        }
 }
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
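
The new locking scheme above replaces "take every bucket lock" with a two-level protocol: nf_conntrack_lock() takes one bucket lock but backs off while a global flag is set, and nf_conntrack_all_lock() raises the flag under a dedicated lock and then briefly passes through each bucket lock to drain existing holders; the cond_resched() calls let the full-table walks yield. A userspace pthread analogue of the idea — a sketch, not the kernel code (the kernel derives its memory ordering from the spinlock operations themselves):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

#define NBUCKETS 16

static pthread_mutex_t bucket_lock[NBUCKETS];   /* pthread_mutex_init()ed */
static pthread_mutex_t all_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic bool locks_all;

/* Per-bucket lock that yields to a pending global locker. */
static void local_lock(int b)
{
        pthread_mutex_lock(&bucket_lock[b]);
        while (locks_all) {
                /* A global phase is running: drop our lock, then wait
                 * for it to finish by passing through all_lock. */
                pthread_mutex_unlock(&bucket_lock[b]);
                pthread_mutex_lock(&all_lock);
                pthread_mutex_unlock(&all_lock);
                pthread_mutex_lock(&bucket_lock[b]);
        }
}

static void local_unlock(int b)
{
        pthread_mutex_unlock(&bucket_lock[b]);
}

/* Exclude every local locker without holding all NBUCKETS locks. */
static void global_lock(void)
{
        pthread_mutex_lock(&all_lock);
        locks_all = true;
        /* Drain: anyone already inside a bucket either finishes or
         * notices locks_all and backs off onto all_lock. */
        for (int b = 0; b < NBUCKETS; b++) {
                pthread_mutex_lock(&bucket_lock[b]);
                pthread_mutex_unlock(&bucket_lock[b]);
        }
}

static void global_unlock(void)
{
        locks_all = false;
        pthread_mutex_unlock(&all_lock);
}
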
index bd9d31537905e4e372da427ac29ce5948f2af34c..3b40ec575cd56a7b73c0b25d9c8decdda979d842 100644 (file)
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
        }
        local_bh_disable();
        for (i = 0; i < net->ct.htable_size; i++) {
-               spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+               nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
                if (i < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
                                unhelp(h, me);
index dbb1bb3edb456594184bb211332d1ee799af70ef..355e8552fd5b77682295493a22dc0a03d9338255 100644 (file)
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
        for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
 restart:
                lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (cb->args[0] >= net->ct.htable_size) {
                        spin_unlock(lockp);
                        goto out;
index b6605e0008013972cd94500db5bd626cb9bf3064..5eefe4a355c640cf85c8328740a877c6909880ba 100644 (file)
@@ -224,12 +224,12 @@ static int __init nf_tables_netdev_init(void)
 
        nft_register_chain_type(&nft_filter_chain_netdev);
        ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
-       if (ret < 0)
+       if (ret < 0) {
                nft_unregister_chain_type(&nft_filter_chain_netdev);
-
+               return ret;
+       }
        register_netdevice_notifier(&nf_tables_netdev_notifier);
-
-       return ret;
+       return 0;
 }
 
 static void __exit nf_tables_netdev_exit(void)
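
The fix above turns a fall-through error path into a proper unwind: if registering the pernet subsystem fails, the chain type is unregistered and the error returned before the netdevice notifier is ever installed. The general shape, as a runnable sketch with stubbed steps (all names hypothetical):

#include <stdio.h>

static void step_a_register(void)   { puts("A registered"); }
static void step_a_unregister(void) { puts("A unregistered"); }
static int  step_b_register(void)   { return -1; /* simulated failure */ }
static void step_c_register(void)   { puts("C registered"); }

static int example_init(void)
{
        step_a_register();
        int ret = step_b_register();
        if (ret < 0) {
                step_a_unregister(); /* unwind step A ... */
                return ret;          /* ... and stop: step C must not run */
        }
        step_c_register();
        return 0;
}

int main(void)
{
        return example_init() ? 1 : 0;
}
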
index a7ba23353dab938b4048095e50da9eb463dfc477..857ae89633aff80cea1c6ac74355e5e183b4d780 100644 (file)
@@ -311,14 +311,14 @@ replay:
 #endif
                {
                        nfnl_unlock(subsys_id);
-                       netlink_ack(skb, nlh, -EOPNOTSUPP);
+                       netlink_ack(oskb, nlh, -EOPNOTSUPP);
                        return kfree_skb(skb);
                }
        }
 
        if (!ss->commit || !ss->abort) {
                nfnl_unlock(subsys_id);
-               netlink_ack(skb, nlh, -EOPNOTSUPP);
+               netlink_ack(oskb, nlh, -EOPNOTSUPP);
                return kfree_skb(skb);
        }
 
@@ -328,10 +328,12 @@ replay:
                nlh = nlmsg_hdr(skb);
                err = 0;
 
-               if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
-                   skb->len < nlh->nlmsg_len) {
-                       err = -EINVAL;
-                       goto ack;
+               if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+                   skb->len < nlh->nlmsg_len ||
+                   nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+                       nfnl_err_reset(&err_list);
+                       status |= NFNL_BATCH_FAILURE;
+                       goto done;
                }
 
                /* Only requests are handled by the kernel */
@@ -406,7 +408,7 @@ ack:
                                 * pointing to the batch header.
                                 */
                                nfnl_err_reset(&err_list);
-                               netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+                               netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
                                status |= NFNL_BATCH_FAILURE;
                                goto done;
                        }
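
The replacement check above validates all three length invariants before any field of the message is trusted: the header must at least cover itself, the claimed length must fit in the remaining buffer, and the payload must hold at least a struct nfgenmsg; a malformed message now fails the whole batch instead of being individually acked. A userspace sketch of the same bounds check (struct layout and the nfgenmsg size are stand-ins for illustration):

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>

struct nlhdr {                  /* stand-in for struct nlmsghdr */
        uint32_t nlmsg_len;
        uint16_t nlmsg_type, nlmsg_flags;
        uint32_t nlmsg_seq, nlmsg_pid;
};
#define NLHDR_LEN  ((uint32_t)sizeof(struct nlhdr))
#define NFGEN_LEN  4u           /* stand-in for sizeof(struct nfgenmsg) */

static bool batch_msg_ok(const struct nlhdr *nlh, size_t bytes_in_buf)
{
        if (nlh->nlmsg_len < NLHDR_LEN)             /* header fits itself */
                return false;
        if (bytes_in_buf < nlh->nlmsg_len)          /* claimed len fits buffer */
                return false;
        if (nlh->nlmsg_len - NLHDR_LEN < NFGEN_LEN) /* payload big enough */
                return false;
        return true;
}
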
index 5d010f27ac018d079058000c11acc66d7086b57a..2671b9deb103735ff203999286a8cfbdde434f12 100644 (file)
@@ -307,7 +307,7 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
 
        local_bh_disable();
        for (i = 0; i < net->ct.htable_size; i++) {
-               spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+               nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
                if (i < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
                                untimeout(h, timeout);
index 383c17138399aec2a5681580ddd3b6f200f29d67..b78c28ba465ff9c1eed6b6006b6c5e9d9da58072 100644 (file)
@@ -46,16 +46,14 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
                switch (priv->op) {
                case NFT_BYTEORDER_NTOH:
                        for (i = 0; i < priv->len / 8; i++) {
-                               src64 = get_unaligned_be64(&src[i]);
-                               src64 = be64_to_cpu((__force __be64)src64);
+                               src64 = get_unaligned((u64 *)&src[i]);
                                put_unaligned_be64(src64, &dst[i]);
                        }
                        break;
                case NFT_BYTEORDER_HTON:
                        for (i = 0; i < priv->len / 8; i++) {
                                src64 = get_unaligned_be64(&src[i]);
-                               src64 = (__force u64)cpu_to_be64(src64);
-                               put_unaligned_be64(src64, &dst[i]);
+                               put_unaligned(src64, (u64 *)&dst[i]);
                        }
                        break;
                }
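
The byteorder fix matters because get_unaligned_be64() already returns a host-order value; converting it again with be64_to_cpu() double-swaps on little-endian machines. Each element needs exactly one conversion per pass. A userspace sketch of the corrected single-conversion path:

#include <stdint.h>
#include <string.h>

static uint64_t load_raw64(const void *p)
{
        uint64_t v;
        memcpy(&v, p, sizeof(v));       /* unaligned-safe load */
        return v;
}

static void store_raw64(void *p, uint64_t v)
{
        memcpy(p, &v, sizeof(v));       /* unaligned-safe store */
}

/* One swap per element; since byte-swapping is its own inverse, the
 * HTON and NTOH directions perform the same byte transformation. */
static void conv64(const void *src, void *dst)
{
        uint64_t v = load_raw64(src);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        v = __builtin_bswap64(v);
#endif
        store_raw64(dst, v);
}
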
index c7808fc19719c4687457d039586f9708179b804e..c9743f78f21999ae01ed735a63c4e856e79cdc37 100644 (file)
@@ -100,7 +100,7 @@ static int nft_counter_init(const struct nft_ctx *ctx,
 
        cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
        if (cpu_stats == NULL)
-               return ENOMEM;
+               return -ENOMEM;
 
        preempt_disable();
        this_cpu = this_cpu_ptr(cpu_stats);
@@ -138,7 +138,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
        cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
                                              GFP_ATOMIC);
        if (cpu_stats == NULL)
-               return ENOMEM;
+               return -ENOMEM;
 
        preempt_disable();
        this_cpu = this_cpu_ptr(cpu_stats);
index a0eb2161e3efdb3476f692ce242cafaff30e7f18..d4a4619fcebc927acd1bf730e044d29813d096d3 100644 (file)
@@ -127,6 +127,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                               NF_CT_LABELS_MAX_SIZE - size);
                return;
        }
+#endif
        case NFT_CT_BYTES: /* fallthrough */
        case NFT_CT_PKTS: {
                const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
@@ -138,7 +139,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                memcpy(dest, &count, sizeof(count));
                return;
        }
-#endif
        default:
                break;
        }
index b7c43def0dc69e1cd03db238e0174283d895003a..e118397254af026fd23ed3097ddc78c8ee9fc0c4 100644 (file)
@@ -228,7 +228,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u8 nexthdr;
-       __be16 frag_off;
+       __be16 frag_off, oldlen, newlen;
        int tcphoff;
        int ret;
 
@@ -244,7 +244,12 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
                return NF_DROP;
        if (ret > 0) {
                ipv6h = ipv6_hdr(skb);
-               ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+               oldlen = ipv6h->payload_len;
+               newlen = htons(ntohs(oldlen) + ret);
+               if (skb->ip_summed == CHECKSUM_COMPLETE)
+                       skb->csum = csum_add(csum_sub(skb->csum, oldlen),
+                                            newlen);
+               ipv6h->payload_len = newlen;
        }
        return XT_CONTINUE;
 }
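
When the NIC has already summed the packet (CHECKSUM_COMPLETE), skb->csum must track any bytes the stack rewrites; the hunk above patches the running sum incrementally as payload_len changes instead of recomputing it. A userspace sketch of the csum_add/csum_sub arithmetic (end-around-carry one's complement, matching the shape of the kernel helpers):

#include <stdint.h>
#include <stdio.h>

/* Raw (unfolded) one's-complement accumulator, like the kernel's __wsum. */
static uint32_t csum_add32(uint32_t csum, uint32_t addend)
{
        uint32_t res = csum + addend;
        return res + (res < addend);     /* end-around carry */
}

static uint32_t csum_sub32(uint32_t csum, uint32_t addend)
{
        return csum_add32(csum, ~addend); /* subtract = add the complement */
}

int main(void)
{
        uint32_t csum   = 0x1234;        /* pretend skb->csum */
        uint32_t oldlen = 0x0500;        /* old 16-bit field, zero-extended */
        uint32_t newlen = 0x0514;        /* new 16-bit field, zero-extended */

        csum = csum_add32(csum_sub32(csum, oldlen), newlen);
        printf("patched csum: 0x%x\n", csum);
        return 0;
}
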
index 3eff7b67cdf2f5277c2004cf48c2f4e37c218068..6e57a3966dc5748a8cbb99cdf28c62ba1d5b90f1 100644 (file)
@@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
        return XT_CONTINUE;
 }
 
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 static unsigned int
 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -131,7 +131,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
                .destroy    = tee_tg_destroy,
                .me         = THIS_MODULE,
        },
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
        {
                .name       = "TEE",
                .revision   = 1,
index 81dc1bb6e0168ed17cd2d60e6dc3a394f0f34e76..f1ffb34e253f4906275d4da99ce26306b94241eb 100644 (file)
@@ -2831,7 +2831,8 @@ static int netlink_dump(struct sock *sk)
         * reasonable static buffer based on the expected largest dump of a
         * single netdev. The outcome is MSG_TRUNC error.
         */
-       skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+       if (!netlink_rx_is_mmaped(sk))
+               skb_reserve(skb, skb_tailroom(skb) - alloc_size);
        netlink_skb_set_owner_r(skb, sk);
 
        len = cb->dump(skb, cb);
index 1605691d94144aee0fc50ffb17be05eca2b59675..5eb7694348b5b82a3e80dc6262912eef441ec88e 100644 (file)
@@ -90,7 +90,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
        int err;
        struct vxlan_config conf = {
                .no_share = true,
-               .flags = VXLAN_F_COLLECT_METADATA,
+               .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
+               /* Don't restrict the packets that can be sent by MTU */
+               .mtu = IP_MAX_MTU,
        };
 
        if (!options) {
index f222885ac0c7397ed08c26106e68157d91267ff6..9481d55ff6cb2dd7a228964fa3d9dea154a5a9af 100644 (file)
@@ -122,44 +122,34 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
 static void rds_ib_add_one(struct ib_device *device)
 {
        struct rds_ib_device *rds_ibdev;
-       struct ib_device_attr *dev_attr;
 
        /* Only handle IB (no iWARP) devices */
        if (device->node_type != RDMA_NODE_IB_CA)
                return;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               rdsdebug("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
                                 ibdev_to_node(device));
        if (!rds_ibdev)
-               goto free_attr;
+               return;
 
        spin_lock_init(&rds_ibdev->spinlock);
        atomic_set(&rds_ibdev->refcount, 1);
        INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
 
-       rds_ibdev->max_wrs = dev_attr->max_qp_wr;
-       rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
+       rds_ibdev->max_wrs = device->attrs.max_qp_wr;
+       rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
 
-       rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
-       rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
-               min_t(unsigned int, (dev_attr->max_mr / 2),
+       rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
+       rds_ibdev->max_1m_fmrs = device->attrs.max_mr ?
+               min_t(unsigned int, (device->attrs.max_mr / 2),
                      rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
 
-       rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
-               min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
+       rds_ibdev->max_8k_fmrs = device->attrs.max_mr ?
+               min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
                      rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
 
-       rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
-       rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
+       rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
+       rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
 
        rds_ibdev->dev = device;
        rds_ibdev->pd = ib_alloc_pd(device);
@@ -183,7 +173,7 @@ static void rds_ib_add_one(struct ib_device *device)
        }
 
        rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
-                dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+                device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
                 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
                 rds_ibdev->max_8k_fmrs);
 
@@ -202,8 +192,6 @@ static void rds_ib_add_one(struct ib_device *device)
 
 put_dev:
        rds_ib_dev_put(rds_ibdev);
-free_attr:
-       kfree(dev_attr);
 }
 
 /*
index 576f1825fc55769f7242c75c771a50a6e8e5e93b..f4a9fff829e0e5193697ad334065dd423ab4fe0f 100644 (file)
@@ -60,30 +60,20 @@ LIST_HEAD(iw_nodev_conns);
 static void rds_iw_add_one(struct ib_device *device)
 {
        struct rds_iw_device *rds_iwdev;
-       struct ib_device_attr *dev_attr;
 
        /* Only handle iwarp devices */
        if (device->node_type != RDMA_NODE_RNIC)
                return;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               rdsdebug("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
        if (!rds_iwdev)
-               goto free_attr;
+               return;
 
        spin_lock_init(&rds_iwdev->spinlock);
 
-       rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
-       rds_iwdev->max_wrs = dev_attr->max_qp_wr;
-       rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
+       rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
+       rds_iwdev->max_wrs = device->attrs.max_qp_wr;
+       rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
 
        rds_iwdev->dev = device;
        rds_iwdev->pd = ib_alloc_pd(device);
@@ -111,8 +101,7 @@ static void rds_iw_add_one(struct ib_device *device)
        list_add_tail(&rds_iwdev->list, &rds_iw_devices);
 
        ib_set_client_data(device, &rds_iw_client, rds_iwdev);
-
-       goto free_attr;
+       return;
 
 err_mr:
        if (rds_iwdev->mr)
@@ -121,8 +110,6 @@ err_pd:
        ib_dealloc_pd(rds_iwdev->pd);
 free_dev:
        kfree(rds_iwdev);
-free_attr:
-       kfree(dev_attr);
 }
 
 static void rds_iw_remove_one(struct ib_device *device, void *client_data)
index f53bf3b6558b094b6e1b379568620157b4ffae17..cf5b69ab18294bb2fa36a00f2a19dbf02d0f3045 100644 (file)
@@ -1095,17 +1095,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
        return res;
 }
 
-static bool rfkill_readable(struct rfkill_data *data)
-{
-       bool r;
-
-       mutex_lock(&data->mtx);
-       r = !list_empty(&data->events);
-       mutex_unlock(&data->mtx);
-
-       return r;
-}
-
 static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
                               size_t count, loff_t *pos)
 {
@@ -1122,8 +1111,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
                        goto out;
                }
                mutex_unlock(&data->mtx);
+               /* since we re-check and it just compares pointers,
+                * using !list_empty() without locking isn't a problem
+                */
                ret = wait_event_interruptible(data->read_wait,
-                                              rfkill_readable(data));
+                                              !list_empty(&data->events));
                mutex_lock(&data->mtx);
 
                if (ret)
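
Dropping rfkill_readable() is safe because list_empty() only compares two pointers and wait_event_interruptible() re-evaluates its condition after every wakeup: a racy read is at worst a spurious wakeup, and the caller re-takes the mutex before actually consuming an event. A pthread sketch of the same "unlocked probe, locked re-check" structure; wait_for_events() is a crude stand-in for sleeping on the kernel wait queue:

#include <pthread.h>
#include <sched.h>
#include <stddef.h>

struct event { struct event *next; };

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static struct event *events;            /* singly-linked event list */

static void wait_for_events(void)
{
        sched_yield();                   /* stand-in for the wait queue */
}

static struct event *read_one_event(void)
{
        pthread_mutex_lock(&mtx);
        while (events == NULL) {
                pthread_mutex_unlock(&mtx);
                /* Unlocked peek is fine here: it only reads a pointer,
                 * and we re-check under the mutex after waking. */
                wait_for_events();
                pthread_mutex_lock(&mtx);
        }
        struct event *e = events;        /* consume under the lock */
        events = e->next;
        pthread_mutex_unlock(&mtx);
        return e;
}
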
index b5c2cf2aa6d4bc00e5c4bac2b3326db3d785c8be..af1acf0098666f3ffa8bc5bd168525f550478fef 100644 (file)
@@ -1852,6 +1852,7 @@ reset:
        }
 
        tp = old_tp;
+       protocol = tc_skb_protocol(skb);
        goto reclassify;
 #endif
 }
index f26bdea875c1a8413774e767bb897012d969d2e6..a1cd778240cd7f2b14336c26ba6eec282c1c60b6 100644 (file)
@@ -403,6 +403,8 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
                if (len <= cl->deficit) {
                        cl->deficit -= len;
                        skb = qdisc_dequeue_peeked(cl->qdisc);
+                       if (unlikely(skb == NULL))
+                               goto out;
                        if (cl->qdisc->q.qlen == 0)
                                list_del(&cl->alist);
 
index bf61dfb8e09eea1034c7b9ddd40583752033230c..49d2cc751386f3208c67eed17ec9c3f15b801f50 100644 (file)
@@ -935,15 +935,22 @@ static struct sctp_association *__sctp_lookup_association(
                                        struct sctp_transport **pt)
 {
        struct sctp_transport *t;
+       struct sctp_association *asoc = NULL;
 
+       rcu_read_lock();
        t = sctp_addrs_lookup_transport(net, local, peer);
-       if (!t || t->dead)
-               return NULL;
+       if (!t || !sctp_transport_hold(t))
+               goto out;
 
-       sctp_association_hold(t->asoc);
+       asoc = t->asoc;
+       sctp_association_hold(asoc);
        *pt = t;
 
-       return t->asoc;
+       sctp_transport_put(t);
+
+out:
+       rcu_read_unlock();
+       return asoc;
 }
 
 /* Look up an association. protected by RCU read lock */
@@ -955,9 +962,7 @@ struct sctp_association *sctp_lookup_association(struct net *net,
 {
        struct sctp_association *asoc;
 
-       rcu_read_lock();
        asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
-       rcu_read_unlock();
 
        return asoc;
 }
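
The rewritten lookup closes a use-after-free window: rather than testing a t->dead flag, it runs under rcu_read_lock() and only uses the transport if a conditional reference can still be taken, i.e. if the refcount has not already dropped to zero. That conditional grab is atomic_add_unless(&refcnt, 1, 0); a C11 sketch of the primitive:

#include <stdatomic.h>
#include <stdbool.h>

struct assoc;

struct transport {
        atomic_int refcnt;
        struct assoc *asoc;
};

/* Take a reference only while the object is still live (refcnt > 0);
 * equivalent in spirit to atomic_add_unless(&t->refcnt, 1, 0). */
static bool transport_hold(struct transport *t)
{
        int old = atomic_load(&t->refcnt);
        while (old != 0) {
                /* old is reloaded automatically when the CAS fails */
                if (atomic_compare_exchange_weak(&t->refcnt, &old, old + 1))
                        return true;
        }
        return false;                    /* already on its way to free */
}
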
index 684c5b31563badc1f097434211422f495d356aad..ded7d931a6a5b218c7bbd4481acdea1d2bc29325 100644 (file)
@@ -165,8 +165,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
        list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
                        transports) {
                addr = &transport->ipaddr;
-               if (transport->dead)
-                       continue;
 
                af = sctp_get_af_specific(addr->sa.sa_family);
                if (af->cmp_addr(addr, primary)) {
@@ -380,6 +378,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
        }
 
        transport = (struct sctp_transport *)v;
+       if (!sctp_transport_hold(transport))
+               return 0;
        assoc = transport->asoc;
        epb = &assoc->base;
        sk = epb->sk;
@@ -412,6 +412,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
                sk->sk_rcvbuf);
        seq_printf(seq, "\n");
 
+       sctp_transport_put(transport);
+
        return 0;
 }
 
@@ -489,12 +491,12 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
        }
 
        tsp = (struct sctp_transport *)v;
+       if (!sctp_transport_hold(tsp))
+               return 0;
        assoc = tsp->asoc;
 
        list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
                                transports) {
-               if (tsp->dead)
-                       continue;
                /*
                 * The remote address (ADDR)
                 */
@@ -544,6 +546,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
                seq_printf(seq, "\n");
        }
 
+       sctp_transport_put(tsp);
+
        return 0;
 }
 
index ab0d538a74ed593571cfaef02cd1bb7ce872abe6..1099e99a53c485402ddd9c0693ff5cdd707accca 100644 (file)
@@ -60,6 +60,8 @@
 #include <net/inet_common.h>
 #include <net/inet_ecn.h>
 
+#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
+
 /* Global data structures. */
 struct sctp_globals sctp_globals __read_mostly;
 
@@ -1355,6 +1357,8 @@ static __init int sctp_init(void)
        unsigned long limit;
        int max_share;
        int order;
+       int num_entries;
+       int max_entry_order;
 
        sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
 
@@ -1407,14 +1411,24 @@ static __init int sctp_init(void)
 
        /* Size and allocate the association hash table.
         * The methodology is similar to that of the tcp hash tables.
+        * Though not identical: start by getting a goal size.
         */
        if (totalram_pages >= (128 * 1024))
                goal = totalram_pages >> (22 - PAGE_SHIFT);
        else
                goal = totalram_pages >> (24 - PAGE_SHIFT);
 
-       for (order = 0; (1UL << order) < goal; order++)
-               ;
+       /* Then compute the page order for said goal */
+       order = get_order(goal);
+
+       /* Now compute the required page order for the maximum sized table we
+        * want to create
+        */
+       max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
+                                   sizeof(struct sctp_bind_hashbucket));
+
+       /* Limit the page order by that maximum hash table size */
+       order = min(order, max_entry_order);
 
        /* Allocate and initialize the endpoint hash table.  */
        sctp_ep_hashsize = 64;
@@ -1430,20 +1444,35 @@ static __init int sctp_init(void)
                INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
        }
 
-       /* Allocate and initialize the SCTP port hash table.  */
+       /* Allocate and initialize the SCTP port hash table.
+        * Note that order is initialized to start at the max sized
+        * table we want to support.  If we can't get that many pages,
+        * reduce the order and try again.
+        */
        do {
-               sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
-                                       sizeof(struct sctp_bind_hashbucket);
-               if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
-                       continue;
                sctp_port_hashtable = (struct sctp_bind_hashbucket *)
                        __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
        } while (!sctp_port_hashtable && --order > 0);
+
        if (!sctp_port_hashtable) {
                pr_err("Failed bind hash alloc\n");
                status = -ENOMEM;
                goto err_bhash_alloc;
        }
+
+       /* Now compute the number of entries that will fit in the
+        * port hash space we allocated
+        */
+       num_entries = (1UL << order) * PAGE_SIZE /
+                     sizeof(struct sctp_bind_hashbucket);
+
+       /* And finish by rounding it down to the nearest power of two.
+        * This wastes some memory, of course, but it's needed because
+        * the hash function operates on the assumption that the number
+        * of entries is a power of two.
+        */
+       sctp_port_hashsize = rounddown_pow_of_two(num_entries);
+
        for (i = 0; i < sctp_port_hashsize; i++) {
                spin_lock_init(&sctp_port_hashtable[i].lock);
                INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
@@ -1452,7 +1481,8 @@ static __init int sctp_init(void)
        if (sctp_transport_hashtable_init())
                goto err_thash_alloc;
 
-       pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize);
+       pr_info("Hash tables configured (bind %d/%d)\n", sctp_port_hashsize,
+               num_entries);
 
        sctp_sysctl_register();
 
index 2e21384697c2a6a5fd045142bcd9c39992d3867f..b5327bb77458f3ae68e0098c45d00307a56ed2c5 100644 (file)
@@ -259,12 +259,6 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
                goto out_unlock;
        }
 
-       /* Is this transport really dead and just waiting around for
-        * the timer to let go of the reference?
-        */
-       if (transport->dead)
-               goto out_unlock;
-
        /* Run through the state machine.  */
        error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
                           SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
@@ -380,12 +374,6 @@ void sctp_generate_heartbeat_event(unsigned long data)
                goto out_unlock;
        }
 
-       /* Is this structure just waiting around for us to actually
-        * get destroyed?
-        */
-       if (transport->dead)
-               goto out_unlock;
-
        error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
                           SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
                           asoc->state, asoc->ep, asoc,
index 9bb80ec4c08ff06f6e629078c5a926c3def3ce23..e878da0949dbfc0012d2e7985bf6e28386678d0f 100644 (file)
@@ -5538,6 +5538,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
        struct sctp_hmac_algo_param *hmacs;
        __u16 data_len = 0;
        u32 num_idents;
+       int i;
 
        if (!ep->auth_enable)
                return -EACCES;
@@ -5555,8 +5556,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
                return -EFAULT;
        if (put_user(num_idents, &p->shmac_num_idents))
                return -EFAULT;
-       if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
-               return -EFAULT;
+       for (i = 0; i < num_idents; i++) {
+               __u16 hmacid = ntohs(hmacs->hmac_ids[i]);
+
+               if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
+                       return -EFAULT;
+       }
        return 0;
 }
 
@@ -6636,6 +6641,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 
                        if (cmsgs->srinfo->sinfo_flags &
                            ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+                             SCTP_SACK_IMMEDIATELY |
                              SCTP_ABORT | SCTP_EOF))
                                return -EINVAL;
                        break;
@@ -6659,6 +6665,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 
                        if (cmsgs->sinfo->snd_flags &
                            ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+                             SCTP_SACK_IMMEDIATELY |
                              SCTP_ABORT | SCTP_EOF))
                                return -EINVAL;
                        break;
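
The getsockopt copy loop above replaces a raw bulk copy_to_user() of the identifier array: the HMAC ids are stored big-endian (wire order) internally, so each __u16 is converted with ntohs() on its way out rather than handing userspace byte-swapped values. A userspace sketch of the per-element conversion:

#include <stdint.h>
#include <arpa/inet.h>          /* ntohs() */

/* Convert each stored network-order id for the caller; the kernel
 * version does one copy_to_user() per element instead. */
static void export_hmac_idents(uint16_t *dst, const uint16_t *net_src,
                               unsigned int n)
{
        for (unsigned int i = 0; i < n; i++)
                dst[i] = ntohs(net_src[i]);
}
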
index aab9e3f29755a58eb7584aa763b4b1a46d2e6608..a431c14044a46f98e9b73f66e28f229690ef3c01 100644 (file)
@@ -132,8 +132,6 @@ fail:
  */
 void sctp_transport_free(struct sctp_transport *transport)
 {
-       transport->dead = 1;
-
        /* Try to delete the heartbeat timer.  */
        if (del_timer(&transport->hb_timer))
                sctp_transport_put(transport);
@@ -169,7 +167,7 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head)
  */
 static void sctp_transport_destroy(struct sctp_transport *transport)
 {
-       if (unlikely(!transport->dead)) {
+       if (unlikely(atomic_read(&transport->refcnt))) {
                WARN(1, "Attempt to destroy undead transport %p!\n", transport);
                return;
        }
@@ -296,9 +294,9 @@ void sctp_transport_route(struct sctp_transport *transport,
 }
 
 /* Hold a reference to a transport.  */
-void sctp_transport_hold(struct sctp_transport *transport)
+int sctp_transport_hold(struct sctp_transport *transport)
 {
-       atomic_inc(&transport->refcnt);
+       return atomic_add_unless(&transport->refcnt, 1, 0);
 }
 
 /* Release a reference to a transport and clean up
index 799e65b944b9019cf7eee177f90f6d67a0fb66dd..cabf586f47d7d0d75a8cb77b44d497e76a34abd7 100644 (file)
@@ -740,7 +740,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
                default:
                        printk(KERN_CRIT "%s: bad return from "
                                "gss_fill_context: %zd\n", __func__, err);
-                       BUG();
+                       gss_msg->msg.errno = -EIO;
                }
                goto err_release_msg;
        }
index 5e4f815c2b34d22fba11ac2443e9d66bc27c779c..273bc3a35425ba99351eedb3f99244f0f9c07ab7 100644 (file)
@@ -771,7 +771,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        if (count == 0)
                return 0;
 
-       mutex_lock(&inode->i_mutex); /* protect against multiple concurrent
+       inode_lock(inode); /* protect against multiple concurrent
                              * readers on this file */
  again:
        spin_lock(&queue_lock);
@@ -784,7 +784,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        }
        if (rp->q.list.next == &cd->queue) {
                spin_unlock(&queue_lock);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                WARN_ON_ONCE(rp->offset);
                return 0;
        }
@@ -838,7 +838,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        }
        if (err == -EAGAIN)
                goto again;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err ? err :  count;
 }
 
@@ -909,9 +909,9 @@ static ssize_t cache_write(struct file *filp, const char __user *buf,
        if (!cd->cache_parse)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = cache_downcall(mapping, buf, count, cd);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return ret;
 }
@@ -1225,7 +1225,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
        if (bp[0] == '\\' && bp[1] == 'x') {
                /* HEX STRING */
                bp += 2;
-               while (len < bufsize) {
+               while (len < bufsize - 1) {
                        int h, l;
 
                        h = hex_to_bin(bp[0]);
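
The mutex_lock(&inode->i_mutex) to inode_lock(inode) churn here, and in the sunrpc pipefs hunks below, is mechanical adoption of the then-new VFS wrappers, which at this point in history were thin inlines over i_mutex, roughly as sketched below (later kernels reimplemented them on a rwsem). The separate qword_get() change is an off-by-one fix: the loop bound reserves the final byte of the buffer for the NUL terminator.

/* Approximate wrapper definitions at the time of this merge; a sketch
 * of the API being adopted, not a verbatim quotation. */
static inline void inode_lock(struct inode *inode)
{
        mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
        mutex_unlock(&inode->i_mutex);
}
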
index 14f45bf0410c688b25fafedfb6636cfa025ad06c..31789ef3e614484a4d5c75721d4b9f63fc735f6a 100644 (file)
@@ -172,7 +172,7 @@ rpc_close_pipes(struct inode *inode)
        int need_release;
        LIST_HEAD(free_list);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        spin_lock(&pipe->lock);
        need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
        pipe->nreaders = 0;
@@ -188,7 +188,7 @@ rpc_close_pipes(struct inode *inode)
        cancel_delayed_work_sync(&pipe->queue_timeout);
        rpc_inode_setowner(inode, NULL);
        RPC_I(inode)->pipe = NULL;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 static struct inode *
@@ -221,7 +221,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
        int first_open;
        int res = -ENXIO;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL)
                goto out;
@@ -237,7 +237,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
                pipe->nwriters++;
        res = 0;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -248,7 +248,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
        struct rpc_pipe_msg *msg;
        int last_close;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL)
                goto out;
@@ -278,7 +278,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
        if (last_close && pipe->ops->release_pipe)
                pipe->ops->release_pipe(inode);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return 0;
 }
 
@@ -290,7 +290,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
        struct rpc_pipe_msg *msg;
        int res = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL) {
                res = -EPIPE;
@@ -322,7 +322,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
                pipe->ops->destroy_msg(msg);
        }
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -332,11 +332,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
        struct inode *inode = file_inode(filp);
        int res;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        res = -EPIPE;
        if (RPC_I(inode)->pipe != NULL)
                res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -349,12 +349,12 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
 
        poll_wait(filp, &rpci->waitq, wait);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (rpci->pipe == NULL)
                mask |= POLLERR | POLLHUP;
        else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
                mask |= POLLIN | POLLRDNORM;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return mask;
 }
 
@@ -367,10 +367,10 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
        switch (cmd) {
        case FIONREAD:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                pipe = RPC_I(inode)->pipe;
                if (pipe == NULL) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return -EPIPE;
                }
                spin_lock(&pipe->lock);
@@ -381,7 +381,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        len += msg->len - msg->copied;
                }
                spin_unlock(&pipe->lock);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return put_user(len, (int __user *)arg);
        default:
                return -EINVAL;
@@ -617,9 +617,9 @@ int rpc_rmdir(struct dentry *dentry)
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        error = __rpc_rmdir(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
@@ -701,9 +701,9 @@ static void rpc_depopulate(struct dentry *parent,
 {
        struct inode *dir = d_inode(parent);
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(dir, I_MUTEX_CHILD);
        __rpc_depopulate(parent, files, start, eof);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 }
 
 static int rpc_populate(struct dentry *parent,
@@ -715,7 +715,7 @@ static int rpc_populate(struct dentry *parent,
        struct dentry *dentry;
        int i, err;
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        for (i = start; i < eof; i++) {
                dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
                err = PTR_ERR(dentry);
@@ -739,11 +739,11 @@ static int rpc_populate(struct dentry *parent,
                if (err != 0)
                        goto out_bad;
        }
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return 0;
 out_bad:
        __rpc_depopulate(parent, files, start, eof);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
                        __FILE__, __func__, parent);
        return err;
@@ -757,7 +757,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
        struct inode *dir = d_inode(parent);
        int error;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        dentry = __rpc_lookup_create_exclusive(parent, name);
        if (IS_ERR(dentry))
                goto out;
@@ -770,7 +770,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
                        goto err_rmdir;
        }
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 err_rmdir:
        __rpc_rmdir(dir, dentry);
@@ -788,11 +788,11 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        if (depopulate != NULL)
                depopulate(dentry);
        error = __rpc_rmdir(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
@@ -828,7 +828,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
        if (pipe->ops->downcall == NULL)
                umode &= ~S_IWUGO;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        dentry = __rpc_lookup_create_exclusive(parent, name);
        if (IS_ERR(dentry))
                goto out;
@@ -837,7 +837,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
        if (err)
                goto out_err;
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 out_err:
        dentry = ERR_PTR(err);
@@ -865,9 +865,9 @@ rpc_unlink(struct dentry *dentry)
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        error = __rpc_rmpipe(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
index 2e98f4a243e57d4539b3f4048d54c4b53f3c12d5..37edea6fa92d9685570ad3c1b37cb5fda5c0b9b3 100644 (file)
@@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt)
        if (atomic_dec_and_test(&xprt->count))
                xprt_destroy(xprt);
 }
+EXPORT_SYMBOL_GPL(xprt_put);
index 33f99d3004f2718206ad6b245a214e20718ac540..dc9f3b513a05f3a2fc9027359f9e1e587f9d243b 100644 (file)
@@ -2,7 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 
 rpcrdma-y := transport.o rpc_rdma.o verbs.o \
        fmr_ops.o frwr_ops.o physical_ops.o \
-       svc_rdma.o svc_rdma_transport.o \
+       svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
        svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
        module.o
 rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
index cc1251d0729707751d87537e1cbc6567aef1596c..2dcd7640eeb525d6a7f1ba8bd3eab047cda30f75 100644 (file)
@@ -341,6 +341,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
        rqst->rq_reply_bytes_recvd = 0;
        rqst->rq_bytes_sent = 0;
        rqst->rq_xid = headerp->rm_xid;
+
+       rqst->rq_private_buf.len = size;
        set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 
        buf = &rqst->rq_rcv_buf;
index c6836844bd0eb65ffa7134a6a014d60b5b42af1f..e16567389e28f5eba1359ddf91802d63c4612c26 100644 (file)
@@ -190,12 +190,11 @@ static int
 frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
             struct rpcrdma_create_data_internal *cdata)
 {
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        int depth, delta;
 
        ia->ri_max_frmr_depth =
                        min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-                             devattr->max_fast_reg_page_list_len);
+                             ia->ri_device->attrs.max_fast_reg_page_list_len);
        dprintk("RPC:       %s: device's max FR page list len = %u\n",
                __func__, ia->ri_max_frmr_depth);
 
@@ -222,8 +221,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
        }
 
        ep->rep_attr.cap.max_send_wr *= depth;
-       if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
-               cdata->max_requests = devattr->max_qp_wr / depth;
+       if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
+               cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
                if (!cdata->max_requests)
                        return -EINVAL;
                ep->rep_attr.cap.max_send_wr = cdata->max_requests *
index 1b7051bdbdc863dcd149b397bbc3f84a8872c5b1..c846ca9f1ebaa661f1e4a93893752950d35a4b6a 100644 (file)
@@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD;
 static unsigned int min_ord = 1;
 static unsigned int max_ord = 4096;
 unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
+unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
 static unsigned int min_max_requests = 4;
 static unsigned int max_max_requests = 16384;
 unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
@@ -71,10 +72,6 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-/* Temporary NFS request map and context caches */
-struct kmem_cache *svc_rdma_map_cachep;
-struct kmem_cache *svc_rdma_ctxt_cachep;
-
 struct workqueue_struct *svc_rdma_wq;
 
 /*
@@ -243,17 +240,16 @@ void svc_rdma_cleanup(void)
        svc_unreg_xprt_class(&svc_rdma_bc_class);
 #endif
        svc_unreg_xprt_class(&svc_rdma_class);
-       kmem_cache_destroy(svc_rdma_map_cachep);
-       kmem_cache_destroy(svc_rdma_ctxt_cachep);
 }
 
 int svc_rdma_init(void)
 {
        dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
        dprintk("\tsvcrdma_ord      : %d\n", svcrdma_ord);
-       dprintk("\tmax_requests     : %d\n", svcrdma_max_requests);
-       dprintk("\tsq_depth         : %d\n",
+       dprintk("\tmax_requests     : %u\n", svcrdma_max_requests);
+       dprintk("\tsq_depth         : %u\n",
                svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
+       dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
        dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
        svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
@@ -264,39 +260,10 @@ int svc_rdma_init(void)
                svcrdma_table_header =
                        register_sysctl_table(svcrdma_root_table);
 
-       /* Create the temporary map cache */
-       svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
-                                               sizeof(struct svc_rdma_req_map),
-                                               0,
-                                               SLAB_HWCACHE_ALIGN,
-                                               NULL);
-       if (!svc_rdma_map_cachep) {
-               printk(KERN_INFO "Could not allocate map cache.\n");
-               goto err0;
-       }
-
-       /* Create the temporary context cache */
-       svc_rdma_ctxt_cachep =
-               kmem_cache_create("svc_rdma_ctxt_cache",
-                                 sizeof(struct svc_rdma_op_ctxt),
-                                 0,
-                                 SLAB_HWCACHE_ALIGN,
-                                 NULL);
-       if (!svc_rdma_ctxt_cachep) {
-               printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
-               goto err1;
-       }
-
        /* Register RDMA with the SVC transport switch */
        svc_reg_xprt_class(&svc_rdma_class);
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
        svc_reg_xprt_class(&svc_rdma_bc_class);
 #endif
        return 0;
- err1:
-       kmem_cache_destroy(svc_rdma_map_cachep);
- err0:
-       unregister_sysctl_table(svcrdma_table_header);
-       destroy_workqueue(svc_rdma_wq);
-       return -ENOMEM;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
new file mode 100644 (file)
index 0000000..65a7c23
--- /dev/null
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ *
+ * Support for backward direction RPCs on RPC/RDMA (server-side).
+ */
+
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#define RPCDBG_FACILITY        RPCDBG_SVCXPRT
+
+#undef SVCRDMA_BACKCHANNEL_DEBUG
+
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+                            struct xdr_buf *rcvbuf)
+{
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+       struct kvec *dst, *src = &rcvbuf->head[0];
+       struct rpc_rqst *req;
+       unsigned long cwnd;
+       u32 credits;
+       size_t len;
+       __be32 xid;
+       __be32 *p;
+       int ret;
+
+       p = (__be32 *)src->iov_base;
+       len = src->iov_len;
+       xid = rmsgp->rm_xid;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+       pr_info("%s: xid=%08x, length=%zu\n",
+               __func__, be32_to_cpu(xid), len);
+       pr_info("%s: RPC/RDMA: %*ph\n",
+               __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+       pr_info("%s:      RPC: %*ph\n",
+               __func__, (int)len, p);
+#endif
+
+       ret = -EAGAIN;
+       if (src->iov_len < 24)
+               goto out_shortreply;
+
+       spin_lock_bh(&xprt->transport_lock);
+       req = xprt_lookup_rqst(xprt, xid);
+       if (!req)
+               goto out_notfound;
+
+       dst = &req->rq_private_buf.head[0];
+       memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+       if (dst->iov_len < len)
+               goto out_unlock;
+       memcpy(dst->iov_base, p, len);
+
+       credits = be32_to_cpu(rmsgp->rm_credit);
+       if (credits == 0)
+               credits = 1;    /* don't deadlock */
+       else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
+               credits = r_xprt->rx_buf.rb_bc_max_requests;
+
+       cwnd = xprt->cwnd;
+       xprt->cwnd = credits << RPC_CWNDSHIFT;
+       if (xprt->cwnd > cwnd)
+               xprt_release_rqst_cong(req->rq_task);
+
+       ret = 0;
+       xprt_complete_rqst(req->rq_task, rcvbuf->len);
+       rcvbuf->len = 0;
+
+out_unlock:
+       spin_unlock_bh(&xprt->transport_lock);
+out:
+       return ret;
+
+out_shortreply:
+       dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
+               xprt, src->iov_len);
+       goto out;
+
+out_notfound:
+       dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
+               xprt, be32_to_cpu(xid));
+
+       goto out_unlock;
+}
+
+/* Send a backwards direction RPC call.
+ *
+ * Caller holds the connection's mutex and has already marshaled
+ * the RPC/RDMA request.
+ *
+ * This is similar to svc_rdma_reply, but takes an rpc_rqst
+ * instead, does not support chunks, and avoids blocking memory
+ * allocation.
+ *
+ * XXX: There is still an opportunity to block in svc_rdma_send()
+ * if there are no SQ entries to post the Send. This may occur if
+ * the adapter has a small maximum SQ depth.
+ */
+static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
+                             struct rpc_rqst *rqst)
+{
+       struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
+       struct svc_rdma_op_ctxt *ctxt;
+       struct svc_rdma_req_map *vec;
+       struct ib_send_wr send_wr;
+       int ret;
+
+       vec = svc_rdma_get_req_map(rdma);
+       ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
+       if (ret)
+               goto out_err;
+
+       /* Post a recv buffer to handle the reply for this request. */
+       ret = svc_rdma_post_recv(rdma, GFP_NOIO);
+       if (ret) {
+               pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
+                      ret);
+               pr_err("svcrdma: closing transport %p.\n", rdma);
+               set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+               ret = -ENOTCONN;
+               goto out_err;
+       }
+
+       ctxt = svc_rdma_get_context(rdma);
+       ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
+       ctxt->count = 1;
+
+       ctxt->wr_op = IB_WR_SEND;
+       ctxt->direction = DMA_TO_DEVICE;
+       ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
+       ctxt->sge[0].length = sndbuf->len;
+       ctxt->sge[0].addr =
+           ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
+                           sndbuf->len, DMA_TO_DEVICE);
+       if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
+               ret = -EIO;
+               goto out_unmap;
+       }
+       atomic_inc(&rdma->sc_dma_used);
+
+       memset(&send_wr, 0, sizeof(send_wr));
+       send_wr.wr_id = (unsigned long)ctxt;
+       send_wr.sg_list = ctxt->sge;
+       send_wr.num_sge = 1;
+       send_wr.opcode = IB_WR_SEND;
+       send_wr.send_flags = IB_SEND_SIGNALED;
+
+       ret = svc_rdma_send(rdma, &send_wr);
+       if (ret) {
+               ret = -EIO;
+               goto out_unmap;
+       }
+
+out_err:
+       svc_rdma_put_req_map(rdma, vec);
+       dprintk("svcrdma: %s returns %d\n", __func__, ret);
+       return ret;
+
+out_unmap:
+       svc_rdma_unmap_dma(ctxt);
+       svc_rdma_put_context(ctxt, 1);
+       goto out_err;
+}
+
+/* Server-side transport endpoint wants a whole page for its send
+ * buffer. The client RPC code constructs the RPC header in this
+ * buffer before it invokes ->send_request.
+ *
+ * Returns NULL if there was a temporary allocation failure.
+ */
+static void *
+xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+{
+       struct rpc_rqst *rqst = task->tk_rqstp;
+       struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+       struct svcxprt_rdma *rdma;
+       struct page *page;
+
+       rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+
+       /* Prevent an infinite loop: try to make this case work */
+       if (size > PAGE_SIZE)
+               WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
+                         size);
+
+       page = alloc_page(RPCRDMA_DEF_GFP);
+       if (!page)
+               return NULL;
+
+       return page_address(page);
+}
+
+static void
+xprt_rdma_bc_free(void *buffer)
+{
+       /* No-op: ctxt and page have already been freed. */
+}
+
+static int
+rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
+{
+       struct rpc_xprt *xprt = rqst->rq_xprt;
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+       struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+       int rc;
+
+       /* Space in the send buffer for an RPC/RDMA header is reserved
+        * via xprt->tsh_size.
+        */
+       headerp->rm_xid = rqst->rq_xid;
+       headerp->rm_vers = rpcrdma_version;
+       headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+       headerp->rm_type = rdma_msg;
+       headerp->rm_body.rm_chunks[0] = xdr_zero;
+       headerp->rm_body.rm_chunks[1] = xdr_zero;
+       headerp->rm_body.rm_chunks[2] = xdr_zero;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+       pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
+#endif
+
+       rc = svc_rdma_bc_sendto(rdma, rqst);
+       if (rc)
+               goto drop_connection;
+       return rc;
+
+drop_connection:
+       dprintk("svcrdma: failed to send bc call\n");
+       xprt_disconnect_done(xprt);
+       return -ENOTCONN;
+}
+
+/* Send an RPC call on the passive end of a transport
+ * connection.
+ */
+static int
+xprt_rdma_bc_send_request(struct rpc_task *task)
+{
+       struct rpc_rqst *rqst = task->tk_rqstp;
+       struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+       struct svcxprt_rdma *rdma;
+       int ret;
+
+       dprintk("svcrdma: sending bc call with xid: %08x\n",
+               be32_to_cpu(rqst->rq_xid));
+
+       if (!mutex_trylock(&sxprt->xpt_mutex)) {
+               rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
+               if (!mutex_trylock(&sxprt->xpt_mutex))
+                       return -EAGAIN;
+               rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
+       }
+
+       ret = -ENOTCONN;
+       rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+       if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+               ret = rpcrdma_bc_send_request(rdma, rqst);
+
+       mutex_unlock(&sxprt->xpt_mutex);
+
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
+static void
+xprt_rdma_bc_close(struct rpc_xprt *xprt)
+{
+       dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+}
+
+static void
+xprt_rdma_bc_put(struct rpc_xprt *xprt)
+{
+       dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+
+       xprt_free(xprt);
+       module_put(THIS_MODULE);
+}
+
+static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+       .reserve_xprt           = xprt_reserve_xprt_cong,
+       .release_xprt           = xprt_release_xprt_cong,
+       .alloc_slot             = xprt_alloc_slot,
+       .release_request        = xprt_release_rqst_cong,
+       .buf_alloc              = xprt_rdma_bc_allocate,
+       .buf_free               = xprt_rdma_bc_free,
+       .send_request           = xprt_rdma_bc_send_request,
+       .set_retrans_timeout    = xprt_set_retrans_timeout_def,
+       .close                  = xprt_rdma_bc_close,
+       .destroy                = xprt_rdma_bc_put,
+       .print_stats            = xprt_rdma_print_stats
+};
+
+static const struct rpc_timeout xprt_rdma_bc_timeout = {
+       .to_initval = 60 * HZ,
+       .to_maxval = 60 * HZ,
+};
+
+/* It shouldn't matter if the number of backchannel session slots
+ * doesn't match the number of RPC/RDMA credits. That just means
+ * one or the other will have extra slots that aren't used.
+ */
+static struct rpc_xprt *
+xprt_setup_rdma_bc(struct xprt_create *args)
+{
+       struct rpc_xprt *xprt;
+       struct rpcrdma_xprt *new_xprt;
+
+       if (args->addrlen > sizeof(xprt->addr)) {
+               dprintk("RPC:       %s: address too large\n", __func__);
+               return ERR_PTR(-EBADF);
+       }
+
+       xprt = xprt_alloc(args->net, sizeof(*new_xprt),
+                         RPCRDMA_MAX_BC_REQUESTS,
+                         RPCRDMA_MAX_BC_REQUESTS);
+       if (!xprt) {
+               dprintk("RPC:       %s: couldn't allocate rpc_xprt\n",
+                       __func__);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       xprt->timeout = &xprt_rdma_bc_timeout;
+       xprt_set_bound(xprt);
+       xprt_set_connected(xprt);
+       xprt->bind_timeout = RPCRDMA_BIND_TO;
+       xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+       xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+
+       xprt->prot = XPRT_TRANSPORT_BC_RDMA;
+       xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
+       xprt->ops = &xprt_rdma_bc_procs;
+
+       memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+       xprt->addrlen = args->addrlen;
+       xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
+       xprt->resvport = 0;
+
+       xprt->max_payload = xprt_rdma_max_inline_read;
+
+       new_xprt = rpcx_to_rdmax(xprt);
+       new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
+
+       xprt_get(xprt);
+       args->bc_xprt->xpt_bc_xprt = xprt;
+       xprt->bc_xprt = args->bc_xprt;
+
+       if (!try_module_get(THIS_MODULE))
+               goto out_fail;
+
+       /* Final put for backchannel xprt is in __svc_rdma_free */
+       xprt_get(xprt);
+       return xprt;
+
+out_fail:
+       xprt_rdma_free_addresses(xprt);
+       args->bc_xprt->xpt_bc_xprt = NULL;
+       xprt_put(xprt);
+       xprt_free(xprt);
+       return ERR_PTR(-EINVAL);
+}
+
+struct xprt_class xprt_rdma_bc = {
+       .list                   = LIST_HEAD_INIT(xprt_rdma_bc.list),
+       .name                   = "rdma backchannel",
+       .owner                  = THIS_MODULE,
+       .ident                  = XPRT_TRANSPORT_BC_RDMA,
+       .setup                  = xprt_setup_rdma_bc,
+};
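
Note: xprt_rdma_bc_send_request() above never sleeps on the transport mutex. It tries the lock, parks the task on xpt_bc_pending if the lock is contended, tries once more, and otherwise returns -EAGAIN so the RPC scheduler will retry the send later. Below is a minimal userspace sketch of that try/park/retry shape, with pthreads standing in for the RPC scheduler; park_task() and unpark_task() are illustrative stubs, not kernel APIs.

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t xpt_mutex = PTHREAD_MUTEX_INITIALIZER;

static void park_task(void *task)   { (void)task; /* would queue the task */ }
static void unpark_task(void *task) { (void)task; /* would dequeue it */ }

/* Sketch of the shape of xprt_rdma_bc_send_request(), not the kernel
 * implementation: never block; hand contention back to the caller. */
static int bc_send(void *task, int (*do_send)(void *))
{
        int ret;

        if (pthread_mutex_trylock(&xpt_mutex) != 0) {
                park_task(task);
                if (pthread_mutex_trylock(&xpt_mutex) != 0)
                        return -EAGAIN;
                unpark_task(task);
        }

        ret = do_send(task);
        pthread_mutex_unlock(&xpt_mutex);
        return ret;
}

The second trylock closes a race: if the lock was released between the first attempt and the park, the task takes it immediately instead of waiting for a wakeup that may already have happened.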
index ff4f01e527ecc08a1480ecba8f00d41a90a76571..c8b8a8b4181eafa75de0d673040b9f927fa74d9d 100644 (file)
@@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
 
                head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
                head->arg.page_len += len;
+
                head->arg.len += len;
                if (!pg_off)
                        head->count++;
@@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                        goto err;
                atomic_inc(&xprt->sc_dma_used);
 
-               /* The lkey here is either a local dma lkey or a dma_mr lkey */
-               ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+               ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->sge[pno].length = len;
                ctxt->count++;
 
@@ -567,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
        return ret;
 }
 
+/* By convention, backchannel calls arrive via rdma_msg type
+ * messages, and never populate the chunk lists. This makes
+ * the RPC/RDMA header small and fixed in size, so it is
+ * straightforward to check the RPC header's direction field.
+ */
+static bool
+svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+{
+       __be32 *p = (__be32 *)rmsgp;
+
+       if (!xprt->xpt_bc_xprt)
+               return false;
+
+       if (rmsgp->rm_type != rdma_msg)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+               return false;
+
+       /* sanity */
+       if (p[7] != rmsgp->rm_xid)
+               return false;
+       /* call direction */
+       if (p[8] == cpu_to_be32(RPC_CALL))
+               return false;
+
+       return true;
+}
+
 /*
  * Set up the rqstp thread context to point to the RQ buffer. If
  * necessary, pull additional data from the client with an RDMA_READ
@@ -632,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
                goto close_out;
        }
 
+       if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
+               ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+                                              &rqstp->rq_arg);
+               svc_rdma_put_context(ctxt, 0);
+               if (ret)
+                       goto repost;
+               return ret;
+       }
+
        /* Read read-list data. */
        ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
        if (ret > 0) {
@@ -668,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
        set_bit(XPT_CLOSE, &xprt->xpt_flags);
 defer:
        return 0;
+
+repost:
+       ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
+       if (ret) {
+               pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
+                      ret);
+               pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
+               set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
+               ret = -ENOTCONN;
+       }
+       return ret;
 }
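
Note: svc_rdma_is_backchannel_reply() relies on the fixed layout an rdma_msg header has when all three chunk lists are empty: seven 32-bit XDR words of RPC/RDMA header (xid, version, credits, message type, and three zero chunk-list discriminators), immediately followed by the embedded RPC header, whose first word (p[7]) must echo the transport XID and whose second (p[8]) carries the call direction. A hedged userspace restatement of the same test, assuming the RFC 5666/RFC 5531 wire values (rdma_msg = 0, RPC REPLY = 1):

#include <stdbool.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Sketch only: 'p' points at the start of the RPC/RDMA header,
 * viewed as an array of big-endian 32-bit words. */
static bool is_bc_reply(const uint32_t *p)
{
        if (ntohl(p[3]) != 0)           /* rm_type != rdma_msg (0) */
                return false;
        if (p[4] || p[5] || p[6])       /* a chunk list is present */
                return false;
        if (p[7] != p[0])               /* RPC xid must echo rm_xid */
                return false;
        return ntohl(p[8]) == 1;        /* msg_type == REPLY (1) */
}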
index 969a1ab75fc3c5fb8011157e4f57e8d08f560b42..df57f3ce6cd2cc3cae5fa2a709e62ceae2828a26 100644 (file)
@@ -50,9 +50,9 @@
 
 #define RPCDBG_FACILITY        RPCDBG_SVCXPRT
 
-static int map_xdr(struct svcxprt_rdma *xprt,
-                  struct xdr_buf *xdr,
-                  struct svc_rdma_req_map *vec)
+int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
+                    struct xdr_buf *xdr,
+                    struct svc_rdma_req_map *vec)
 {
        int sge_no;
        u32 sge_bytes;
@@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt,
 
        if (xdr->len !=
            (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
-               pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+               pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
                return -EIO;
        }
 
@@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt,
                sge_no++;
        }
 
-       dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+       dprintk("svcrdma: %s: sge_no %d page_no %d "
                "page_base %u page_len %u head_len %zu tail_len %zu\n",
-               sge_no, page_no, xdr->page_base, xdr->page_len,
+               __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
                xdr->head[0].iov_len, xdr->tail[0].iov_len);
 
        vec->count = sge_no;
@@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
                                         sge[sge_no].addr))
                        goto err;
                atomic_inc(&xprt->sc_dma_used);
-               sge[sge_no].lkey = xprt->sc_dma_lkey;
+               sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->count++;
                sge_off = 0;
                sge_no++;
@@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
        int ret;
 
        /* Post a recv buffer to handle another request. */
-       ret = svc_rdma_post_recv(rdma);
+       ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
        if (ret) {
                printk(KERN_INFO
                       "svcrdma: could not post a receive buffer, err=%d."
@@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
        ctxt->count = 1;
 
        /* Prepare the SGE for the RPCRDMA Header */
-       ctxt->sge[0].lkey = rdma->sc_dma_lkey;
+       ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
        ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
        ctxt->sge[0].addr =
            ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
@@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
                                         ctxt->sge[sge_no].addr))
                        goto err;
                atomic_inc(&rdma->sc_dma_used);
-               ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+               ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
                ctxt->sge[sge_no].length = sge_bytes;
        }
        if (byte_count != 0) {
@@ -591,14 +591,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        /* Build an req vec for the XDR */
        ctxt = svc_rdma_get_context(rdma);
        ctxt->direction = DMA_TO_DEVICE;
-       vec = svc_rdma_get_req_map();
-       ret = map_xdr(rdma, &rqstp->rq_res, vec);
+       vec = svc_rdma_get_req_map(rdma);
+       ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
        if (ret)
                goto err0;
        inline_bytes = rqstp->rq_res.len;
 
        /* Create the RDMA response header */
-       res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+       ret = -ENOMEM;
+       res_page = alloc_page(GFP_KERNEL);
+       if (!res_page)
+               goto err0;
        rdma_resp = page_address(res_page);
        reply_ary = svc_rdma_get_reply_array(rdma_argp);
        if (reply_ary)
@@ -630,14 +633,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 
        ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
                         inline_bytes);
-       svc_rdma_put_req_map(vec);
+       svc_rdma_put_req_map(rdma, vec);
        dprintk("svcrdma: send_reply returns %d\n", ret);
        return ret;
 
  err1:
        put_page(res_page);
  err0:
-       svc_rdma_put_req_map(vec);
+       svc_rdma_put_req_map(rdma, vec);
        svc_rdma_put_context(ctxt, 0);
        return ret;
 }
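
Note: the alloc_page() change above follows a series-wide pattern: each __GFP_NOFAIL allocation in svcrdma becomes an ordinary allocation with an explicit failure path, so memory pressure produces a clean error unwind instead of an allocator that can stall indefinitely. Reduced to its skeleton, the conversion is:

        /* before: the allocation could not fail, but could stall forever */
        res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);

        /* after: pre-load the error code and take the unwind path */
        ret = -ENOMEM;
        res_page = alloc_page(GFP_KERNEL);
        if (!res_page)
                goto err0;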
index b348b4adef29a48246709cc7f32cf576865753eb..5763825d09bf776bfa89f0007be1805203d96adf 100644 (file)
@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
+                                          gfp_t flags)
 {
        struct svc_rdma_op_ctxt *ctxt;
 
-       ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
-                               GFP_KERNEL | __GFP_NOFAIL);
-       ctxt->xprt = xprt;
-       INIT_LIST_HEAD(&ctxt->dto_q);
+       ctxt = kmalloc(sizeof(*ctxt), flags);
+       if (ctxt) {
+               ctxt->xprt = xprt;
+               INIT_LIST_HEAD(&ctxt->free);
+               INIT_LIST_HEAD(&ctxt->dto_q);
+       }
+       return ctxt;
+}
+
+static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
+{
+       unsigned int i;
+
+       /* Each RPC/RDMA credit can consume a number of send
+        * and receive WQEs. One ctxt is allocated for each.
+        */
+       i = xprt->sc_sq_depth + xprt->sc_rq_depth;
+
+       while (i--) {
+               struct svc_rdma_op_ctxt *ctxt;
+
+               ctxt = alloc_ctxt(xprt, GFP_KERNEL);
+               if (!ctxt) {
+                       dprintk("svcrdma: No memory for RDMA ctxt\n");
+                       return false;
+               }
+               list_add(&ctxt->free, &xprt->sc_ctxts);
+       }
+       return true;
+}
+
+struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+{
+       struct svc_rdma_op_ctxt *ctxt = NULL;
+
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used++;
+       if (list_empty(&xprt->sc_ctxts))
+               goto out_empty;
+
+       ctxt = list_first_entry(&xprt->sc_ctxts,
+                               struct svc_rdma_op_ctxt, free);
+       list_del_init(&ctxt->free);
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+out:
        ctxt->count = 0;
        ctxt->frmr = NULL;
-       atomic_inc(&xprt->sc_ctxt_used);
        return ctxt;
+
+out_empty:
+       /* Either pre-allocation missed the mark, or send
+        * queue accounting is broken.
+        */
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+       ctxt = alloc_ctxt(xprt, GFP_NOIO);
+       if (ctxt)
+               goto out;
+
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used--;
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
+       return NULL;
 }
 
 void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
@@ -174,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
        for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
                /*
                 * Unmap the DMA addr in the SGE if the lkey matches
-                * the sc_dma_lkey, otherwise, ignore it since it is
+                * the local_dma_lkey, otherwise, ignore it since it is
                 * an FRMR lkey and will be unmapped later when the
                 * last WR that uses it completes.
                 */
-               if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+               if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
                        atomic_dec(&xprt->sc_dma_used);
                        ib_dma_unmap_page(xprt->sc_cm_id->device,
                                            ctxt->sge[i].addr,
@@ -190,35 +248,108 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 
 void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 {
-       struct svcxprt_rdma *xprt;
+       struct svcxprt_rdma *xprt = ctxt->xprt;
        int i;
 
-       xprt = ctxt->xprt;
        if (free_pages)
                for (i = 0; i < ctxt->count; i++)
                        put_page(ctxt->pages[i]);
 
-       kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
-       atomic_dec(&xprt->sc_ctxt_used);
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used--;
+       list_add(&ctxt->free, &xprt->sc_ctxts);
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
 }
 
-/*
- * Temporary NFS req mappings are shared across all transport
- * instances. These are short lived and should be bounded by the number
- * of concurrent server threads * depth of the SQ.
- */
-struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
+{
+       while (!list_empty(&xprt->sc_ctxts)) {
+               struct svc_rdma_op_ctxt *ctxt;
+
+               ctxt = list_first_entry(&xprt->sc_ctxts,
+                                       struct svc_rdma_op_ctxt, free);
+               list_del(&ctxt->free);
+               kfree(ctxt);
+       }
+}
+
+static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
 {
        struct svc_rdma_req_map *map;
-       map = kmem_cache_alloc(svc_rdma_map_cachep,
-                              GFP_KERNEL | __GFP_NOFAIL);
+
+       map = kmalloc(sizeof(*map), flags);
+       if (map)
+               INIT_LIST_HEAD(&map->free);
+       return map;
+}
+
+static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
+{
+       unsigned int i;
+
+       /* One for each receive buffer on this connection. */
+       i = xprt->sc_max_requests;
+
+       while (i--) {
+               struct svc_rdma_req_map *map;
+
+               map = alloc_req_map(GFP_KERNEL);
+               if (!map) {
+                       dprintk("svcrdma: No memory for request map\n");
+                       return false;
+               }
+               list_add(&map->free, &xprt->sc_maps);
+       }
+       return true;
+}
+
+struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
+{
+       struct svc_rdma_req_map *map = NULL;
+
+       spin_lock(&xprt->sc_map_lock);
+       if (list_empty(&xprt->sc_maps))
+               goto out_empty;
+
+       map = list_first_entry(&xprt->sc_maps,
+                              struct svc_rdma_req_map, free);
+       list_del_init(&map->free);
+       spin_unlock(&xprt->sc_map_lock);
+
+out:
        map->count = 0;
        return map;
+
+out_empty:
+       spin_unlock(&xprt->sc_map_lock);
+
+       /* Pre-allocation amount was incorrect */
+       map = alloc_req_map(GFP_NOIO);
+       if (map)
+               goto out;
+
+       WARN_ONCE(1, "svcrdma: empty request map list?\n");
+       return NULL;
+}
+
+void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
+                         struct svc_rdma_req_map *map)
+{
+       spin_lock(&xprt->sc_map_lock);
+       list_add(&map->free, &xprt->sc_maps);
+       spin_unlock(&xprt->sc_map_lock);
 }
 
-void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
 {
-       kmem_cache_free(svc_rdma_map_cachep, map);
+       while (!list_empty(&xprt->sc_maps)) {
+               struct svc_rdma_req_map *map;
+
+               map = list_first_entry(&xprt->sc_maps,
+                                      struct svc_rdma_req_map, free);
+               list_del(&map->free);
+               kfree(map);
+       }
 }
 
 /* ib_cq event handler */
@@ -386,46 +517,44 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
 static void process_context(struct svcxprt_rdma *xprt,
                            struct svc_rdma_op_ctxt *ctxt)
 {
+       struct svc_rdma_op_ctxt *read_hdr;
+       int free_pages = 0;
+
        svc_rdma_unmap_dma(ctxt);
 
        switch (ctxt->wr_op) {
        case IB_WR_SEND:
-               if (ctxt->frmr)
-                       pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
-               svc_rdma_put_context(ctxt, 1);
+               free_pages = 1;
                break;
 
        case IB_WR_RDMA_WRITE:
-               if (ctxt->frmr)
-                       pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
-               svc_rdma_put_context(ctxt, 0);
                break;
 
        case IB_WR_RDMA_READ:
        case IB_WR_RDMA_READ_WITH_INV:
                svc_rdma_put_frmr(xprt, ctxt->frmr);
-               if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-                       struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-                       if (read_hdr) {
-                               spin_lock_bh(&xprt->sc_rq_dto_lock);
-                               set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-                               list_add_tail(&read_hdr->dto_q,
-                                             &xprt->sc_read_complete_q);
-                               spin_unlock_bh(&xprt->sc_rq_dto_lock);
-                       } else {
-                               pr_err("svcrdma: ctxt->read_hdr == NULL\n");
-                       }
-                       svc_xprt_enqueue(&xprt->sc_xprt);
-               }
+
+               if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
+                       break;
+
+               read_hdr = ctxt->read_hdr;
                svc_rdma_put_context(ctxt, 0);
-               break;
+
+               spin_lock_bh(&xprt->sc_rq_dto_lock);
+               set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+               list_add_tail(&read_hdr->dto_q,
+                             &xprt->sc_read_complete_q);
+               spin_unlock_bh(&xprt->sc_rq_dto_lock);
+               svc_xprt_enqueue(&xprt->sc_xprt);
+               return;
 
        default:
-               printk(KERN_ERR "svcrdma: unexpected completion type, "
-                      "opcode=%d\n",
-                      ctxt->wr_op);
+               dprintk("svcrdma: unexpected completion opcode=%d\n",
+                       ctxt->wr_op);
                break;
        }
+
+       svc_rdma_put_context(ctxt, free_pages);
 }
 
 /*
@@ -523,19 +652,15 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
+       INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+       INIT_LIST_HEAD(&cma_xprt->sc_maps);
        init_waitqueue_head(&cma_xprt->sc_send_wait);
 
        spin_lock_init(&cma_xprt->sc_lock);
        spin_lock_init(&cma_xprt->sc_rq_dto_lock);
        spin_lock_init(&cma_xprt->sc_frmr_q_lock);
-
-       cma_xprt->sc_ord = svcrdma_ord;
-
-       cma_xprt->sc_max_req_size = svcrdma_max_req_size;
-       cma_xprt->sc_max_requests = svcrdma_max_requests;
-       cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
-       atomic_set(&cma_xprt->sc_sq_count, 0);
-       atomic_set(&cma_xprt->sc_ctxt_used, 0);
+       spin_lock_init(&cma_xprt->sc_ctxt_lock);
+       spin_lock_init(&cma_xprt->sc_map_lock);
 
        if (listener)
                set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -543,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        return cma_xprt;
 }
 
-int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
+int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
 {
        struct ib_recv_wr recv_wr, *bad_recv_wr;
        struct svc_rdma_op_ctxt *ctxt;
@@ -561,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                        pr_err("svcrdma: Too many sges (%d)\n", sge_no);
                        goto err_put_ctxt;
                }
-               page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+               page = alloc_page(flags);
+               if (!page)
+                       goto err_put_ctxt;
                ctxt->pages[sge_no] = page;
                pa = ib_dma_map_page(xprt->sc_cm_id->device,
                                     page, 0, PAGE_SIZE,
@@ -571,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                atomic_inc(&xprt->sc_dma_used);
                ctxt->sge[sge_no].addr = pa;
                ctxt->sge[sge_no].length = PAGE_SIZE;
-               ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+               ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->count = sge_no + 1;
                buflen += PAGE_SIZE;
        }
@@ -886,11 +1013,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        struct rdma_conn_param conn_param;
        struct ib_cq_init_attr cq_attr = {};
        struct ib_qp_init_attr qp_attr;
-       struct ib_device_attr devattr;
-       int uninitialized_var(dma_mr_acc);
-       int need_dma_mr = 0;
-       int ret;
-       int i;
+       struct ib_device *dev;
+       unsigned int i;
+       int ret = 0;
 
        listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
        clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -910,37 +1035,42 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
                newxprt, newxprt->sc_cm_id);
 
-       ret = ib_query_device(newxprt->sc_cm_id->device, &devattr);
-       if (ret) {
-               dprintk("svcrdma: could not query device attributes on "
-                       "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
-               goto errout;
-       }
+       dev = newxprt->sc_cm_id->device;
 
        /* Qualify the transport resource defaults with the
         * capabilities of this particular device */
-       newxprt->sc_max_sge = min((size_t)devattr.max_sge,
+       newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
                                  (size_t)RPCSVC_MAXPAGES);
-       newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd,
+       newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
                                       RPCSVC_MAXPAGES);
-       newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
-                                  (size_t)svcrdma_max_requests);
-       newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
+       newxprt->sc_max_req_size = svcrdma_max_req_size;
+       newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
+                                        svcrdma_max_requests);
+       newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
+                                           svcrdma_max_bc_requests);
+       newxprt->sc_rq_depth = newxprt->sc_max_requests +
+                              newxprt->sc_max_bc_requests;
+       newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+
+       if (!svc_rdma_prealloc_ctxts(newxprt))
+               goto errout;
+       if (!svc_rdma_prealloc_maps(newxprt))
+               goto errout;
 
        /*
         * Limit ORD based on client limit, local device limit, and
         * configured svcrdma limit.
         */
-       newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+       newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
        newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
 
-       newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
+       newxprt->sc_pd = ib_alloc_pd(dev);
        if (IS_ERR(newxprt->sc_pd)) {
                dprintk("svcrdma: error creating PD for connect request\n");
                goto errout;
        }
        cq_attr.cqe = newxprt->sc_sq_depth;
-       newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+       newxprt->sc_sq_cq = ib_create_cq(dev,
                                         sq_comp_handler,
                                         cq_event_handler,
                                         newxprt,
@@ -949,8 +1079,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
-       cq_attr.cqe = newxprt->sc_max_requests;
-       newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+       cq_attr.cqe = newxprt->sc_rq_depth;
+       newxprt->sc_rq_cq = ib_create_cq(dev,
                                         rq_comp_handler,
                                         cq_event_handler,
                                         newxprt,
@@ -964,7 +1094,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        qp_attr.event_handler = qp_event_handler;
        qp_attr.qp_context = &newxprt->sc_xprt;
        qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
-       qp_attr.cap.max_recv_wr = newxprt->sc_max_requests;
+       qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
        qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
        qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
        qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -978,7 +1108,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                "    cap.max_send_sge = %d\n"
                "    cap.max_recv_sge = %d\n",
                newxprt->sc_cm_id, newxprt->sc_pd,
-               newxprt->sc_cm_id->device, newxprt->sc_pd->device,
+               dev, newxprt->sc_pd->device,
                qp_attr.cap.max_send_wr,
                qp_attr.cap.max_recv_wr,
                qp_attr.cap.max_send_sge,
@@ -1014,9 +1144,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
         *      of an RDMA_READ. IB does not.
         */
        newxprt->sc_reader = rdma_read_chunk_lcl;
-       if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+       if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
                newxprt->sc_frmr_pg_list_len =
-                       devattr.max_fast_reg_page_list_len;
+                       dev->attrs.max_fast_reg_page_list_len;
                newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
                newxprt->sc_reader = rdma_read_chunk_frmr;
        }
@@ -1024,44 +1154,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        /*
         * Determine if a DMA MR is required and if so, what privs are required
         */
-       if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                                newxprt->sc_cm_id->port_num) &&
-           !rdma_ib_or_roce(newxprt->sc_cm_id->device,
-                            newxprt->sc_cm_id->port_num))
+       if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
+           !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
                goto errout;
 
-       if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
-           !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
-               need_dma_mr = 1;
-               dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
-               if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                                       newxprt->sc_cm_id->port_num) &&
-                   !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
-                       dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
-       }
-
-       if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                               newxprt->sc_cm_id->port_num))
+       if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
                newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
 
-       /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
-       if (need_dma_mr) {
-               /* Register all of physical memory */
-               newxprt->sc_phys_mr =
-                       ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
-               if (IS_ERR(newxprt->sc_phys_mr)) {
-                       dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
-                               ret);
-                       goto errout;
-               }
-               newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
-       } else
-               newxprt->sc_dma_lkey =
-                       newxprt->sc_cm_id->device->local_dma_lkey;
-
        /* Post receive buffers */
-       for (i = 0; i < newxprt->sc_max_requests; i++) {
-               ret = svc_rdma_post_recv(newxprt);
+       for (i = 0; i < newxprt->sc_rq_depth; i++) {
+               ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
                if (ret) {
                        dprintk("svcrdma: failure posting receive buffers\n");
                        goto errout;
@@ -1160,12 +1262,14 @@ static void __svc_rdma_free(struct work_struct *work)
 {
        struct svcxprt_rdma *rdma =
                container_of(work, struct svcxprt_rdma, sc_work);
-       dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+       struct svc_xprt *xprt = &rdma->sc_xprt;
+
+       dprintk("svcrdma: %s(%p)\n", __func__, rdma);
 
        /* We should only be called from kref_put */
-       if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+       if (atomic_read(&xprt->xpt_ref.refcount) != 0)
                pr_err("svcrdma: sc_xprt still in use? (%d)\n",
-                      atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
+                      atomic_read(&xprt->xpt_ref.refcount));
 
        /*
         * Destroy queued, but not processed read completions. Note
@@ -1193,15 +1297,22 @@ static void __svc_rdma_free(struct work_struct *work)
        }
 
        /* Warn if we leaked a resource or under-referenced */
-       if (atomic_read(&rdma->sc_ctxt_used) != 0)
+       if (rdma->sc_ctxt_used != 0)
                pr_err("svcrdma: ctxt still in use? (%d)\n",
-                      atomic_read(&rdma->sc_ctxt_used));
+                      rdma->sc_ctxt_used);
        if (atomic_read(&rdma->sc_dma_used) != 0)
                pr_err("svcrdma: dma still in use? (%d)\n",
                       atomic_read(&rdma->sc_dma_used));
 
-       /* De-allocate fastreg mr */
+       /* Final put of backchannel client transport */
+       if (xprt->xpt_bc_xprt) {
+               xprt_put(xprt->xpt_bc_xprt);
+               xprt->xpt_bc_xprt = NULL;
+       }
+
        rdma_dealloc_frmr_q(rdma);
+       svc_rdma_destroy_ctxts(rdma);
+       svc_rdma_destroy_maps(rdma);
 
        /* Destroy the QP if present (not a listener) */
        if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1213,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work)
        if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
                ib_destroy_cq(rdma->sc_rq_cq);
 
-       if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
-               ib_dereg_mr(rdma->sc_phys_mr);
-
        if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
                ib_dealloc_pd(rdma->sc_pd);
 
@@ -1321,7 +1429,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
        int length;
        int ret;
 
-       p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+       p = alloc_page(GFP_KERNEL);
+       if (!p)
+               return;
        va = page_address(p);
 
        /* XDR encode error */
@@ -1341,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
                return;
        }
        atomic_inc(&xprt->sc_dma_used);
-       ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+       ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
        ctxt->sge[0].length = length;
 
        /* Prepare SEND WR */
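
Note: the context and request-map rework above replaces the global kmem_caches plus __GFP_NOFAIL with per-transport free lists. A pool sized to the queue depths is allocated while accepting the connection, svc_rdma_get_context()/svc_rdma_get_req_map() pop from the list under a lock, and an empty list falls back to a single non-blocking allocation. A compact userspace sketch of the same pool-with-fallback discipline, with pthread spinlocks and malloc() standing in for the kernel primitives:

#include <pthread.h>
#include <stdlib.h>

struct op_ctxt {
        struct op_ctxt *next;           /* free-list link */
};

struct pool {
        pthread_spinlock_t lock;
        struct op_ctxt *free;
        unsigned int used;
};

static int pool_init(struct pool *p, unsigned int depth)
{
        pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE);
        p->free = NULL;
        p->used = 0;
        while (depth--) {
                struct op_ctxt *c = malloc(sizeof(*c));

                if (!c)
                        return 0;       /* caller fails the accept */
                c->next = p->free;
                p->free = c;
        }
        return 1;
}

static struct op_ctxt *pool_get(struct pool *p)
{
        struct op_ctxt *c;

        pthread_spin_lock(&p->lock);
        p->used++;
        c = p->free;
        if (c)
                p->free = c->next;
        pthread_spin_unlock(&p->lock);
        if (c)
                return c;

        /* Pool was undersized: fall back to one fresh allocation
         * (the kernel uses GFP_NOIO here to stay reclaim-safe). */
        c = malloc(sizeof(*c));
        if (!c) {
                pthread_spin_lock(&p->lock);
                p->used--;
                pthread_spin_unlock(&p->lock);
        }
        return c;
}

static void pool_put(struct pool *p, struct op_ctxt *c)
{
        pthread_spin_lock(&p->lock);
        p->used--;
        c->next = p->free;
        p->free = c;
        pthread_spin_unlock(&p->lock);
}

Pre-sizing the pools to sc_sq_depth + sc_rq_depth (contexts) and sc_max_requests (maps) makes the fallback a rare event rather than the steady state.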
index 740bddcf3488fe9c2e6db48f0b7967909a4caca0..b1b009f10ea375a3f63148973e58e486df1aa282 100644 (file)
@@ -63,7 +63,7 @@
  */
 
 static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
-static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
@@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = {
 
 #endif
 
-#define RPCRDMA_BIND_TO                (60U * HZ)
-#define RPCRDMA_INIT_REEST_TO  (5U * HZ)
-#define RPCRDMA_MAX_REEST_TO   (30U * HZ)
-#define RPCRDMA_IDLE_DISC_TO   (5U * 60 * HZ)
-
-static struct rpc_xprt_ops xprt_rdma_procs;    /* forward reference */
+static struct rpc_xprt_ops xprt_rdma_procs;    /* forward reference */
 
 static void
 xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
        xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
 }
 
-static void
+void
 xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
 {
        char buf[128];
@@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
        xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
 }
 
-static void
+void
 xprt_rdma_free_addresses(struct rpc_xprt *xprt)
 {
        unsigned int i;
@@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
        if (req == NULL)
                return NULL;
 
-       flags = GFP_NOIO | __GFP_NOWARN;
+       flags = RPCRDMA_DEF_GFP;
        if (RPC_IS_SWAPPER(task))
                flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
 
@@ -642,7 +637,7 @@ drop_connection:
        return -ENOTCONN;       /* implies disconnect */
 }
 
-static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 {
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
        long idle_time = 0;
@@ -743,6 +738,11 @@ void xprt_rdma_cleanup(void)
 
        rpcrdma_destroy_wq();
        frwr_destroy_recovery_wq();
+
+       rc = xprt_unregister_transport(&xprt_rdma_bc);
+       if (rc)
+               dprintk("RPC:       %s: xprt_unregister(bc) returned %i\n",
+                       __func__, rc);
 }
 
 int xprt_rdma_init(void)
@@ -766,6 +766,14 @@ int xprt_rdma_init(void)
                return rc;
        }
 
+       rc = xprt_register_transport(&xprt_rdma_bc);
+       if (rc) {
+               xprt_unregister_transport(&xprt_rdma);
+               rpcrdma_destroy_wq();
+               frwr_destroy_recovery_wq();
+               return rc;
+       }
+
        dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
 
        dprintk("Defaults:\n");
index 732c71ce5dcab6758da0fe7661b877bfd8fa77ad..878f1bfb1db98f6972e641130d36a80cdf97faf8 100644 (file)
@@ -462,7 +462,6 @@ int
 rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 {
        struct rpcrdma_ia *ia = &xprt->rx_ia;
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        int rc;
 
        ia->ri_dma_mr = NULL;
@@ -482,16 +481,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                goto out2;
        }
 
-       rc = ib_query_device(ia->ri_device, devattr);
-       if (rc) {
-               dprintk("RPC:       %s: ib_query_device failed %d\n",
-                       __func__, rc);
-               goto out3;
-       }
-
        if (memreg == RPCRDMA_FRMR) {
-               if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
-                   (devattr->max_fast_reg_page_list_len == 0)) {
+               if (!(ia->ri_device->attrs.device_cap_flags &
+                               IB_DEVICE_MEM_MGT_EXTENSIONS) ||
+                   (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) {
                        dprintk("RPC:       %s: FRMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_MTHCAFMR;
@@ -566,24 +559,23 @@ int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                                struct rpcrdma_create_data_internal *cdata)
 {
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        struct ib_cq *sendcq, *recvcq;
        struct ib_cq_init_attr cq_attr = {};
        unsigned int max_qp_wr;
        int rc, err;
 
-       if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
+       if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
                dprintk("RPC:       %s: insufficient sge's available\n",
                        __func__);
                return -ENOMEM;
        }
 
-       if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
+       if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
                dprintk("RPC:       %s: insufficient wqe's available\n",
                        __func__);
                return -ENOMEM;
        }
-       max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS;
+       max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS;
 
        /* check provider's send/recv wr limits */
        if (cdata->max_requests > max_qp_wr)
@@ -668,11 +660,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
        /* Client offers RDMA Read but does not initiate */
        ep->rep_remote_cma.initiator_depth = 0;
-       if (devattr->max_qp_rd_atom > 32)       /* arbitrary but <= 255 */
+       if (ia->ri_device->attrs.max_qp_rd_atom > 32)   /* arbitrary but <= 255 */
                ep->rep_remote_cma.responder_resources = 32;
        else
                ep->rep_remote_cma.responder_resources =
-                                               devattr->max_qp_rd_atom;
+                                               ia->ri_device->attrs.max_qp_rd_atom;
 
        ep->rep_remote_cma.retry_count = 7;
        ep->rep_remote_cma.flow_control = 0;
index 728101ddc44bfc4b3b6247c8d13f3222cb0f1386..38fe11b0987528c3d345676202db487cae269a87 100644 (file)
 #define RDMA_RESOLVE_TIMEOUT   (5000)  /* 5 seconds */
 #define RDMA_CONNECT_RETRY_MAX (2)     /* retries if no listener backlog */
 
+#define RPCRDMA_BIND_TO                (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO  (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO   (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO   (5U * 60 * HZ)
+
 /*
  * Interface Adapter -- one per transport instance
  */
@@ -68,7 +73,6 @@ struct rpcrdma_ia {
        struct completion       ri_done;
        int                     ri_async_rc;
        unsigned int            ri_max_frmr_depth;
-       struct ib_device_attr   ri_devattr;
        struct ib_qp_attr       ri_qp_attr;
        struct ib_qp_init_attr  ri_qp_init_attr;
 };
@@ -142,6 +146,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
        return (struct rpcrdma_msg *)rb->rg_base;
 }
 
+#define RPCRDMA_DEF_GFP                (GFP_NOIO | __GFP_NOWARN)
+
 /*
  * struct rpcrdma_rep -- this structure encapsulates state required to recv
 * and complete a reply, asynchronously. It needs several pieces of
@@ -309,6 +315,8 @@ struct rpcrdma_buffer {
        u32                     rb_bc_srv_max_requests;
        spinlock_t              rb_reqslock;    /* protect rb_allreqs */
        struct list_head        rb_allreqs;
+
+       u32                     rb_bc_max_requests;
 };
 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
 
@@ -516,6 +524,10 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */
+extern unsigned int xprt_rdma_max_inline_read;
+void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
+void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
 int xprt_rdma_init(void);
 void xprt_rdma_cleanup(void);
 
@@ -531,11 +543,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
 void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-/* Temporary NFS request map cache. Created in svc_rdma.c  */
-extern struct kmem_cache *svc_rdma_map_cachep;
-/* WR context cache. Created in svc_rdma.c  */
-extern struct kmem_cache *svc_rdma_ctxt_cachep;
-/* Workqueue created in svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
+extern struct xprt_class xprt_rdma_bc;
 
 #endif                         /* _LINUX_SUNRPC_XPRT_RDMA_H */
index ebc661d3b6e37edb55d7c0a719ed9937a9f27a0d..47f7da58a7f0e93e3ffd632396a5a8db65c7f7c3 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
+#include <linux/rtnetlink.h>
 #include <net/ip_fib.h>
 #include <net/switchdev.h>
 
@@ -567,7 +568,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
 
-static DEFINE_MUTEX(switchdev_mutex);
 static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
 
 /**
@@ -582,9 +582,9 @@ int register_switchdev_notifier(struct notifier_block *nb)
 {
        int err;
 
-       mutex_lock(&switchdev_mutex);
+       rtnl_lock();
        err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
-       mutex_unlock(&switchdev_mutex);
+       rtnl_unlock();
        return err;
 }
 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -600,9 +600,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb)
 {
        int err;
 
-       mutex_lock(&switchdev_mutex);
+       rtnl_lock();
        err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
-       mutex_unlock(&switchdev_mutex);
+       rtnl_unlock();
        return err;
 }
 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -616,16 +616,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
  *     Call all network notifier blocks. This should be called by driver
  *     when it needs to propagate hardware event.
  *     Return values are same as for atomic_notifier_call_chain().
+ *     rtnl_lock must be held.
  */
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
                             struct switchdev_notifier_info *info)
 {
        int err;
 
+       ASSERT_RTNL();
+
        info->dev = dev;
-       mutex_lock(&switchdev_mutex);
        err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
-       mutex_unlock(&switchdev_mutex);
        return err;
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
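
Note: with the private switchdev_mutex gone, serialization of the notifier chain is inherited from the RTNL lock, which callers typically hold already at these call sites; ASSERT_RTNL() now documents and enforces that rule. A hedged kernel-style sketch of a conforming caller (not part of this patch; the FDB-add event is just an illustrative choice):

#include <linux/rtnetlink.h>
#include <net/switchdev.h>

/* The chain is a raw notifier, so the caller supplies the locking. */
static void report_fdb_event(struct net_device *dev,
                             struct switchdev_notifier_fdb_info *fdb_info)
{
        rtnl_lock();
        call_switchdev_notifiers(SWITCHDEV_FDB_ADD, dev, &fdb_info->info);
        rtnl_unlock();
}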
index 0c2944fb9ae0d34ca8e153d9412c5896b47ffacf..347cdc99ed094ab8326071b69958296641576198 100644 (file)
@@ -1973,8 +1973,10 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
 
        hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
                          NLM_F_MULTI, TIPC_NL_LINK_GET);
-       if (!hdr)
+       if (!hdr) {
+               tipc_bcast_unlock(net);
                return -EMSGSIZE;
+       }
 
        attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
        if (!attrs)
index fa97d9649a2851f1e6d5c8a1f39f2e4f7603e61b..9d7a16fc5ca4b6ab3456c5129cde96664a6fb4fd 100644 (file)
@@ -346,12 +346,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
        skb_queue_head_init(&n->bc_entry.inputq2);
        for (i = 0; i < MAX_BEARERS; i++)
                spin_lock_init(&n->links[i].lock);
-       hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
-       list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
-               if (n->addr < temp_node->addr)
-                       break;
-       }
-       list_add_tail_rcu(&n->list, &temp_node->list);
        n->state = SELF_DOWN_PEER_LEAVING;
        n->signature = INVALID_NODE_SIG;
        n->active_links[0] = INVALID_BEARER_ID;
@@ -372,6 +366,12 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
        tipc_node_get(n);
        setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
        n->keepalive_intv = U32_MAX;
+       hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+       list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+               if (n->addr < temp_node->addr)
+                       break;
+       }
+       list_add_tail_rcu(&n->list, &temp_node->list);
 exit:
        spin_unlock_bh(&tn->node_list_lock);
        return n;
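
Note: the tipc_node_create() hunks are a publish-after-init fix. The node was being linked into the RCU-visible hash table and sorted node list before its timer, reference count, and state fields were set up, so a concurrent lockless reader could find a half-initialized node. Moving the list insertions to the end restores the standard RCU ordering: initialize fully, then publish. In schematic form (generic RCU idiom, with init_node_fields() as a hypothetical stand-in, not the TIPC code):

        n = kzalloc(sizeof(*n), GFP_ATOMIC);
        if (!n)
                goto exit;
        init_node_fields(n);    /* hypothetical: timers, refs, locks */

        /* Publish last: hlist_add_head_rcu() orders the initializing
         * stores above before the pointer becomes reader-visible. */
        hlist_add_head_rcu(&n->hash, &htable[hashfn(n->addr)]);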
index 350cca33ee0a64b08ce6135aa130011263eba136..69ee2eeef968851192035cb166410f1f37e7722f 100644 (file)
@@ -289,15 +289,14 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
                                struct sockaddr_tipc *addr, void *usr_data,
                                void *buf, size_t len)
 {
-       struct tipc_subscriber *subscriber = usr_data;
+       struct tipc_subscriber *subscrb = usr_data;
        struct tipc_subscription *sub = NULL;
        struct tipc_net *tn = net_generic(net, tipc_net_id);
 
-       tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
-       if (sub)
-               tipc_nametbl_subscribe(sub);
-       else
-               tipc_conn_terminate(tn->topsrv, subscriber->conid);
+       if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
+               return tipc_conn_terminate(tn->topsrv, subscrb->conid);
+
+       tipc_nametbl_subscribe(sub);
 }
 
 /* Handle one request to establish a new subscriber */
index c5bf5ef2bf89476b6d7ab766b8591e6af2e869fd..f75f847e688d4a9ca5b0bde01c3e052c88ca51cb 100644 (file)
@@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
        UNIXCB(skb).fp = NULL;
 
        for (i = scm->fp->count-1; i >= 0; i--)
-               unix_notinflight(scm->fp->fp[i]);
+               unix_notinflight(scm->fp->user, scm->fp->fp[i]);
 }
 
 static void unix_destruct_scm(struct sk_buff *skb)
@@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
                return -ENOMEM;
 
        for (i = scm->fp->count - 1; i >= 0; i--)
-               unix_inflight(scm->fp->fp[i]);
+               unix_inflight(scm->fp->user, scm->fp->fp[i]);
        return max_level;
 }
 
@@ -1781,7 +1781,12 @@ restart_locked:
                        goto out_unlock;
        }
 
-       if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+       /* other == sk && unix_peer(other) != sk if
+        * - unix_peer(sk) == NULL, destination address bound to sk
+        * - unix_peer(sk) == sk by time of get but disconnected before lock
+        */
+       if (other != sk &&
+           unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
                if (timeo) {
                        timeo = unix_wait_for_peer(other, timeo);
 
@@ -2277,13 +2282,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
        size_t size = state->size;
        unsigned int last_len;
 
-       err = -EINVAL;
-       if (sk->sk_state != TCP_ESTABLISHED)
+       if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+               err = -EINVAL;
                goto out;
+       }
 
-       err = -EOPNOTSUPP;
-       if (flags & MSG_OOB)
+       if (unlikely(flags & MSG_OOB)) {
+               err = -EOPNOTSUPP;
                goto out;
+       }
 
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
        timeo = sock_rcvtimeo(sk, noblock);
@@ -2305,6 +2312,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
                bool drop_skb;
                struct sk_buff *skb, *last;
 
+redo:
                unix_state_lock(sk);
                if (sock_flag(sk, SOCK_DEAD)) {
                        err = -ECONNRESET;
@@ -2329,9 +2337,11 @@ again:
                                goto unlock;
 
                        unix_state_unlock(sk);
-                       err = -EAGAIN;
-                       if (!timeo)
+                       if (!timeo) {
+                               err = -EAGAIN;
                                break;
+                       }
+
                        mutex_unlock(&u->readlock);
 
                        timeo = unix_stream_data_wait(sk, timeo, last,
@@ -2339,11 +2349,12 @@ again:
 
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeo);
+                               scm_destroy(&scm);
                                goto out;
                        }
 
                        mutex_lock(&u->readlock);
-                       continue;
+                       goto redo;
 unlock:
                        unix_state_unlock(sk);
                        break;
index c512f64d528766f940b30236bf3e3c8fa2047661..4d9679701a6df5113df3e24a1c9b86e2a63b3710 100644 (file)
@@ -220,7 +220,7 @@ done:
        return skb->len;
 }
 
-static struct sock *unix_lookup_by_ino(int ino)
+static struct sock *unix_lookup_by_ino(unsigned int ino)
 {
        int i;
        struct sock *sk;
index 8fcdc2283af50c5caecd409b79cae6028ca2c836..6a0d48525fcf9a71f54bb43495b200b300f5341e 100644 (file)
@@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp)
  * descriptor if it is for an AF_UNIX socket.
  */
 
-void unix_inflight(struct file *fp)
+void unix_inflight(struct user_struct *user, struct file *fp)
 {
        struct sock *s = unix_get_socket(fp);
 
@@ -133,11 +133,11 @@ void unix_inflight(struct file *fp)
                }
                unix_tot_inflight++;
        }
-       fp->f_cred->user->unix_inflight++;
+       user->unix_inflight++;
        spin_unlock(&unix_gc_lock);
 }
 
-void unix_notinflight(struct file *fp)
+void unix_notinflight(struct user_struct *user, struct file *fp)
 {
        struct sock *s = unix_get_socket(fp);
 
@@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp)
                        list_del_init(&u->link);
                unix_tot_inflight--;
        }
-       fp->f_cred->user->unix_inflight--;
+       user->unix_inflight--;
        spin_unlock(&unix_gc_lock);
 }
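+
Note: the unix_inflight()/unix_notinflight() signature change fixes an accounting mismatch. In-flight descriptors were charged to the user that originally opened the file (fp->f_cred->user), but the resource being limited is the sending user's in-flight quota, and the opener and the sender need not be the same user. Callers now pass the user_struct recorded in the scm_fp_list when the SCM_RIGHTS message was built, so the charge and the later uncharge hit the same user. The sender-side capture this relies on lives in the companion hunk in net/core/scm.c (not shown in this section) and looks roughly like:

        /* hedged sketch of the companion change: record the sender */
        fpl->user = get_uid(current_user());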
 
index 7fd1220fbfa0bb2477e2b7e6edcf714a5ae6a097..bbe65dcb973834761c30cef249a537ba498898fb 100644 (file)
@@ -1557,8 +1557,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
        if (err < 0)
                goto out;
 
-       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
        while (total_written < len) {
                ssize_t written;
 
@@ -1578,7 +1576,9 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                                goto out_wait;
 
                        release_sock(sk);
+                       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                        timeout = schedule_timeout(timeout);
+                       finish_wait(sk_sleep(sk), &wait);
                        lock_sock(sk);
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeout);
@@ -1588,8 +1588,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                                goto out_wait;
                        }
 
-                       prepare_to_wait(sk_sleep(sk), &wait,
-                                       TASK_INTERRUPTIBLE);
                }
 
                /* These checks occur both as part of and after the loop
@@ -1635,7 +1633,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 out_wait:
        if (total_written > 0)
                err = total_written;
-       finish_wait(sk_sleep(sk), &wait);
 out:
        release_sock(sk);
        return err;
@@ -1716,7 +1713,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
        if (err < 0)
                goto out;
 
-       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
        while (1) {
                s64 ready = vsock_stream_has_data(vsk);
@@ -1727,7 +1723,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                         */
 
                        err = -ENOMEM;
-                       goto out_wait;
+                       goto out;
                } else if (ready > 0) {
                        ssize_t read;
 
@@ -1750,7 +1746,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                                        vsk, target, read,
                                        !(flags & MSG_PEEK), &recv_data);
                        if (err < 0)
-                               goto out_wait;
+                               goto out;
 
                        if (read >= target || flags & MSG_PEEK)
                                break;
@@ -1773,7 +1769,9 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                                break;
 
                        release_sock(sk);
+                       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                        timeout = schedule_timeout(timeout);
+                       finish_wait(sk_sleep(sk), &wait);
                        lock_sock(sk);
 
                        if (signal_pending(current)) {
@@ -1783,9 +1781,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                                err = -EAGAIN;
                                break;
                        }
-
-                       prepare_to_wait(sk_sleep(sk), &wait,
-                                       TASK_INTERRUPTIBLE);
                }
        }
 
@@ -1816,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                err = copied;
        }
 
-out_wait:
-       finish_wait(sk_sleep(sk), &wait);
 out:
        release_sock(sk);
        return err;
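
Note: the af_vsock change shrinks the prepare_to_wait()/finish_wait() window. Instead of entering TASK_INTERRUPTIBLE once before the loop and remaining nominally on the wait queue while the loop body runs (dropping and retaking the socket lock along the way), the task now registers on the queue only around the actual schedule_timeout() call. The resulting wait step, in isolation:

        /* condition is re-checked at the top of the enclosing loop
         * with the socket lock held */
        release_sock(sk);
        prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
        timeout = schedule_timeout(timeout);
        finish_wait(sk_sleep(sk), &wait);
        lock_sock(sk);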
index 3b0ce1c484a3dd33f2e2c81edeeadb79a48e2dcc..547ceecc052310f3db3be04d827dff22d567dc1e 100644 (file)
@@ -231,20 +231,22 @@ static const struct ieee80211_regdomain world_regdom = {
                /* IEEE 802.11b/g, channels 1..11 */
                REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
                /* IEEE 802.11b/g, channels 12..13. */
-               REG_RULE(2467-10, 2472+10, 40, 6, 20,
-                       NL80211_RRF_NO_IR),
+               REG_RULE(2467-10, 2472+10, 20, 6, 20,
+                       NL80211_RRF_NO_IR | NL80211_RRF_AUTO_BW),
                /* IEEE 802.11 channel 14 - Only JP enables
                 * this and for 802.11b only */
                REG_RULE(2484-10, 2484+10, 20, 6, 20,
                        NL80211_RRF_NO_IR |
                        NL80211_RRF_NO_OFDM),
                /* IEEE 802.11a, channel 36..48 */
-               REG_RULE(5180-10, 5240+10, 160, 6, 20,
-                        NL80211_RRF_NO_IR),
+               REG_RULE(5180-10, 5240+10, 80, 6, 20,
+                        NL80211_RRF_NO_IR |
+                        NL80211_RRF_AUTO_BW),
 
                /* IEEE 802.11a, channel 52..64 - DFS required */
-               REG_RULE(5260-10, 5320+10, 160, 6, 20,
+               REG_RULE(5260-10, 5320+10, 80, 6, 20,
                        NL80211_RRF_NO_IR |
+                       NL80211_RRF_AUTO_BW |
                        NL80211_RRF_DFS),
 
                /* IEEE 802.11a, channel 100..144 - DFS required */
@@ -2745,7 +2747,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
        const struct ieee80211_power_rule *power_rule = NULL;
        char bw[32], cac_time[32];
 
-       pr_info("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
+       pr_debug("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
 
        for (i = 0; i < rd->n_reg_rules; i++) {
                reg_rule = &rd->reg_rules[i];
@@ -2772,7 +2774,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
                 * in certain regions
                 */
                if (power_rule->max_antenna_gain)
-                       pr_info("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
+                       pr_debug("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
                                freq_range->start_freq_khz,
                                freq_range->end_freq_khz,
                                bw,
@@ -2780,7 +2782,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
                                power_rule->max_eirp,
                                cac_time);
                else
-                       pr_info("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
+                       pr_debug("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
                                freq_range->start_freq_khz,
                                freq_range->end_freq_khz,
                                bw,
@@ -2813,35 +2815,35 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
                        struct cfg80211_registered_device *rdev;
                        rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx);
                        if (rdev) {
-                               pr_info("Current regulatory domain updated by AP to: %c%c\n",
+                               pr_debug("Current regulatory domain updated by AP to: %c%c\n",
                                        rdev->country_ie_alpha2[0],
                                        rdev->country_ie_alpha2[1]);
                        } else
-                               pr_info("Current regulatory domain intersected:\n");
+                               pr_debug("Current regulatory domain intersected:\n");
                } else
-                       pr_info("Current regulatory domain intersected:\n");
+                       pr_debug("Current regulatory domain intersected:\n");
        } else if (is_world_regdom(rd->alpha2)) {
-               pr_info("World regulatory domain updated:\n");
+               pr_debug("World regulatory domain updated:\n");
        } else {
                if (is_unknown_alpha2(rd->alpha2))
-                       pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n");
+                       pr_debug("Regulatory domain changed to driver built-in settings (unknown country)\n");
                else {
                        if (reg_request_cell_base(lr))
-                               pr_info("Regulatory domain changed to country: %c%c by Cell Station\n",
+                               pr_debug("Regulatory domain changed to country: %c%c by Cell Station\n",
                                        rd->alpha2[0], rd->alpha2[1]);
                        else
-                               pr_info("Regulatory domain changed to country: %c%c\n",
+                               pr_debug("Regulatory domain changed to country: %c%c\n",
                                        rd->alpha2[0], rd->alpha2[1]);
                }
        }
 
-       pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
+       pr_debug(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
        print_rd_rules(rd);
 }
 
 static void print_regdomain_info(const struct ieee80211_regdomain *rd)
 {
-       pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
+       pr_debug("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
        print_rd_rules(rd);
 }
 
@@ -2862,7 +2864,8 @@ static int reg_set_rd_user(const struct ieee80211_regdomain *rd,
                return -EALREADY;
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
@@ -2898,7 +2901,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
                return -EALREADY;
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
@@ -2956,7 +2960,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
         */
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
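
The bandwidth changes above lean on the newly added NL80211_RRF_AUTO_BW flag. A hedged reading of the rule tuple, following cfg80211's unit conventions:

/* REG_RULE(start_MHz, end_MHz, max_bw_MHz, max_gain_dBi, max_eirp_dBm,
 * flags) -- stored internally in kHz/mBi/mBm, matching the pr_debug()
 * output above.  NL80211_RRF_AUTO_BW marks a rule whose usable bandwidth
 * may be recomputed from contiguous rules, so tightening each rule's cap
 * (160 -> 80 MHz over 5180-5240 and 5260-5320) need not forbid
 * wide-channel operation across the combined range.
 */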
index e080746e1a6b528a218b416b5344368cd6db7065..48958d3cec9e38473ff3296f7f6bf4d5db41612a 100644 (file)
@@ -594,7 +594,8 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname)
                if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
                    strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
                    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-                   strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
+                   strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
+                   strcmp(symname, ".TOC.") == 0)
                        return 1;
        /* Do not ignore this symbol */
        return 0;
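
For context on the added exact-match entry (the `_restgpr0_`-style entries above it are prefix matches), a hedged note on why `.TOC.` is safe to ignore:

/* On powerpc64 (ELFv2), ".TOC." is synthesized at final link time to
 * anchor the TOC base pointer (r2); modules reference it without any
 * object file defining it, so modpost must not report it as an
 * unresolved symbol.
 */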
diff --git a/scripts/prune-kernel b/scripts/prune-kernel
new file mode 100755 (executable)
index 0000000..ab5034e
--- /dev/null
+++ b/scripts/prune-kernel
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Because I use CONFIG_LOCALVERSION_AUTO, each build installs under a new
+# version string rather than replacing the previous one, so /boot and
+# /lib/modules/ eventually fill up.  Dumb script to purge that stuff:
+
+for f in "$@"
+do
+        if rpm -qf "/lib/modules/$f" >/dev/null; then
+                echo "keeping $f (installed from rpm)"
+        elif [ "$(uname -r)" = "$f" ]; then
+                echo "keeping $f (running kernel)"
+        else
+                echo "removing $f"
+                rm -f "/boot/initramfs-$f.img" "/boot/System.map-$f"
+                rm -f "/boot/vmlinuz-$f" "/boot/config-$f"
+                rm -rf "/lib/modules/$f"
+                new-kernel-pkg --remove "$f"
+        fi
+done
index 16622aef9bdea83bee67e5e0080c7b9a17d240ae..28414b0207ce58f1fdd8503da0f61a39ba5c7cfe 100644 (file)
@@ -99,7 +99,7 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
 
        dir = d_inode(parent);
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        dentry = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(dentry))
                goto out;
@@ -129,14 +129,14 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
        }
        d_instantiate(dentry, inode);
        dget(dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 
 out1:
        dput(dentry);
        dentry = ERR_PTR(error);
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        simple_release_fs(&mount, &mount_count);
        return dentry;
 }
@@ -195,7 +195,7 @@ void securityfs_remove(struct dentry *dentry)
        if (!parent || d_really_is_negative(parent))
                return;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        if (simple_positive(dentry)) {
                if (d_is_dir(dentry))
                        simple_rmdir(d_inode(parent), dentry);
@@ -203,7 +203,7 @@ void securityfs_remove(struct dentry *dentry)
                        simple_unlink(d_inode(parent), dentry);
                dput(dentry);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        simple_release_fs(&mount, &mount_count);
 }
 EXPORT_SYMBOL_GPL(securityfs_remove);
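
The mutex_lock(&...->i_mutex) to inode_lock() conversions in this and the following security hunks are mechanical; around this merge the wrappers in <linux/fs.h> were, roughly:

static inline void inode_lock(struct inode *inode)
{
        mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
        mutex_unlock(&inode->i_mutex);
}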
index f7160253f17faad71c17a86b19833123a2edebd4..e6ea9d4b1de91a8d58e3ccab0fd3cef222ea6fcd 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/integrity.h>
 #include <linux/evm.h>
 #include <crypto/hash.h>
+#include <crypto/algapi.h>
 #include "evm.h"
 
 int evm_initialized;
@@ -148,7 +149,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
                                   xattr_value_len, calc.digest);
                if (rc)
                        break;
-               rc = memcmp(xattr_data->digest, calc.digest,
+               rc = crypto_memneq(xattr_data->digest, calc.digest,
                            sizeof(calc.digest));
                if (rc)
                        rc = -EINVAL;
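
The memcmp() to crypto_memneq() switch matters because memcmp() may return at the first differing byte, leaking the mismatch position to a timing attacker probing the HMAC check. A minimal sketch of the constant-time idea (illustrative only, not the kernel's actual implementation):

static unsigned long memneq_sketch(const void *a, const void *b, size_t n)
{
        const unsigned char *pa = a, *pb = b;
        unsigned long neq = 0;

        while (n--)
                neq |= *pa++ ^ *pb++;   /* every byte examined, no early exit */
        return neq;                     /* zero iff the digests match */
}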
index c21f09bf8b99210f3c3dcbff4497e1194848db2c..9d96551d0196cb3e7136ace6e747f9504c07fc7a 100644 (file)
@@ -121,7 +121,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
        if (!(mode & FMODE_WRITE))
                return;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (atomic_read(&inode->i_writecount) == 1) {
                if ((iint->version != inode->i_version) ||
                    (iint->flags & IMA_NEW_FILE)) {
@@ -130,7 +130,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
                                ima_update_xattr(iint, file);
                }
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 /**
@@ -186,7 +186,7 @@ static int process_measurement(struct file *file, int mask, int function,
        if (action & IMA_FILE_APPRAISE)
                function = FILE_CHECK;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (action) {
                iint = integrity_inode_get(inode);
@@ -250,7 +250,7 @@ out_free:
        if (pathbuf)
                __putname(pathbuf);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE))
                return -EACCES;
        return 0;
index 07a87311055c5b50916c87f2117f4c70b0321cfd..09ef276c4bdcaf8e0c419df61d1cd7b07bf5253c 100644 (file)
@@ -430,7 +430,8 @@ static int __key_instantiate_and_link(struct key *key,
 
                        /* and link it into the destination keyring */
                        if (keyring) {
-                               set_bit(KEY_FLAG_KEEP, &key->flags);
+                               if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+                                       set_bit(KEY_FLAG_KEEP, &key->flags);
 
                                __key_link(key, _edit);
                        }
index f8110cfd80ff64bf05ef428f03dff935e6d704c4..f1ab71504e1d59e1c6aaa7c0d22870e371ec85b3 100644 (file)
@@ -3249,7 +3249,7 @@ static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t
 
 static void selinux_inode_getsecid(struct inode *inode, u32 *secid)
 {
-       struct inode_security_struct *isec = inode_security(inode);
+       struct inode_security_struct *isec = inode_security_novalidate(inode);
        *secid = isec->sid;
 }
 
index 2bbb41822d8ec8882f8dacbbb4c5f8a1feac59ca..8495b93681906bd39f4065723461cddac2e7d347 100644 (file)
@@ -83,6 +83,7 @@ static struct nlmsg_perm nlmsg_tcpdiag_perms[] =
        { TCPDIAG_GETSOCK,      NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
        { DCCPDIAG_GETSOCK,     NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
        { SOCK_DIAG_BY_FAMILY,  NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
+       { SOCK_DESTROY,         NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE },
 };
 
 static struct nlmsg_perm nlmsg_xfrm_perms[] =
index 732c1c77dccd8be483d382e621569e4008b4db0c..1b1fd27de632692dc307e5a72cf65898f0e245b3 100644 (file)
@@ -380,9 +380,9 @@ static int sel_open_policy(struct inode *inode, struct file *filp)
                goto err;
 
        if (i_size_read(inode) != security_policydb_len()) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, security_policydb_len());
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        rc = security_read_policy(&plm->data, &plm->len);
index e3e949126a5639c6a4a623750ec35922704c5110..a2a1e24becc6b8f2f6288a8853b02e6dd884becd 100644 (file)
@@ -97,11 +97,11 @@ config SND_PCM_TIMER
        bool "PCM timer interface" if EXPERT
        default y
        help
-         If you disable this option, pcm timer will be inavailable, so
-         those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+         If you disable this option, pcm timer will be unavailable, so
+         those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
          incorrectly.
 
-         For some embedded device, we may disable it to reduce memory
+         For some embedded devices, we may disable it to reduce memory
          footprint, about 20KB on x86_64 platform.
 
 config SND_SEQUENCER_OSS
index 18b8dc45bb8f77ea0c2ce16e79305c7872f057bc..7fac3cae8abd0a232a3d7a1c2563999495df6ec2 100644 (file)
 #include <sound/compress_offload.h>
 #include <sound/compress_driver.h>
 
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
 /* TODO:
  * - add substream support for multiple devices in case of
  *     SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@ out:
        return retval;
 }
 
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 static int
 snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
 {
@@ -463,6 +471,7 @@ out:
        kfree(caps);
        return retval;
 }
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
 
 /* revisit this with snd_pcm_preallocate_xxx */
 static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
        case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
                retval = snd_compr_get_caps(stream, arg);
                break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
        case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
                retval = snd_compr_get_codec_caps(stream, arg);
                break;
+#endif
        case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
                retval = snd_compr_set_params(stream, arg);
                break;
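
The COMPR_CODEC_CAPS_OVERFLOW guard works because an ioctl command number reserves only _IOC_SIZEBITS bits for sizeof(argument). A hedged summary of the generic encoding in <asm-generic/ioctl.h>:

/* An ioctl number packs dir:2 | size:_IOC_SIZEBITS | type:8 | nr:8.
 * _IOC_SIZEBITS is 14 in the generic header but 13 on several
 * architectures; there sizeof(struct snd_compr_codec_caps) no longer
 * fits, the command number itself is malformed, and the ioctl has to be
 * compiled out -- hence "#if _IOC_SIZEBITS < 14" above.
 */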
index 196a6fe100ca8f40cc7f9b01eb00dc81c713ba93..a85d45595d02a265f1f8e1a4cd36c812d8cd08f0 100644 (file)
@@ -1405,6 +1405,8 @@ static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file,
                return -EFAULT;
        if (tlv.length < sizeof(unsigned int) * 2)
                return -EINVAL;
+       if (!tlv.numid)
+               return -EINVAL;
        down_read(&card->controls_rwsem);
        kctl = snd_ctl_find_numid(card, tlv.numid);
        if (kctl == NULL) {
index f845ecf7e172935f938bc52ecfe9c95c7bac4e11..656d9a9032dc2165d1f30f4ef193250a07bec555 100644 (file)
@@ -90,7 +90,7 @@ static int snd_hrtimer_start(struct snd_timer *t)
        struct snd_hrtimer *stime = t->private_data;
 
        atomic_set(&stime->running, 0);
-       hrtimer_cancel(&stime->hrt);
+       hrtimer_try_to_cancel(&stime->hrt);
        hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution),
                      HRTIMER_MODE_REL);
        atomic_set(&stime->running, 1);
@@ -101,6 +101,7 @@ static int snd_hrtimer_stop(struct snd_timer *t)
 {
        struct snd_hrtimer *stime = t->private_data;
        atomic_set(&stime->running, 0);
+       hrtimer_try_to_cancel(&stime->hrt);
        return 0;
 }
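
The move to hrtimer_try_to_cancel() is deliberate; a hedged summary of the API difference being relied on:

/* hrtimer_cancel(t):        waits for a running callback to complete, so
 *                           it can deadlock if the caller holds anything
 *                           the callback needs.
 * hrtimer_try_to_cancel(t): returns 1 if the timer was deactivated, 0 if
 *                           it was not active, -1 if the callback is
 *                           currently running -- and never waits.
 */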
 
index 0e73d03b30e3f0d712b26f65453346e419caa96d..ebc9fdfe64df618088a8d55efff8b5312af3385d 100644 (file)
@@ -835,7 +835,8 @@ static int choose_rate(struct snd_pcm_substream *substream,
        return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL);
 }
 
-static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream)
+static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream,
+                                    bool trylock)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct snd_pcm_hw_params *params, *sparams;
@@ -849,7 +850,10 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream)
        struct snd_mask sformat_mask;
        struct snd_mask mask;
 
-       if (mutex_lock_interruptible(&runtime->oss.params_lock))
+       if (trylock) {
+               if (!(mutex_trylock(&runtime->oss.params_lock)))
+                       return -EAGAIN;
+       } else if (mutex_lock_interruptible(&runtime->oss.params_lock))
                return -EINTR;
        sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL);
        params = kmalloc(sizeof(*params), GFP_KERNEL);
@@ -1092,7 +1096,7 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil
                if (asubstream == NULL)
                        asubstream = substream;
                if (substream->runtime->oss.params) {
-                       err = snd_pcm_oss_change_params(substream);
+                       err = snd_pcm_oss_change_params(substream, false);
                        if (err < 0)
                                return err;
                }
@@ -1132,7 +1136,7 @@ static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream)
                return 0;
        runtime = substream->runtime;
        if (runtime->oss.params) {
-               err = snd_pcm_oss_change_params(substream);
+               err = snd_pcm_oss_change_params(substream, false);
                if (err < 0)
                        return err;
        }
@@ -2163,7 +2167,7 @@ static int snd_pcm_oss_get_space(struct snd_pcm_oss_file *pcm_oss_file, int stre
        runtime = substream->runtime;
 
        if (runtime->oss.params &&
-           (err = snd_pcm_oss_change_params(substream)) < 0)
+           (err = snd_pcm_oss_change_params(substream, false)) < 0)
                return err;
 
        info.fragsize = runtime->oss.period_bytes;
@@ -2804,7 +2808,12 @@ static int snd_pcm_oss_mmap(struct file *file, struct vm_area_struct *area)
                return -EIO;
        
        if (runtime->oss.params) {
-               if ((err = snd_pcm_oss_change_params(substream)) < 0)
+               /* use mutex_trylock() for params_lock to avoid a deadlock
+                * between mmap_sem and params_lock taken by
+                * copy_from/to_user() in snd_pcm_oss_write/read()
+                */
+               err = snd_pcm_oss_change_params(substream, true);
+               if (err < 0)
                        return err;
        }
 #ifdef CONFIG_SND_PCM_OSS_PLUGINS
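
The trylock path added for mmap() avoids the ABBA deadlock named in the comment; a hedged illustration (schematic, not literal call chains):

/* write()/read() side:                 mmap() side:
 *   mutex_lock(params_lock);             down_write(mmap_sem);
 *   copy_from_user()                     mutex_lock(params_lock); <- blocks
 *     -> page fault
 *     -> down_read(mmap_sem);   <- blocks; neither side can proceed
 */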
index b48b434444ed0e0239936887a891d15da0054e00..9630e9f72b7ba2ad77f50a516ecc559c0c200975 100644 (file)
@@ -255,10 +255,15 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
        if (! (runtime = substream->runtime))
                return -ENOTTY;
 
-       /* only fifo_size is different, so just copy all */
-       data = memdup_user(data32, sizeof(*data32));
-       if (IS_ERR(data))
-               return PTR_ERR(data);
+       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       /* only fifo_size (RO from userspace) is different, so just copy all */
+       if (copy_from_user(data, data32, sizeof(*data32))) {
+               err = -EFAULT;
+               goto error;
+       }
 
        if (refine)
                err = snd_pcm_hw_refine(substream, data);
index fadd3eb8e8bb2d77264fafec9be05ba54e36af85..9106d8e2300eab3e566f9a3807c469ee02aa7bcd 100644 (file)
@@ -74,6 +74,18 @@ static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream);
 static DEFINE_RWLOCK(snd_pcm_link_rwlock);
 static DECLARE_RWSEM(snd_pcm_link_rwsem);
 
+/* Writer in rwsem may block readers even during its waiting in queue,
+ * and this may lead to a deadlock when the code path takes read sem
+ * twice (e.g. one in snd_pcm_action_nonatomic() and another in
+ * snd_pcm_stream_lock()).  As a (suboptimal) workaround, let the writer
+ * spin until it gets the lock.
+ */
+static inline void down_write_nonblock(struct rw_semaphore *lock)
+{
+       while (!down_write_trylock(lock))
+               cond_resched();
+}
+
 /**
  * snd_pcm_stream_lock - Lock the PCM stream
  * @substream: PCM substream
@@ -1813,7 +1825,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
                res = -ENOMEM;
                goto _nolock;
        }
-       down_write(&snd_pcm_link_rwsem);
+       down_write_nonblock(&snd_pcm_link_rwsem);
        write_lock_irq(&snd_pcm_link_rwlock);
        if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN ||
            substream->runtime->status->state != substream1->runtime->status->state ||
@@ -1860,7 +1872,7 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream)
        struct snd_pcm_substream *s;
        int res = 0;
 
-       down_write(&snd_pcm_link_rwsem);
+       down_write_nonblock(&snd_pcm_link_rwsem);
        write_lock_irq(&snd_pcm_link_rwlock);
        if (!snd_pcm_stream_linked(substream)) {
                res = -EALREADY;
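
down_write_nonblock() exists because rwsem queues writers fairly, so even a *waiting* writer blocks new readers; a hedged illustration of the recursive-read deadlock being avoided:

/* thread A                            thread B
 *   down_read(link_rwsem);
 *                                       down_write(link_rwsem);  <- queued
 *   down_read(link_rwsem);   <- blocks behind the queued writer, which in
 *                               turn waits for A to release: deadlock.
 * Spinning on down_write_trylock() + cond_resched() never joins the wait
 * queue, so existing readers keep making progress.
 */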
index a7759846fbaadff0c9493760ce7b14eff7ca8ad9..795437b1008200cd534f9a46ad0272f3dbc20ca4 100644 (file)
@@ -942,31 +942,36 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream,
        unsigned long flags;
        long result = 0, count1;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
+       unsigned long appl_ptr;
 
+       spin_lock_irqsave(&runtime->lock, flags);
        while (count > 0 && runtime->avail) {
                count1 = runtime->buffer_size - runtime->appl_ptr;
                if (count1 > count)
                        count1 = count;
-               spin_lock_irqsave(&runtime->lock, flags);
                if (count1 > (int)runtime->avail)
                        count1 = runtime->avail;
+
+               /* update runtime->appl_ptr before unlocking for userbuf */
+               appl_ptr = runtime->appl_ptr;
+               runtime->appl_ptr += count1;
+               runtime->appl_ptr %= runtime->buffer_size;
+               runtime->avail -= count1;
+
                if (kernelbuf)
-                       memcpy(kernelbuf + result, runtime->buffer + runtime->appl_ptr, count1);
+                       memcpy(kernelbuf + result, runtime->buffer + appl_ptr, count1);
                if (userbuf) {
                        spin_unlock_irqrestore(&runtime->lock, flags);
                        if (copy_to_user(userbuf + result,
-                                        runtime->buffer + runtime->appl_ptr, count1)) {
+                                        runtime->buffer + appl_ptr, count1)) {
                                return result > 0 ? result : -EFAULT;
                        }
                        spin_lock_irqsave(&runtime->lock, flags);
                }
-               runtime->appl_ptr += count1;
-               runtime->appl_ptr %= runtime->buffer_size;
-               runtime->avail -= count1;
-               spin_unlock_irqrestore(&runtime->lock, flags);
                result += count1;
                count -= count1;
        }
+       spin_unlock_irqrestore(&runtime->lock, flags);
        return result;
 }
 
@@ -1055,23 +1060,16 @@ int snd_rawmidi_transmit_empty(struct snd_rawmidi_substream *substream)
 EXPORT_SYMBOL(snd_rawmidi_transmit_empty);
 
 /**
- * snd_rawmidi_transmit_peek - copy data from the internal buffer
+ * __snd_rawmidi_transmit_peek - copy data from the internal buffer
  * @substream: the rawmidi substream
  * @buffer: the buffer pointer
  * @count: data size to transfer
  *
- * Copies data from the internal output buffer to the given buffer.
- *
- * Call this in the interrupt handler when the midi output is ready,
- * and call snd_rawmidi_transmit_ack() after the transmission is
- * finished.
- *
- * Return: The size of copied data, or a negative error code on failure.
+ * This is a variant of snd_rawmidi_transmit_peek() without spinlock.
  */
-int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                              unsigned char *buffer, int count)
 {
-       unsigned long flags;
        int result, count1;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
 
@@ -1081,7 +1079,6 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                return -EINVAL;
        }
        result = 0;
-       spin_lock_irqsave(&runtime->lock, flags);
        if (runtime->avail >= runtime->buffer_size) {
                /* warning: lowlevel layer MUST trigger down the hardware */
                goto __skip;
@@ -1106,25 +1103,47 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                }
        }
       __skip:
+       return result;
+}
+EXPORT_SYMBOL(__snd_rawmidi_transmit_peek);
+
+/**
+ * snd_rawmidi_transmit_peek - copy data from the internal buffer
+ * @substream: the rawmidi substream
+ * @buffer: the buffer pointer
+ * @count: data size to transfer
+ *
+ * Copies data from the internal output buffer to the given buffer.
+ *
+ * Call this in the interrupt handler when the midi output is ready,
+ * and call snd_rawmidi_transmit_ack() after the transmission is
+ * finished.
+ *
+ * Return: The size of copied data, or a negative error code on failure.
+ */
+int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+                             unsigned char *buffer, int count)
+{
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
+       result = __snd_rawmidi_transmit_peek(substream, buffer, count);
        spin_unlock_irqrestore(&runtime->lock, flags);
        return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit_peek);
 
 /**
- * snd_rawmidi_transmit_ack - acknowledge the transmission
+ * __snd_rawmidi_transmit_ack - acknowledge the transmission
  * @substream: the rawmidi substream
  * @count: the transferred count
  *
- * Advances the hardware pointer for the internal output buffer with
- * the given size and updates the condition.
- * Call after the transmission is finished.
- *
- * Return: The advanced size if successful, or a negative error code on failure.
+ * This is a variant of snd_rawmidi_transmit_ack() without spinlock.
  */
-int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
 {
-       unsigned long flags;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
 
        if (runtime->buffer == NULL) {
@@ -1132,7 +1151,6 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
                          "snd_rawmidi_transmit_ack: output is not active!!!\n");
                return -EINVAL;
        }
-       spin_lock_irqsave(&runtime->lock, flags);
        snd_BUG_ON(runtime->avail + count > runtime->buffer_size);
        runtime->hw_ptr += count;
        runtime->hw_ptr %= runtime->buffer_size;
@@ -1142,9 +1160,32 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
                if (runtime->drain || snd_rawmidi_ready(substream))
                        wake_up(&runtime->sleep);
        }
-       spin_unlock_irqrestore(&runtime->lock, flags);
        return count;
 }
+EXPORT_SYMBOL(__snd_rawmidi_transmit_ack);
+
+/**
+ * snd_rawmidi_transmit_ack - acknowledge the transmission
+ * @substream: the rawmidi substream
+ * @count: the transferred count
+ *
+ * Advances the hardware pointer for the internal output buffer with
+ * the given size and updates the condition.
+ * Call after the transmission is finished.
+ *
+ * Return: The advanced size if successful, or a negative error code on failure.
+ */
+int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
+{
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
+       result = __snd_rawmidi_transmit_ack(substream, count);
+       spin_unlock_irqrestore(&runtime->lock, flags);
+       return result;
+}
 EXPORT_SYMBOL(snd_rawmidi_transmit_ack);
 
 /**
@@ -1160,12 +1201,22 @@ EXPORT_SYMBOL(snd_rawmidi_transmit_ack);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
                         unsigned char *buffer, int count)
 {
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
        if (!substream->opened)
-               return -EBADFD;
-       count = snd_rawmidi_transmit_peek(substream, buffer, count);
-       if (count < 0)
-               return count;
-       return snd_rawmidi_transmit_ack(substream, count);
+               result = -EBADFD;
+       else {
+               count = __snd_rawmidi_transmit_peek(substream, buffer, count);
+               if (count <= 0)
+                       result = count;
+               else
+                       result = __snd_rawmidi_transmit_ack(substream, count);
+       }
+       spin_unlock_irqrestore(&runtime->lock, flags);
+       return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit);
 
@@ -1177,8 +1228,9 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
        unsigned long flags;
        long count1, result;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
+       unsigned long appl_ptr;
 
-       if (snd_BUG_ON(!kernelbuf && !userbuf))
+       if (!kernelbuf && !userbuf)
                return -EINVAL;
        if (snd_BUG_ON(!runtime->buffer))
                return -EINVAL;
@@ -1197,12 +1249,19 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
                        count1 = count;
                if (count1 > (long)runtime->avail)
                        count1 = runtime->avail;
+
+               /* update runtime->appl_ptr before unlocking for userbuf */
+               appl_ptr = runtime->appl_ptr;
+               runtime->appl_ptr += count1;
+               runtime->appl_ptr %= runtime->buffer_size;
+               runtime->avail -= count1;
+
                if (kernelbuf)
-                       memcpy(runtime->buffer + runtime->appl_ptr,
+                       memcpy(runtime->buffer + appl_ptr,
                               kernelbuf + result, count1);
                else if (userbuf) {
                        spin_unlock_irqrestore(&runtime->lock, flags);
-                       if (copy_from_user(runtime->buffer + runtime->appl_ptr,
+                       if (copy_from_user(runtime->buffer + appl_ptr,
                                           userbuf + result, count1)) {
                                spin_lock_irqsave(&runtime->lock, flags);
                                result = result > 0 ? result : -EFAULT;
@@ -1210,9 +1269,6 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
                        }
                        spin_lock_irqsave(&runtime->lock, flags);
                }
-               runtime->appl_ptr += count1;
-               runtime->appl_ptr %= runtime->buffer_size;
-               runtime->avail -= count1;
                result += count1;
                count -= count1;
        }
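
Both rawmidi copy loops now reserve their buffer region before dropping runtime->lock; a hedged note on why the ordering matters:

/* The lock must be dropped around copy_{to,from}_user(), which may fault
 * and sleep.  Advancing appl_ptr/avail *before* unlocking reserves
 * [appl_ptr, appl_ptr + count1) for this caller, so a concurrent reader
 * or writer re-entering the loop sees the updated pointers and claims a
 * disjoint region instead of racing over the same bytes.
 */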
index b1221b29728e115ad95e50d4638de3e3949bb9da..6779e82b46dd7060bd982137ca75cfcdbbfa989e 100644 (file)
@@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level)
 
        dp->index = i;
        if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
-               pr_err("ALSA: seq_oss: too many applications\n");
+               pr_debug("ALSA: seq_oss: too many applications\n");
                rc = -ENOMEM;
                goto _error;
        }
index 0f3b38184fe58da2809f9fa4fa9c79a83967c59e..b16dbef041747e1762c3285875a5573ac119b031 100644 (file)
@@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp)
        struct seq_oss_synth *rec;
        struct seq_oss_synthinfo *info;
 
-       if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+       if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
                return;
        for (i = 0; i < dp->max_synthdev; i++) {
                info = &dp->synths[i];
index 13cfa815732db759935f2daaf4c997c73016eb8c..58e79e02f2174e29f409cf129030af87d240dfce 100644 (file)
@@ -678,6 +678,9 @@ static int deliver_to_subscribers(struct snd_seq_client *client,
        else
                down_read(&grp->list_mutex);
        list_for_each_entry(subs, &grp->list_head, src_list) {
+               /* both ports ready? */
+               if (atomic_read(&subs->ref_count) != 2)
+                       continue;
                event->dest = subs->info.dest;
                if (subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP)
                        /* convert time according to flag with subscription */
index 81f7c109dc46e1b8ef9b4a2a67477fdf72940ef6..65175902a68a841650ac818f8fde713eca60c5b5 100644 (file)
@@ -49,11 +49,12 @@ static int snd_seq_call_port_info_ioctl(struct snd_seq_client *client, unsigned
        struct snd_seq_port_info *data;
        mm_segment_t fs;
 
-       data = memdup_user(data32, sizeof(*data32));
-       if (IS_ERR(data))
-               return PTR_ERR(data);
+       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
 
-       if (get_user(data->flags, &data32->flags) ||
+       if (copy_from_user(data, data32, sizeof(*data32)) ||
+           get_user(data->flags, &data32->flags) ||
            get_user(data->time_queue, &data32->time_queue))
                goto error;
        data->kernel = NULL;
index 801076687bb16f082a8780125c69e879bdef4019..c850345c43b53dd5616b155f34f741d0ca30701c 100644 (file)
@@ -383,15 +383,20 @@ int snd_seq_pool_init(struct snd_seq_pool *pool)
 
        if (snd_BUG_ON(!pool))
                return -EINVAL;
-       if (pool->ptr)                  /* should be atomic? */
-               return 0;
 
-       pool->ptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size);
-       if (!pool->ptr)
+       cellptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size);
+       if (!cellptr)
                return -ENOMEM;
 
        /* add new cells to the free cell list */
        spin_lock_irqsave(&pool->lock, flags);
+       if (pool->ptr) {
+               spin_unlock_irqrestore(&pool->lock, flags);
+               vfree(cellptr);
+               return 0;
+       }
+
+       pool->ptr = cellptr;
        pool->free = NULL;
 
        for (cell = 0; cell < pool->size; cell++) {
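
The reworked snd_seq_pool_init() is the usual allocate-outside-the-lock pattern, needed because vmalloc() can sleep while pool->lock is a spinlock; a hedged summary:

/* 1. vmalloc() the cell array with no locks held;
 * 2. take pool->lock and re-check pool->ptr;
 * 3. if another caller initialized the pool meanwhile, drop the lock and
 *    vfree() the unused array -- losing the race is cheap, sleeping in
 *    vmalloc() under a held spinlock is not an option.
 */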
index 55170a20ae7237246f5560e14c5b5066bb52ba35..fe686ee41c6da064ad4e1aa43ce172e359da1184 100644 (file)
@@ -173,10 +173,6 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client,
 }
 
 /* */
-enum group_type {
-       SRC_LIST, DEST_LIST
-};
-
 static int subscribe_port(struct snd_seq_client *client,
                          struct snd_seq_client_port *port,
                          struct snd_seq_port_subs_info *grp,
@@ -203,6 +199,20 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr,
        return NULL;
 }
 
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+                                       struct snd_seq_client_port *port,
+                                       struct snd_seq_subscribers *subs,
+                                       bool is_src, bool ack);
+
+static inline struct snd_seq_subscribers *
+get_subscriber(struct list_head *p, bool is_src)
+{
+       if (is_src)
+               return list_entry(p, struct snd_seq_subscribers, src_list);
+       else
+               return list_entry(p, struct snd_seq_subscribers, dest_list);
+}
+
 /*
  * remove all subscribers on the list
  * this is called from port_delete, for each src and dest list.
@@ -210,7 +220,7 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr,
 static void clear_subscriber_list(struct snd_seq_client *client,
                                  struct snd_seq_client_port *port,
                                  struct snd_seq_port_subs_info *grp,
-                                 int grptype)
+                                 int is_src)
 {
        struct list_head *p, *n;
 
@@ -219,15 +229,13 @@ static void clear_subscriber_list(struct snd_seq_client *client,
                struct snd_seq_client *c;
                struct snd_seq_client_port *aport;
 
-               if (grptype == SRC_LIST) {
-                       subs = list_entry(p, struct snd_seq_subscribers, src_list);
+               subs = get_subscriber(p, is_src);
+               if (is_src)
                        aport = get_client_port(&subs->info.dest, &c);
-               } else {
-                       subs = list_entry(p, struct snd_seq_subscribers, dest_list);
+               else
                        aport = get_client_port(&subs->info.sender, &c);
-               }
-               list_del(p);
-               unsubscribe_port(client, port, grp, &subs->info, 0);
+               delete_and_unsubscribe_port(client, port, subs, is_src, false);
+
                if (!aport) {
                        /* looks like the connected port is being deleted.
                         * we decrease the counter, and when both ports are deleted
@@ -235,21 +243,14 @@ static void clear_subscriber_list(struct snd_seq_client *client,
                         */
                        if (atomic_dec_and_test(&subs->ref_count))
                                kfree(subs);
-               } else {
-                       /* ok we got the connected port */
-                       struct snd_seq_port_subs_info *agrp;
-                       agrp = (grptype == SRC_LIST) ? &aport->c_dest : &aport->c_src;
-                       down_write(&agrp->list_mutex);
-                       if (grptype == SRC_LIST)
-                               list_del(&subs->dest_list);
-                       else
-                               list_del(&subs->src_list);
-                       up_write(&agrp->list_mutex);
-                       unsubscribe_port(c, aport, agrp, &subs->info, 1);
-                       kfree(subs);
-                       snd_seq_port_unlock(aport);
-                       snd_seq_client_unlock(c);
+                       continue;
                }
+
+               /* ok we got the connected port */
+               delete_and_unsubscribe_port(c, aport, subs, !is_src, true);
+               kfree(subs);
+               snd_seq_port_unlock(aport);
+               snd_seq_client_unlock(c);
        }
 }
 
@@ -262,8 +263,8 @@ static int port_delete(struct snd_seq_client *client,
        snd_use_lock_sync(&port->use_lock); 
 
        /* clear subscribers info */
-       clear_subscriber_list(client, port, &port->c_src, SRC_LIST);
-       clear_subscriber_list(client, port, &port->c_dest, DEST_LIST);
+       clear_subscriber_list(client, port, &port->c_src, true);
+       clear_subscriber_list(client, port, &port->c_dest, false);
 
        if (port->private_free)
                port->private_free(port->private_data);
@@ -479,85 +480,123 @@ static int match_subs_info(struct snd_seq_port_subscribe *r,
        return 0;
 }
 
-
-/* connect two ports */
-int snd_seq_port_connect(struct snd_seq_client *connector,
-                        struct snd_seq_client *src_client,
-                        struct snd_seq_client_port *src_port,
-                        struct snd_seq_client *dest_client,
-                        struct snd_seq_client_port *dest_port,
-                        struct snd_seq_port_subscribe *info)
+static int check_and_subscribe_port(struct snd_seq_client *client,
+                                   struct snd_seq_client_port *port,
+                                   struct snd_seq_subscribers *subs,
+                                   bool is_src, bool exclusive, bool ack)
 {
-       struct snd_seq_port_subs_info *src = &src_port->c_src;
-       struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
-       struct snd_seq_subscribers *subs, *s;
-       int err, src_called = 0;
-       unsigned long flags;
-       int exclusive;
-
-       subs = kzalloc(sizeof(*subs), GFP_KERNEL);
-       if (! subs)
-               return -ENOMEM;
-
-       subs->info = *info;
-       atomic_set(&subs->ref_count, 2);
+       struct snd_seq_port_subs_info *grp;
+       struct list_head *p;
+       struct snd_seq_subscribers *s;
+       int err;
 
-       down_write(&src->list_mutex);
-       down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
-
-       exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0;
+       grp = is_src ? &port->c_src : &port->c_dest;
        err = -EBUSY;
+       down_write(&grp->list_mutex);
        if (exclusive) {
-               if (! list_empty(&src->list_head) || ! list_empty(&dest->list_head))
+               if (!list_empty(&grp->list_head))
                        goto __error;
        } else {
-               if (src->exclusive || dest->exclusive)
+               if (grp->exclusive)
                        goto __error;
                /* check whether already exists */
-               list_for_each_entry(s, &src->list_head, src_list) {
-                       if (match_subs_info(info, &s->info))
-                               goto __error;
-               }
-               list_for_each_entry(s, &dest->list_head, dest_list) {
-                       if (match_subs_info(info, &s->info))
+               list_for_each(p, &grp->list_head) {
+                       s = get_subscriber(p, is_src);
+                       if (match_subs_info(&subs->info, &s->info))
                                goto __error;
                }
        }
 
-       if ((err = subscribe_port(src_client, src_port, src, info,
-                                 connector->number != src_client->number)) < 0)
-               goto __error;
-       src_called = 1;
-
-       if ((err = subscribe_port(dest_client, dest_port, dest, info,
-                                 connector->number != dest_client->number)) < 0)
+       err = subscribe_port(client, port, grp, &subs->info, ack);
+       if (err < 0) {
+               grp->exclusive = 0;
                goto __error;
+       }
 
        /* add to list */
-       write_lock_irqsave(&src->list_lock, flags);
-       // write_lock(&dest->list_lock); // no other lock yet
-       list_add_tail(&subs->src_list, &src->list_head);
-       list_add_tail(&subs->dest_list, &dest->list_head);
-       // write_unlock(&dest->list_lock); // no other lock yet
-       write_unlock_irqrestore(&src->list_lock, flags);
+       write_lock_irq(&grp->list_lock);
+       if (is_src)
+               list_add_tail(&subs->src_list, &grp->list_head);
+       else
+               list_add_tail(&subs->dest_list, &grp->list_head);
+       grp->exclusive = exclusive;
+       atomic_inc(&subs->ref_count);
+       write_unlock_irq(&grp->list_lock);
+       err = 0;
 
-       src->exclusive = dest->exclusive = exclusive;
+ __error:
+       up_write(&grp->list_mutex);
+       return err;
+}
+
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+                                       struct snd_seq_client_port *port,
+                                       struct snd_seq_subscribers *subs,
+                                       bool is_src, bool ack)
+{
+       struct snd_seq_port_subs_info *grp;
+       struct list_head *list;
+       bool empty;
+
+       grp = is_src ? &port->c_src : &port->c_dest;
+       list = is_src ? &subs->src_list : &subs->dest_list;
+       down_write(&grp->list_mutex);
+       write_lock_irq(&grp->list_lock);
+       empty = list_empty(list);
+       if (!empty)
+               list_del_init(list);
+       grp->exclusive = 0;
+       write_unlock_irq(&grp->list_lock);
+       up_write(&grp->list_mutex);
+
+       if (!empty)
+               unsubscribe_port(client, port, grp, &subs->info, ack);
+}
+
+/* connect two ports */
+int snd_seq_port_connect(struct snd_seq_client *connector,
+                        struct snd_seq_client *src_client,
+                        struct snd_seq_client_port *src_port,
+                        struct snd_seq_client *dest_client,
+                        struct snd_seq_client_port *dest_port,
+                        struct snd_seq_port_subscribe *info)
+{
+       struct snd_seq_subscribers *subs;
+       bool exclusive;
+       int err;
+
+       subs = kzalloc(sizeof(*subs), GFP_KERNEL);
+       if (!subs)
+               return -ENOMEM;
+
+       subs->info = *info;
+       atomic_set(&subs->ref_count, 0);
+       INIT_LIST_HEAD(&subs->src_list);
+       INIT_LIST_HEAD(&subs->dest_list);
+
+       exclusive = !!(info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE);
+
+       err = check_and_subscribe_port(src_client, src_port, subs, true,
+                                      exclusive,
+                                      connector->number != src_client->number);
+       if (err < 0)
+               goto error;
+       err = check_and_subscribe_port(dest_client, dest_port, subs, false,
+                                      exclusive,
+                                      connector->number != dest_client->number);
+       if (err < 0)
+               goto error_dest;
 
-       up_write(&dest->list_mutex);
-       up_write(&src->list_mutex);
        return 0;
 
-__error:
-       if (src_called)
-               unsubscribe_port(src_client, src_port, src, info,
-                                connector->number != src_client->number);
+ error_dest:
+       delete_and_unsubscribe_port(src_client, src_port, subs, true,
+                                   connector->number != src_client->number);
+ error:
        kfree(subs);
-       up_write(&dest->list_mutex);
-       up_write(&src->list_mutex);
        return err;
 }
 
-
 /* remove the connection */
 int snd_seq_port_disconnect(struct snd_seq_client *connector,
                            struct snd_seq_client *src_client,
@@ -567,37 +606,28 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector,
                            struct snd_seq_port_subscribe *info)
 {
        struct snd_seq_port_subs_info *src = &src_port->c_src;
-       struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
        struct snd_seq_subscribers *subs;
        int err = -ENOENT;
-       unsigned long flags;
 
        down_write(&src->list_mutex);
-       down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
-
        /* look for the connection */
        list_for_each_entry(subs, &src->list_head, src_list) {
                if (match_subs_info(info, &subs->info)) {
-                       write_lock_irqsave(&src->list_lock, flags);
-                       // write_lock(&dest->list_lock);  // no lock yet
-                       list_del(&subs->src_list);
-                       list_del(&subs->dest_list);
-                       // write_unlock(&dest->list_lock);
-                       write_unlock_irqrestore(&src->list_lock, flags);
-                       src->exclusive = dest->exclusive = 0;
-                       unsubscribe_port(src_client, src_port, src, info,
-                                        connector->number != src_client->number);
-                       unsubscribe_port(dest_client, dest_port, dest, info,
-                                        connector->number != dest_client->number);
-                       kfree(subs);
+                       atomic_dec(&subs->ref_count); /* mark as not ready */
                        err = 0;
                        break;
                }
        }
-
-       up_write(&dest->list_mutex);
        up_write(&src->list_mutex);
-       return err;
+       if (err < 0)
+               return err;
+
+       delete_and_unsubscribe_port(src_client, src_port, subs, true,
+                                   connector->number != src_client->number);
+       delete_and_unsubscribe_port(dest_client, dest_port, subs, false,
+                                   connector->number != dest_client->number);
+       kfree(subs);
+       return 0;
 }
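
Together with the deliver_to_subscribers() hunk earlier, subscription readiness is now tracked via subs->ref_count; a hedged summary of the lifecycle:

/* 0   after kzalloc() in snd_seq_port_connect() (no port linked yet);
 * +1  per successful check_and_subscribe_port() (src, then dest);
 * ==2 means both ports are ready -- deliver_to_subscribers() skips
 *     anything else, closing the window for events reaching a
 *     half-connected subscription;
 * -1  in snd_seq_port_disconnect() marks it not-ready before the actual
 *     teardown in delete_and_unsubscribe_port().
 */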
 
 
index 82b220c769c131ecd05fea96fd0692c12d56642f..293104926098f7074001b6f6a4248d2a09ddd360 100644 (file)
@@ -90,6 +90,9 @@ void snd_seq_timer_delete(struct snd_seq_timer **tmr)
 
 void snd_seq_timer_defaults(struct snd_seq_timer * tmr)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&tmr->lock, flags);
        /* setup defaults */
        tmr->ppq = 96;          /* 96 PPQ */
        tmr->tempo = 500000;    /* 120 BPM */
@@ -105,21 +108,25 @@ void snd_seq_timer_defaults(struct snd_seq_timer * tmr)
        tmr->preferred_resolution = seq_default_timer_resolution;
 
        tmr->skew = tmr->skew_base = SKEW_BASE;
+       spin_unlock_irqrestore(&tmr->lock, flags);
 }
 
-void snd_seq_timer_reset(struct snd_seq_timer * tmr)
+static void seq_timer_reset(struct snd_seq_timer *tmr)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&tmr->lock, flags);
-
        /* reset time & songposition */
        tmr->cur_time.tv_sec = 0;
        tmr->cur_time.tv_nsec = 0;
 
        tmr->tick.cur_tick = 0;
        tmr->tick.fraction = 0;
+}
+
+void snd_seq_timer_reset(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
 
+       spin_lock_irqsave(&tmr->lock, flags);
+       seq_timer_reset(tmr);
        spin_unlock_irqrestore(&tmr->lock, flags);
 }
 
@@ -138,8 +145,11 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri,
        tmr = q->timer;
        if (tmr == NULL)
                return;
-       if (!tmr->running)
+       spin_lock_irqsave(&tmr->lock, flags);
+       if (!tmr->running) {
+               spin_unlock_irqrestore(&tmr->lock, flags);
                return;
+       }
 
        resolution *= ticks;
        if (tmr->skew != tmr->skew_base) {
@@ -148,8 +158,6 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri,
                        (((resolution & 0xffff) * tmr->skew) >> 16);
        }
 
-       spin_lock_irqsave(&tmr->lock, flags);
-
        /* update timer */
        snd_seq_inc_time_nsec(&tmr->cur_time, resolution);
 
@@ -296,26 +304,30 @@ int snd_seq_timer_open(struct snd_seq_queue *q)
        t->callback = snd_seq_timer_interrupt;
        t->callback_data = q;
        t->flags |= SNDRV_TIMER_IFLG_AUTO;
+       spin_lock_irq(&tmr->lock);
        tmr->timeri = t;
+       spin_unlock_irq(&tmr->lock);
        return 0;
 }
 
 int snd_seq_timer_close(struct snd_seq_queue *q)
 {
        struct snd_seq_timer *tmr;
+       struct snd_timer_instance *t;
        
        tmr = q->timer;
        if (snd_BUG_ON(!tmr))
                return -EINVAL;
-       if (tmr->timeri) {
-               snd_timer_stop(tmr->timeri);
-               snd_timer_close(tmr->timeri);
-               tmr->timeri = NULL;
-       }
+       spin_lock_irq(&tmr->lock);
+       t = tmr->timeri;
+       tmr->timeri = NULL;
+       spin_unlock_irq(&tmr->lock);
+       if (t)
+               snd_timer_close(t);
        return 0;
 }
 
-int snd_seq_timer_stop(struct snd_seq_timer * tmr)
+static int seq_timer_stop(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
@@ -326,6 +338,17 @@ int snd_seq_timer_stop(struct snd_seq_timer * tmr)
        return 0;
 }
 
+int snd_seq_timer_stop(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_stop(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
 static int initialize_timer(struct snd_seq_timer *tmr)
 {
        struct snd_timer *t;
@@ -358,13 +381,13 @@ static int initialize_timer(struct snd_seq_timer *tmr)
        return 0;
 }
 
-int snd_seq_timer_start(struct snd_seq_timer * tmr)
+static int seq_timer_start(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
        if (tmr->running)
-               snd_seq_timer_stop(tmr);
-       snd_seq_timer_reset(tmr);
+               seq_timer_stop(tmr);
+       seq_timer_reset(tmr);
        if (initialize_timer(tmr) < 0)
                return -EINVAL;
        snd_timer_start(tmr->timeri, tmr->ticks);
@@ -373,14 +396,25 @@ int snd_seq_timer_start(struct snd_seq_timer * tmr)
        return 0;
 }
 
-int snd_seq_timer_continue(struct snd_seq_timer * tmr)
+int snd_seq_timer_start(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_start(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
+static int seq_timer_continue(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
        if (tmr->running)
                return -EBUSY;
        if (! tmr->initialized) {
-               snd_seq_timer_reset(tmr);
+               seq_timer_reset(tmr);
                if (initialize_timer(tmr) < 0)
                        return -EINVAL;
        }
@@ -390,11 +424,24 @@ int snd_seq_timer_continue(struct snd_seq_timer * tmr)
        return 0;
 }
 
+int snd_seq_timer_continue(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_continue(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
 /* return current 'real' time. use timeofday() to get better granularity. */
 snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
 {
        snd_seq_real_time_t cur_time;
+       unsigned long flags;
 
+       spin_lock_irqsave(&tmr->lock, flags);
        cur_time = tmr->cur_time;
        if (tmr->running) { 
                struct timeval tm;
@@ -410,7 +457,7 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
                }
                snd_seq_sanity_real_time(&cur_time);
        }
-                
+       spin_unlock_irqrestore(&tmr->lock, flags);
        return cur_time;        
 }
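
The seq_timer changes all follow one locking pattern; a hedged summary:

/* Public entry points (snd_seq_timer_{defaults,reset,stop,start,continue})
 * now take tmr->lock and delegate to static seq_timer_*() helpers that
 * assume the lock is held, so internal call chains -- e.g.
 * seq_timer_start() invoking seq_timer_stop() and seq_timer_reset() --
 * never re-take it.
 */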
 
index 3da2d48610b3a91e4b93b0967b1fac5afebe41f7..c82ed3e70506db65adcbd48f6bdd55bd9628633d 100644 (file)
@@ -155,21 +155,26 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
        struct snd_virmidi *vmidi = substream->runtime->private_data;
        int count, res;
        unsigned char buf[32], *pbuf;
+       unsigned long flags;
 
        if (up) {
                vmidi->trigger = 1;
                if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH &&
                    !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) {
-                       snd_rawmidi_transmit_ack(substream, substream->runtime->buffer_size - substream->runtime->avail);
-                       return;         /* ignored */
+                       while (snd_rawmidi_transmit(substream, buf,
+                                                   sizeof(buf)) > 0) {
+                               /* ignored */
+                       }
+                       return;
                }
                if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
                        if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
                                return;
                        vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
                }
+               spin_lock_irqsave(&substream->runtime->lock, flags);
                while (1) {
-                       count = snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
+                       count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
                        if (count <= 0)
                                break;
                        pbuf = buf;
@@ -179,16 +184,18 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
                                        snd_midi_event_reset_encode(vmidi->parser);
                                        continue;
                                }
-                               snd_rawmidi_transmit_ack(substream, res);
+                               __snd_rawmidi_transmit_ack(substream, res);
                                pbuf += res;
                                count -= res;
                                if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
                                        if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
-                                               return;
+                                               goto out;
                                        vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
                                }
                        }
                }
+       out:
+               spin_unlock_irqrestore(&substream->runtime->lock, flags);
        } else {
                vmidi->trigger = 0;
        }
@@ -254,9 +261,13 @@ static int snd_virmidi_output_open(struct snd_rawmidi_substream *substream)
  */
 static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream)
 {
+       struct snd_virmidi_dev *rdev = substream->rmidi->private_data;
        struct snd_virmidi *vmidi = substream->runtime->private_data;
-       snd_midi_event_free(vmidi->parser);
+
+       write_lock_irq(&rdev->filelist_lock);
        list_del(&vmidi->list);
+       write_unlock_irq(&rdev->filelist_lock);
+       snd_midi_event_free(vmidi->parser);
        substream->runtime->private_data = NULL;
        kfree(vmidi);
        return 0;
index cb25aded53499cdf86ceb954e2a4ffdca4913595..dca817fc78941b5f9109e8117b3fc5adb621562e 100644 (file)
@@ -65,6 +65,7 @@ struct snd_timer_user {
        int qtail;
        int qused;
        int queue_size;
+       bool disconnected;
        struct snd_timer_read *queue;
        struct snd_timer_tread *tqueue;
        spinlock_t qlock;
@@ -290,6 +291,9 @@ int snd_timer_open(struct snd_timer_instance **ti,
                mutex_unlock(&register_mutex);
                return -ENOMEM;
        }
+       /* take a card refcount for safe disconnection */
+       if (timer->card)
+               get_device(&timer->card->card_dev);
        timeri->slave_class = tid->dev_sclass;
        timeri->slave_id = slave_id;
        if (list_empty(&timer->open_list_head) && timer->hw.open)
@@ -359,6 +363,9 @@ int snd_timer_close(struct snd_timer_instance *timeri)
                }
                spin_unlock(&timer->lock);
                spin_unlock_irq(&slave_active_lock);
+               /* release a card refcount for safe disconnection */
+               if (timer->card)
+                       put_device(&timer->card->card_dev);
                mutex_unlock(&register_mutex);
        }
  out:
@@ -415,7 +422,7 @@ static void snd_timer_notify1(struct snd_timer_instance *ti, int event)
        spin_lock_irqsave(&timer->lock, flags);
        list_for_each_entry(ts, &ti->slave_active_head, active_list)
                if (ts->ccallback)
-                       ts->ccallback(ti, event + 100, &tstamp, resolution);
+                       ts->ccallback(ts, event + 100, &tstamp, resolution);
        spin_unlock_irqrestore(&timer->lock, flags);
 }
 
@@ -444,6 +451,10 @@ static int snd_timer_start_slave(struct snd_timer_instance *timeri)
        unsigned long flags;
 
        spin_lock_irqsave(&slave_active_lock, flags);
+       if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) {
+               spin_unlock_irqrestore(&slave_active_lock, flags);
+               return -EBUSY;
+       }
        timeri->flags |= SNDRV_TIMER_IFLG_RUNNING;
        if (timeri->master && timeri->timer) {
                spin_lock(&timeri->timer->lock);
@@ -468,18 +479,28 @@ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks)
                return -EINVAL;
        if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) {
                result = snd_timer_start_slave(timeri);
-               snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
+               if (result >= 0)
+                       snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
                return result;
        }
        timer = timeri->timer;
        if (timer == NULL)
                return -EINVAL;
+       if (timer->card && timer->card->shutdown)
+               return -ENODEV;
        spin_lock_irqsave(&timer->lock, flags);
+       if (timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+                            SNDRV_TIMER_IFLG_START)) {
+               result = -EBUSY;
+               goto unlock;
+       }
        timeri->ticks = timeri->cticks = ticks;
        timeri->pticks = 0;
        result = snd_timer_start1(timer, timeri, ticks);
+ unlock:
        spin_unlock_irqrestore(&timer->lock, flags);
-       snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
+       if (result >= 0)
+               snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
        return result;
 }
 
@@ -493,9 +514,17 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event)
 
        if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) {
                spin_lock_irqsave(&slave_active_lock, flags);
+               if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) {
+                       spin_unlock_irqrestore(&slave_active_lock, flags);
+                       return -EBUSY;
+               }
+               if (timeri->timer)
+                       spin_lock(&timeri->timer->lock);
                timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
                list_del_init(&timeri->ack_list);
                list_del_init(&timeri->active_list);
+               if (timeri->timer)
+                       spin_unlock(&timeri->timer->lock);
                spin_unlock_irqrestore(&slave_active_lock, flags);
                goto __end;
        }
@@ -503,8 +532,17 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event)
        if (!timer)
                return -EINVAL;
        spin_lock_irqsave(&timer->lock, flags);
+       if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+                              SNDRV_TIMER_IFLG_START))) {
+               spin_unlock_irqrestore(&timer->lock, flags);
+               return -EBUSY;
+       }
        list_del_init(&timeri->ack_list);
        list_del_init(&timeri->active_list);
+       if (timer->card && timer->card->shutdown) {
+               spin_unlock_irqrestore(&timer->lock, flags);
+               return 0;
+       }
        if ((timeri->flags & SNDRV_TIMER_IFLG_RUNNING) &&
            !(--timer->running)) {
                timer->hw.stop(timer);
@@ -565,11 +603,18 @@ int snd_timer_continue(struct snd_timer_instance *timeri)
        timer = timeri->timer;
        if (! timer)
                return -EINVAL;
+       if (timer->card && timer->card->shutdown)
+               return -ENODEV;
        spin_lock_irqsave(&timer->lock, flags);
+       if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) {
+               result = -EBUSY;
+               goto unlock;
+       }
        if (!timeri->cticks)
                timeri->cticks = 1;
        timeri->pticks = 0;
        result = snd_timer_start1(timer, timeri, timer->sticks);
+ unlock:
        spin_unlock_irqrestore(&timer->lock, flags);
        snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_CONTINUE);
        return result;
@@ -628,6 +673,9 @@ static void snd_timer_tasklet(unsigned long arg)
        unsigned long resolution, ticks;
        unsigned long flags;
 
+       if (timer->card && timer->card->shutdown)
+               return;
+
        spin_lock_irqsave(&timer->lock, flags);
        /* now process all callbacks */
        while (!list_empty(&timer->sack_list_head)) {
@@ -668,6 +716,9 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left)
        if (timer == NULL)
                return;
 
+       if (timer->card && timer->card->shutdown)
+               return;
+
        spin_lock_irqsave(&timer->lock, flags);
 
        /* remember the current resolution */
@@ -697,8 +748,8 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left)
                        ti->cticks = ti->ticks;
                } else {
                        ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
-                       if (--timer->running)
-                               list_del_init(&ti->active_list);
+                       --timer->running;
+                       list_del_init(&ti->active_list);
                }
                if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) ||
                    (ti->flags & SNDRV_TIMER_IFLG_FAST))
@@ -881,8 +932,15 @@ static int snd_timer_dev_register(struct snd_device *dev)
 static int snd_timer_dev_disconnect(struct snd_device *device)
 {
        struct snd_timer *timer = device->device_data;
+       struct snd_timer_instance *ti;
+
        mutex_lock(&register_mutex);
        list_del_init(&timer->device_list);
+       /* wake up pending sleepers */
+       list_for_each_entry(ti, &timer->open_list_head, open_list) {
+               if (ti->disconnect)
+                       ti->disconnect(ti);
+       }
        mutex_unlock(&register_mutex);
        return 0;
 }
@@ -893,6 +951,8 @@ void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstam
        unsigned long resolution = 0;
        struct snd_timer_instance *ti, *ts;
 
+       if (timer->card && timer->card->shutdown)
+               return;
        if (! (timer->hw.flags & SNDRV_TIMER_HW_SLAVE))
                return;
        if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_MSTART ||
@@ -1002,11 +1062,21 @@ static int snd_timer_s_stop(struct snd_timer * timer)
        return 0;
 }
 
+static int snd_timer_s_close(struct snd_timer *timer)
+{
+       struct snd_timer_system_private *priv;
+
+       priv = (struct snd_timer_system_private *)timer->private_data;
+       del_timer_sync(&priv->tlist);
+       return 0;
+}
+
 static struct snd_timer_hardware snd_timer_system =
 {
        .flags =        SNDRV_TIMER_HW_FIRST | SNDRV_TIMER_HW_TASKLET,
        .resolution =   1000000000L / HZ,
        .ticks =        10000000L,
+       .close =        snd_timer_s_close,
        .start =        snd_timer_s_start,
        .stop =         snd_timer_s_stop
 };
@@ -1051,6 +1121,8 @@ static void snd_timer_proc_read(struct snd_info_entry *entry,
 
        mutex_lock(&register_mutex);
        list_for_each_entry(timer, &snd_timer_list, device_list) {
+               if (timer->card && timer->card->shutdown)
+                       continue;
                switch (timer->tmr_class) {
                case SNDRV_TIMER_CLASS_GLOBAL:
                        snd_iprintf(buffer, "G%i: ", timer->tmr_device);
@@ -1185,6 +1257,14 @@ static void snd_timer_user_ccallback(struct snd_timer_instance *timeri,
        wake_up(&tu->qchange_sleep);
 }
 
+static void snd_timer_user_disconnect(struct snd_timer_instance *timeri)
+{
+       struct snd_timer_user *tu = timeri->callback_data;
+
+       tu->disconnected = true;
+       wake_up(&tu->qchange_sleep);
+}
+
 static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri,
                                      unsigned long resolution,
                                      unsigned long ticks)
@@ -1558,6 +1638,7 @@ static int snd_timer_user_tselect(struct file *file,
                        ? snd_timer_user_tinterrupt : snd_timer_user_interrupt;
                tu->timeri->ccallback = snd_timer_user_ccallback;
                tu->timeri->callback_data = (void *)tu;
+               tu->timeri->disconnect = snd_timer_user_disconnect;
        }
 
       __err:
@@ -1852,6 +1933,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 {
        struct snd_timer_user *tu;
        long result = 0, unit;
+       int qhead;
        int err = 0;
 
        tu = file->private_data;
@@ -1863,7 +1945,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 
                        if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
                                err = -EAGAIN;
-                               break;
+                               goto _error;
                        }
 
                        set_current_state(TASK_INTERRUPTIBLE);
@@ -1876,40 +1958,39 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 
                        remove_wait_queue(&tu->qchange_sleep, &wait);
 
+                       if (tu->disconnected) {
+                               err = -ENODEV;
+                               goto _error;
+                       }
                        if (signal_pending(current)) {
                                err = -ERESTARTSYS;
-                               break;
+                               goto _error;
                        }
                }
 
+               qhead = tu->qhead++;
+               tu->qhead %= tu->queue_size;
                spin_unlock_irq(&tu->qlock);
-               if (err < 0)
-                       goto _error;
 
                if (tu->tread) {
-                       if (copy_to_user(buffer, &tu->tqueue[tu->qhead++],
-                                        sizeof(struct snd_timer_tread))) {
+                       if (copy_to_user(buffer, &tu->tqueue[qhead],
+                                        sizeof(struct snd_timer_tread)))
                                err = -EFAULT;
-                               goto _error;
-                       }
                } else {
-                       if (copy_to_user(buffer, &tu->queue[tu->qhead++],
-                                        sizeof(struct snd_timer_read))) {
+                       if (copy_to_user(buffer, &tu->queue[qhead],
+                                        sizeof(struct snd_timer_read)))
                                err = -EFAULT;
-                               goto _error;
-                       }
                }
 
-               tu->qhead %= tu->queue_size;
-
-               result += unit;
-               buffer += unit;
-
                spin_lock_irq(&tu->qlock);
                tu->qused--;
+               if (err < 0)
+                       goto _error;
+               result += unit;
+               buffer += unit;
        }
-       spin_unlock_irq(&tu->qlock);
  _error:
+       spin_unlock_irq(&tu->qlock);
        return result > 0 ? result : err;
 }
 
@@ -1925,6 +2006,8 @@ static unsigned int snd_timer_user_poll(struct file *file, poll_table * wait)
        mask = 0;
        if (tu->qused)
                mask |= POLLIN | POLLRDNORM;
+       if (tu->disconnected)
+               mask |= POLLERR;
 
        return mask;
 }
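
The timer hunks above combine three pieces: a disconnected flag in struct snd_timer_user, a disconnect hook that sets the flag and calls wake_up(&tu->qchange_sleep), and read/poll paths that check the flag (returning -ENODEV, or flagging POLLERR). A rough userspace model of that flag-then-wake teardown pattern, with invented names (user_read, user_disconnect) and a condition variable in place of the kernel wait queue:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <errno.h>

struct user {
    pthread_mutex_t lock;
    pthread_cond_t qchange;     /* stands in for qchange_sleep */
    int qused;
    bool disconnected;
};

static int user_read(struct user *tu)
{
    int ret;

    pthread_mutex_lock(&tu->lock);
    while (tu->qused == 0 && !tu->disconnected)
        pthread_cond_wait(&tu->qchange, &tu->lock);
    if (tu->disconnected) {
        ret = -ENODEV;          /* mirrors the -ENODEV above */
    } else {
        tu->qused--;
        ret = 0;
    }
    pthread_mutex_unlock(&tu->lock);
    return ret;
}

static void user_disconnect(struct user *tu)
{
    pthread_mutex_lock(&tu->lock);
    tu->disconnected = true;                /* flag first ... */
    pthread_cond_broadcast(&tu->qchange);   /* ... then wake sleepers */
    pthread_mutex_unlock(&tu->lock);
}

static void *reader(void *arg)
{
    printf("read returned %d\n", user_read(arg));
    return NULL;
}

int main(void)
{
    struct user tu = { .lock = PTHREAD_MUTEX_INITIALIZER,
                       .qchange = PTHREAD_COND_INITIALIZER };
    pthread_t t;

    pthread_create(&t, NULL, reader, &tu);
    user_disconnect(&tu);
    pthread_join(t, NULL);
    return 0;
}
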
index 75b74850c00535ee320f3827e094d9d4da1e30e3..c0f8f613f1f1b5e954ffa82b760c82353dd17c80 100644 (file)
@@ -109,6 +109,9 @@ struct dummy_timer_ops {
        snd_pcm_uframes_t (*pointer)(struct snd_pcm_substream *);
 };
 
+#define get_dummy_ops(substream) \
+       (*(const struct dummy_timer_ops **)(substream)->runtime->private_data)
+
 struct dummy_model {
        const char *name;
        int (*playback_constraints)(struct snd_pcm_runtime *runtime);
@@ -137,7 +140,6 @@ struct snd_dummy {
        int iobox;
        struct snd_kcontrol *cd_volume_ctl;
        struct snd_kcontrol *cd_switch_ctl;
-       const struct dummy_timer_ops *timer_ops;
 };
 
 /*
@@ -231,6 +233,8 @@ static struct dummy_model *dummy_models[] = {
  */
 
 struct dummy_systimer_pcm {
+       /* ops must be the first item */
+       const struct dummy_timer_ops *timer_ops;
        spinlock_t lock;
        struct timer_list timer;
        unsigned long base_time;
@@ -366,6 +370,8 @@ static const struct dummy_timer_ops dummy_systimer_ops = {
  */
 
 struct dummy_hrtimer_pcm {
+       /* ops must be the first item */
+       const struct dummy_timer_ops *timer_ops;
        ktime_t base_time;
        ktime_t period_time;
        atomic_t running;
@@ -492,31 +498,25 @@ static const struct dummy_timer_ops dummy_hrtimer_ops = {
 
 static int dummy_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
-       struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
-
        switch (cmd) {
        case SNDRV_PCM_TRIGGER_START:
        case SNDRV_PCM_TRIGGER_RESUME:
-               return dummy->timer_ops->start(substream);
+               return get_dummy_ops(substream)->start(substream);
        case SNDRV_PCM_TRIGGER_STOP:
        case SNDRV_PCM_TRIGGER_SUSPEND:
-               return dummy->timer_ops->stop(substream);
+               return get_dummy_ops(substream)->stop(substream);
        }
        return -EINVAL;
 }
 
 static int dummy_pcm_prepare(struct snd_pcm_substream *substream)
 {
-       struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
-
-       return dummy->timer_ops->prepare(substream);
+       return get_dummy_ops(substream)->prepare(substream);
 }
 
 static snd_pcm_uframes_t dummy_pcm_pointer(struct snd_pcm_substream *substream)
 {
-       struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
-
-       return dummy->timer_ops->pointer(substream);
+       return get_dummy_ops(substream)->pointer(substream);
 }
 
 static struct snd_pcm_hardware dummy_pcm_hardware = {
@@ -562,17 +562,19 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream)
        struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
        struct dummy_model *model = dummy->model;
        struct snd_pcm_runtime *runtime = substream->runtime;
+       const struct dummy_timer_ops *ops;
        int err;
 
-       dummy->timer_ops = &dummy_systimer_ops;
+       ops = &dummy_systimer_ops;
 #ifdef CONFIG_HIGH_RES_TIMERS
        if (hrtimer)
-               dummy->timer_ops = &dummy_hrtimer_ops;
+               ops = &dummy_hrtimer_ops;
 #endif
 
-       err = dummy->timer_ops->create(substream);
+       err = ops->create(substream);
        if (err < 0)
                return err;
+       get_dummy_ops(substream) = ops;
 
        runtime->hw = dummy->pcm_hw;
        if (substream->pcm->device & 1) {
@@ -594,7 +596,7 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream)
                        err = model->capture_constraints(substream->runtime);
        }
        if (err < 0) {
-               dummy->timer_ops->free(substream);
+               get_dummy_ops(substream)->free(substream);
                return err;
        }
        return 0;
@@ -602,8 +604,7 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream)
 
 static int dummy_pcm_close(struct snd_pcm_substream *substream)
 {
-       struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
-       dummy->timer_ops->free(substream);
+       get_dummy_ops(substream)->free(substream);
        return 0;
 }
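
The get_dummy_ops() macro above works only because both private structs now carry their ops pointer as the first member: casting runtime->private_data to a pointer-to-ops-pointer and dereferencing it recovers the ops regardless of which concrete struct is behind it. A compilable sketch of the trick, with illustrative types only:

#include <stdio.h>

struct timer_ops {
    const char *name;
    int (*start)(void *priv);
};

struct systimer_priv {
    const struct timer_ops *ops;    /* must be the first member */
    int sys_specific;
};

struct hrtimer_priv {
    const struct timer_ops *ops;    /* must be the first member */
    long hr_specific;
};

/* works for either priv struct, and expands to an lvalue */
#define get_ops(priv) (*(const struct timer_ops **)(priv))

static int sys_start(void *priv) { (void)priv; return 1; }

static const struct timer_ops sys_ops = { "sys", sys_start };

int main(void)
{
    struct systimer_priv sp = { &sys_ops, 0 };
    void *private_data = &sp;       /* what the core hands back */

    printf("%s -> %d\n", get_ops(private_data)->name,
           get_ops(private_data)->start(private_data));
    return 0;
}

Because the macro expands to an lvalue, dummy_pcm_open() can assign through it (get_dummy_ops(substream) = ops), pinning each substream to the backend it was opened with instead of sharing one mutable per-card pointer.
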
 
index 926e5dcbb66a1a3ec523482ceb5e199f8c455b64..5022c9b97ddfc007bc6119ab6a0cb41fafb2c16d 100644 (file)
@@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = {
        [6] = 0x07,
 };
 
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
 {
        unsigned int i;
 
        for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
-               if (snd_bebob_rate_table[i] == rate)
-                       return i;
+               if (snd_bebob_rate_table[i] == rate) {
+                       *index = i;
+                       return 0;
+               }
        }
        return -EINVAL;
 }
@@ -425,7 +427,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate)
                goto end;
 
        /* confirm params for both streams */
-       index = get_formation_index(rate);
+       err = get_formation_index(rate, &index);
+       if (err < 0)
+               goto end;
        pcm_channels = bebob->tx_stream_formations[index].pcm;
        midi_channels = bebob->tx_stream_formations[index].midi;
        err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,
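
get_formation_index() previously returned -EINVAL through an unsigned return type, which the caller would then use as a huge array index; the new signature above separates the status from the index. A tiny sketch of the same interface shape (the table values and names here are made up):

#include <stdio.h>
#include <errno.h>

static const unsigned int rate_table[] = { 32000, 44100, 48000, 88200 };
#define N_RATES (sizeof(rate_table) / sizeof(rate_table[0]))

static int get_index(unsigned int rate, unsigned int *index)
{
    for (unsigned int i = 0; i < N_RATES; i++) {
        if (rate_table[i] == rate) {
            *index = i;
            return 0;
        }
    }
    return -EINVAL;     /* no longer truncated into an index */
}

int main(void)
{
    unsigned int idx;

    if (get_index(44100, &idx) == 0)
        printf("44100 -> index %u\n", idx);
    if (get_index(12345, &idx) < 0)
        printf("12345 -> not found\n");
    return 0;
}
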
index b02a5e8cad448c40368b9396dc1c0890c91a3a42..0ac92aba5bc1c9a4c01b1764a6f16a098e3bbae1 100644 (file)
@@ -63,7 +63,7 @@ struct amdtp_dot {
 #define BYTE_PER_SAMPLE (4)
 #define MAGIC_DOT_BYTE (2)
 #define MAGIC_BYTE_OFF(x) (((x) * BYTE_PER_SAMPLE) + MAGIC_DOT_BYTE)
-static const u8 dot_scrt(const u8 idx, const unsigned int off)
+static u8 dot_scrt(const u8 idx, const unsigned int off)
 {
        /*
         * the length of the added pattern only depends on the lower nibble
index 904ce0329fa1ac3d7e3364d66664d6ba60d6b6d8..040a96d1ba8ec1fa1dbfa4521a46e7efb865f435 100644 (file)
@@ -230,6 +230,7 @@ int snd_tscm_transaction_register(struct snd_tscm *tscm)
        return err;
 error:
        fw_core_remove_address_handler(&tscm->async_handler);
+       tscm->async_handler.callback_data = NULL;
        return err;
 }
 
@@ -276,6 +277,9 @@ void snd_tscm_transaction_unregister(struct snd_tscm *tscm)
        __be32 reg;
        unsigned int i;
 
+       if (tscm->async_handler.callback_data == NULL)
+               return;
+
        /* Turn off FireWire LED. */
        reg = cpu_to_be32(0x0000008e);
        snd_fw_transaction(tscm->unit, TCODE_WRITE_QUADLET_REQUEST,
@@ -297,6 +301,8 @@ void snd_tscm_transaction_unregister(struct snd_tscm *tscm)
                           &reg, sizeof(reg), 0);
 
        fw_core_remove_address_handler(&tscm->async_handler);
+       tscm->async_handler.callback_data = NULL;
+
        for (i = 0; i < TSCM_MIDI_OUT_PORT_MAX; i++)
                snd_fw_async_midi_port_destroy(&tscm->out_ports[i]);
 }
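
Clearing async_handler.callback_data on both the error path and in unregister turns the teardown into an idempotent operation, so register-failure cleanup plus the normal unregister path cannot remove the same address handler twice. A sketch of that guard with an invented resource (plain malloc/free, not the FireWire core):

#include <stdio.h>
#include <stdlib.h>

struct dev {
    void *handler_data;     /* non-NULL while registered */
};

static void unregister_handler(struct dev *d)
{
    if (d->handler_data == NULL)
        return;             /* already unregistered: do nothing */
    free(d->handler_data);
    d->handler_data = NULL;
}

int main(void)
{
    struct dev d = { malloc(16) };

    unregister_handler(&d); /* real teardown */
    unregister_handler(&d); /* duplicate call is now safe */
    printf("ok\n");
    return 0;
}
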
index ee0bc183950888ba6ade12bb8441fa81be41ec11..e281c338e562d59e861303789d82c319b9a2ae9c 100644 (file)
@@ -21,7 +21,6 @@ static struct snd_tscm_spec model_specs[] = {
                .pcm_playback_analog_channels = 8,
                .midi_capture_ports = 4,
                .midi_playback_ports = 4,
-               .is_controller = true,
        },
        {
                .name = "FW-1082",
@@ -31,9 +30,16 @@ static struct snd_tscm_spec model_specs[] = {
                .pcm_playback_analog_channels = 2,
                .midi_capture_ports = 2,
                .midi_playback_ports = 2,
-               .is_controller = true,
        },
-       /* FW-1804 may be supported. */
+       {
+               .name = "FW-1804",
+               .has_adat = true,
+               .has_spdif = true,
+               .pcm_capture_analog_channels = 8,
+               .pcm_playback_analog_channels = 2,
+               .midi_capture_ports = 2,
+               .midi_playback_ports = 4,
+       },
 };
 
 static int identify_model(struct snd_tscm *tscm)
index 2d028d2bd3bdcdcd8be15a184e555786726c94d8..30ab77e924f7f68f9b939a230109152722ad9057 100644 (file)
@@ -39,7 +39,6 @@ struct snd_tscm_spec {
        unsigned int pcm_playback_analog_channels;
        unsigned int midi_capture_ports;
        unsigned int midi_playback_ports;
-       bool is_controller;
 };
 
 #define TSCM_MIDI_IN_PORT_MAX  4
@@ -72,9 +71,6 @@ struct snd_tscm {
        struct snd_fw_async_midi_port out_ports[TSCM_MIDI_OUT_PORT_MAX];
        u8 running_status[TSCM_MIDI_OUT_PORT_MAX];
        bool on_sysex[TSCM_MIDI_OUT_PORT_MAX];
-
-       /* For control messages. */
-       struct snd_firewire_tascam_status *status;
 };
 
 #define TSCM_ADDR_BASE                 0xffff00000000ull
index b5a17cb510a0b4ee71115973a7d73309e9061b03..8c486235c905253ea0609183d813362a294e42f9 100644 (file)
@@ -426,18 +426,22 @@ EXPORT_SYMBOL_GPL(snd_hdac_bus_stop_chip);
  * @bus: HD-audio core bus
  * @status: INTSTS register value
  * @ack: callback to be called for woken streams
+ *
+ * Returns the bits of handled streams, or zero if no stream is handled.
  */
-void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
+int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
                                    void (*ack)(struct hdac_bus *,
                                                struct hdac_stream *))
 {
        struct hdac_stream *azx_dev;
        u8 sd_status;
+       int handled = 0;
 
        list_for_each_entry(azx_dev, &bus->stream_list, list) {
                if (status & azx_dev->sd_int_sta_mask) {
                        sd_status = snd_hdac_stream_readb(azx_dev, SD_STS);
                        snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK);
+                       handled |= 1 << azx_dev->index;
                        if (!azx_dev->substream || !azx_dev->running ||
                            !(sd_status & SD_INT_COMPLETE))
                                continue;
@@ -445,6 +449,7 @@ void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
                                ack(bus, azx_dev);
                }
        }
+       return handled;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_bus_handle_stream_irq);
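
The new int return above packs one bit per serviced stream (1 << azx_dev->index) so the caller can tell "none of these interrupts were ours" (zero) apart from real work. A minimal illustration with made-up types:

#include <stdio.h>

struct stream { int index; unsigned int int_mask; };

static int handle_stream_irq(unsigned int status,
                             const struct stream *s, int n)
{
    int handled = 0;

    for (int i = 0; i < n; i++)
        if (status & s[i].int_mask)
            handled |= 1 << s[i].index;   /* record this stream */
    return handled;
}

int main(void)
{
    struct stream s[] = { { 0, 0x1 }, { 1, 0x2 }, { 2, 0x4 } };

    printf("handled mask = 0x%x\n", handle_stream_irq(0x5, s, 3));
    return 0;
}
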
 
index c50177fb469f5f67974c853b109c07a68ef2524d..f6854dbd7d8d0eed41b5158c25b20e03211ccac5 100644 (file)
@@ -306,7 +306,7 @@ out_master_del:
 out_err:
        kfree(acomp);
        bus->audio_component = NULL;
-       dev_err(dev, "failed to add i915 component master (%d)\n", ret);
+       dev_info(dev, "failed to add i915 component master (%d)\n", ret);
 
        return ret;
 }
index 0216475fc759e23442a9f2ab7775c6966458b2cd..37adcc6cbe6bd852a290cc95ea58cc05863e88ba 100644 (file)
@@ -3,6 +3,7 @@
 config SND_WSS_LIB
         tristate
         select SND_PCM
+       select SND_TIMER
 
 config SND_SB_COMMON
         tristate
@@ -42,6 +43,7 @@ config SND_AD1816A
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Analog Devices SoundPort
          AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@ config SND_GUSCLASSIC
        tristate "Gravis UltraSound Classic"
        select SND_RAWMIDI
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Gravis UltraSound Classic
          soundcards.
@@ -221,6 +224,7 @@ config SND_GUSEXTREME
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Gravis UltraSound Extreme
          soundcards.
index 656ce39bddbc531c27d5a15b6d07e899e17f1cd6..8f6594a7d37f7b940ded27f6a8a0395ea8fa1442 100644 (file)
@@ -155,6 +155,7 @@ config SND_AZT3328
        select SND_PCM
        select SND_RAWMIDI
        select SND_AC97_CODEC
+       select SND_TIMER
        depends on ZONE_DMA
        help
          Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@ config SND_EMU10K1
        select SND_HWDEP
        select SND_RAWMIDI
        select SND_AC97_CODEC
+       select SND_TIMER
        depends on ZONE_DMA
        help
          Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@ config SND_YMFPCI
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_AC97_CODEC
+       select SND_TIMER
        help
          Say Y here to include support for Yamaha PCI audio chips -
          YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
index 28e2f8b42f5e8e4e7f3aaaaa88f45d6834b02591..891453451543102994b7912584f00483b2c472f0 100644 (file)
@@ -1141,6 +1141,14 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu)
                emu->emu1010.firmware_thread =
                        kthread_create(emu1010_firmware_thread, emu,
                                       "emu1010_firmware");
+               if (IS_ERR(emu->emu1010.firmware_thread)) {
+                       err = PTR_ERR(emu->emu1010.firmware_thread);
+                       emu->emu1010.firmware_thread = NULL;
+                       dev_info(emu->card->dev,
+                                       "emu1010: Creating thread failed\n");
+                       return err;
+               }
+
                wake_up_process(emu->emu1010.firmware_thread);
        }
 
index 70671ad65d24565fb6c8522bd69444817aac2933..6efadbfb3fe3511b4726541cdd9ebd4aae6cd65d 100644 (file)
@@ -174,14 +174,40 @@ static inline bool codec_probed(struct hda_codec *codec)
        return device_attach(hda_codec_dev(codec)) > 0 && codec->preset;
 }
 
-/* try to auto-load and bind the codec module */
-static void codec_bind_module(struct hda_codec *codec)
+/* try to auto-load codec module */
+static void request_codec_module(struct hda_codec *codec)
 {
 #ifdef MODULE
        char modalias[32];
+       const char *mod = NULL;
+
+       switch (codec->probe_id) {
+       case HDA_CODEC_ID_GENERIC_HDMI:
+#if IS_MODULE(CONFIG_SND_HDA_CODEC_HDMI)
+               mod = "snd-hda-codec-hdmi";
+#endif
+               break;
+       case HDA_CODEC_ID_GENERIC:
+#if IS_MODULE(CONFIG_SND_HDA_GENERIC)
+               mod = "snd-hda-codec-generic";
+#endif
+               break;
+       default:
+               snd_hdac_codec_modalias(&codec->core, modalias, sizeof(modalias));
+               mod = modalias;
+               break;
+       }
+
+       if (mod)
+               request_module(mod);
+#endif /* MODULE */
+}
 
-       snd_hdac_codec_modalias(&codec->core, modalias, sizeof(modalias));
-       request_module(modalias);
+/* try to auto-load and bind the codec module */
+static void codec_bind_module(struct hda_codec *codec)
+{
+#ifdef MODULE
+       request_codec_module(codec);
        if (codec_probed(codec))
                return;
 #endif
@@ -218,17 +244,13 @@ static int codec_bind_generic(struct hda_codec *codec)
 
        if (is_likely_hdmi_codec(codec)) {
                codec->probe_id = HDA_CODEC_ID_GENERIC_HDMI;
-#if IS_MODULE(CONFIG_SND_HDA_CODEC_HDMI)
-               request_module("snd-hda-codec-hdmi");
-#endif
+               request_codec_module(codec);
                if (codec_probed(codec))
                        return 0;
        }
 
        codec->probe_id = HDA_CODEC_ID_GENERIC;
-#if IS_MODULE(CONFIG_SND_HDA_GENERIC)
-       request_module("snd-hda-codec-generic");
-#endif
+       request_codec_module(codec);
        if (codec_probed(codec))
                return 0;
        return -ENODEV;
index 37cf9cee983555ad1a47e4bb43ff2e9db6de39e9..27de8015717d95fdf45ac512a77c60eb32202d67 100644 (file)
@@ -930,6 +930,8 @@ irqreturn_t azx_interrupt(int irq, void *dev_id)
        struct azx *chip = dev_id;
        struct hdac_bus *bus = azx_bus(chip);
        u32 status;
+       bool active, handled = false;
+       int repeat = 0; /* count for avoiding endless loop */
 
 #ifdef CONFIG_PM
        if (azx_has_pm_runtime(chip))
@@ -939,33 +941,36 @@ irqreturn_t azx_interrupt(int irq, void *dev_id)
 
        spin_lock(&bus->reg_lock);
 
-       if (chip->disabled) {
-               spin_unlock(&bus->reg_lock);
-               return IRQ_NONE;
-       }
-
-       status = azx_readl(chip, INTSTS);
-       if (status == 0 || status == 0xffffffff) {
-               spin_unlock(&bus->reg_lock);
-               return IRQ_NONE;
-       }
+       if (chip->disabled)
+               goto unlock;
 
-       snd_hdac_bus_handle_stream_irq(bus, status, stream_update);
+       do {
+               status = azx_readl(chip, INTSTS);
+               if (status == 0 || status == 0xffffffff)
+                       break;
 
-       /* clear rirb int */
-       status = azx_readb(chip, RIRBSTS);
-       if (status & RIRB_INT_MASK) {
-               if (status & RIRB_INT_RESPONSE) {
-                       if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND)
-                               udelay(80);
-                       snd_hdac_bus_update_rirb(bus);
+               handled = true;
+               active = false;
+               if (snd_hdac_bus_handle_stream_irq(bus, status, stream_update))
+                       active = true;
+
+               /* clear rirb int */
+               status = azx_readb(chip, RIRBSTS);
+               if (status & RIRB_INT_MASK) {
+                       active = true;
+                       if (status & RIRB_INT_RESPONSE) {
+                               if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND)
+                                       udelay(80);
+                               snd_hdac_bus_update_rirb(bus);
+                       }
+                       azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
                }
-               azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
-       }
+       } while (active && ++repeat < 10);
 
+ unlock:
        spin_unlock(&bus->reg_lock);
 
-       return IRQ_HANDLED;
+       return IRQ_RETVAL(handled);
 }
 EXPORT_SYMBOL_GPL(azx_interrupt);
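
azx_interrupt() now re-reads INTSTS in a bounded loop: it keeps servicing while something was active, gives up after ten rounds so a stuck status register cannot wedge the CPU, and returns IRQ_RETVAL(handled) so the IRQ core can detect spurious interrupts. A userspace model of that shape, with hardware access simulated and all names invented:

#include <stdbool.h>
#include <stdio.h>

static unsigned int fake_status = 3;   /* pretend two events pending */

static unsigned int read_status(void) { return fake_status; }

static bool service_one(void)
{
    if (!fake_status)
        return false;
    fake_status >>= 1;      /* consume one event */
    return true;
}

static int fake_interrupt(void)
{
    bool active = false, handled = false;
    int repeat = 0;         /* count for avoiding endless loop */

    do {
        unsigned int status = read_status();
        if (status == 0 || status == 0xffffffff)
            break;          /* nothing pending / device gone */
        handled = true;
        active = service_one();
    } while (active && ++repeat < 10);

    return handled ? 1 : 0; /* roughly IRQ_HANDLED vs IRQ_NONE */
}

int main(void)
{
    printf("irq -> %d, status now %u\n", fake_interrupt(), fake_status);
    return 0;
}
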
 
index 30c8efe0f80a3a456bbbe09b6ce05abf810a8552..7ca5b89f088a6922e6acd09c12befae864994320 100644 (file)
@@ -4028,9 +4028,9 @@ static void pin_power_callback(struct hda_codec *codec,
                               struct hda_jack_callback *jack,
                               bool on)
 {
-       if (jack && jack->tbl->nid)
+       if (jack && jack->nid)
                sync_power_state_change(codec,
-                                       set_pin_power_jack(codec, jack->tbl->nid, on));
+                                       set_pin_power_jack(codec, jack->nid, on));
 }
 
 /* callback only doing power up -- called at first */
index c0bef11afa7e2cc01138a46708f4d6e9cd4bd529..e5240cb3749f4186e79f70dfc9e2c6536eaaed1d 100644 (file)
@@ -90,6 +90,8 @@ enum {
 #define NVIDIA_HDA_ENABLE_COHBIT      0x01
 
 /* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL         0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE        (0x1 << 6)
 #define INTEL_SCH_HDA_DEVC      0x78
 #define INTEL_SCH_HDA_DEVC_NOSNOOP       (0x1<<11)
 
@@ -361,7 +363,10 @@ enum {
                                        ((pci)->device == 0x0d0c) || \
                                        ((pci)->device == 0x160c))
 
-#define IS_BROXTON(pci)        ((pci)->device == 0x5a98)
+#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
+#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
+#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
 
 static char *driver_short_names[] = {
        [AZX_DRIVER_ICH] = "HDA Intel",
@@ -534,15 +539,26 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset)
 {
        struct hdac_bus *bus = azx_bus(chip);
        struct pci_dev *pci = chip->pci;
+       u32 val;
 
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
                snd_hdac_set_codec_wakeup(bus, true);
+       if (IS_SKL_PLUS(pci)) {
+               pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+               val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+               pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+       }
        azx_init_chip(chip, full_reset);
+       if (IS_SKL_PLUS(pci)) {
+               pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+               val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+               pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+       }
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
                snd_hdac_set_codec_wakeup(bus, false);
 
        /* reduce dma latency to avoid noise */
-       if (IS_BROXTON(pci))
+       if (IS_BXT(pci))
                bxt_reduce_dma_latency(chip);
 }
 
@@ -964,11 +980,6 @@ static int azx_resume(struct device *dev)
 /* put codec down to D3 at hibernation for Intel SKL+;
  * otherwise BIOS may still access the codec and screw up the driver
  */
-#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
-#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
-#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
-
 static int azx_freeze_noirq(struct device *dev)
 {
        struct pci_dev *pci = to_pci_dev(dev);
@@ -2078,9 +2089,11 @@ static int azx_probe_continue(struct azx *chip)
                         * for other chips, still continue probing as other
                         * codecs can be on the same link.
                         */
-                       if (CONTROLLER_IN_GPU(pci))
+                       if (CONTROLLER_IN_GPU(pci)) {
+                               dev_err(chip->card->dev,
+                                       "HSW/BDW HD-audio HDMI/DP requires binding with gfx driver\n");
                                goto out_free;
-                       else
+                       } else
                                goto skip_i915;
                }
 
@@ -2149,9 +2162,17 @@ i915_power_fail:
 static void azx_remove(struct pci_dev *pci)
 {
        struct snd_card *card = pci_get_drvdata(pci);
+       struct azx *chip;
+       struct hda_intel *hda;
+
+       if (card) {
+               /* cancel the pending probing work */
+               chip = card->private_data;
+               hda = container_of(chip, struct hda_intel, chip);
+               cancel_work_sync(&hda->probe_work);
 
-       if (card)
                snd_card_free(card);
+       }
 }
 
 static void azx_shutdown(struct pci_dev *pci)
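
The azx_remove() change matters because probing is deferred to a work item (hda->probe_work); freeing the card while that work is still queued or running would be a use-after-free, so it is cancelled synchronously first. A rough userspace analogue, with a thread join standing in for cancel_work_sync() and purely illustrative types:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct chip {
    pthread_t probe_work;   /* stands in for hda->probe_work */
    int state;
};

static void *probe_work_fn(void *arg)
{
    struct chip *chip = arg;
    chip->state = 1;        /* touches the object asynchronously */
    return NULL;
}

int main(void)
{
    struct chip *chip = calloc(1, sizeof(*chip));

    pthread_create(&chip->probe_work, NULL, probe_work_fn, chip);

    /* remove(): first make sure the async work cannot run anymore,
     * only then free the object it operates on */
    pthread_join(chip->probe_work, NULL);
    free(chip);
    printf("ok\n");
    return 0;
}
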
index c945e257d368890bcd3f72037568e566c0ba9221..a33234e04d4f7a1fe311ce7db50f1a56e9e1d7d4 100644 (file)
@@ -259,7 +259,7 @@ snd_hda_jack_detect_enable_callback(struct hda_codec *codec, hda_nid_t nid,
                if (!callback)
                        return ERR_PTR(-ENOMEM);
                callback->func = func;
-               callback->tbl = jack;
+               callback->nid = jack->nid;
                callback->next = jack->callback;
                jack->callback = callback;
        }
index 858708a044f57ef563d79a100c5a92e5f3adaa3d..e9814c0168ea5d77da2922454ed4256d7ad2a30a 100644 (file)
@@ -21,7 +21,7 @@ struct hda_jack_callback;
 typedef void (*hda_jack_callback_fn) (struct hda_codec *, struct hda_jack_callback *);
 
 struct hda_jack_callback {
-       struct hda_jack_tbl *tbl;
+       hda_nid_t nid;
        hda_jack_callback_fn func;
        unsigned int private_data;      /* arbitrary data */
        struct hda_jack_callback *next;
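
Storing hda_nid_t nid instead of a struct hda_jack_tbl pointer removes a stale-pointer hazard: the jack table lives in a growable array that can be reallocated (and thus moved) as jacks are added, while the nid is a stable key that can be re-looked-up on demand (as patch_ca0132.c now does via snd_hda_jack_tbl_get()). A small sketch of why a cached element pointer is unsafe but a cached ID is fine; all types here are invented:

#include <stdio.h>
#include <stdlib.h>

struct entry { unsigned short nid; int data; };

struct table {
    struct entry *list;
    size_t used, alloced;
};

static struct entry *table_add(struct table *t, unsigned short nid)
{
    if (t->used == t->alloced) {
        size_t n = t->alloced ? t->alloced * 2 : 1;
        struct entry *nl = realloc(t->list, n * sizeof(*nl));
        if (!nl)
            abort();        /* error handling elided */
        t->list = nl;       /* old element pointers may now dangle */
        t->alloced = n;
    }
    t->list[t->used] = (struct entry){ .nid = nid };
    return &t->list[t->used++];
}

static struct entry *table_get(struct table *t, unsigned short nid)
{
    for (size_t i = 0; i < t->used; i++)
        if (t->list[i].nid == nid)
            return &t->list[i];
    return NULL;
}

int main(void)
{
    struct table t = { 0 };
    unsigned short cached_nid = table_add(&t, 0x14)->nid;  /* keep the ID */

    for (int i = 0; i < 100; i++)
        table_add(&t, 0x20 + i);    /* growth may relocate entries */

    /* the ID still finds the (possibly moved) entry */
    printf("found nid 0x%x -> %p\n", cached_nid,
           (void *)table_get(&t, cached_nid));
    free(t.list);
    return 0;
}
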
index 4ef2259f88cae3b1cf328880447dbf528ad42a95..9ceb2bc36e68026bd02dfca8bdca4f9ffab2e849 100644 (file)
@@ -4427,13 +4427,16 @@ static void ca0132_process_dsp_response(struct hda_codec *codec,
 static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
 {
        struct ca0132_spec *spec = codec->spec;
+       struct hda_jack_tbl *tbl;
 
        /* Delay enabling the HP amp, to let the mic-detection
         * state machine run.
         */
        cancel_delayed_work_sync(&spec->unsol_hp_work);
        schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500));
-       cb->tbl->block_report = 1;
+       tbl = snd_hda_jack_tbl_get(codec, cb->nid);
+       if (tbl)
+               tbl->block_report = 1;
 }
 
 static void amic_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
index a12ae8ac091451261a2ba613543677fabedb8cd0..c1c855a6c0af8199d03b04419d16d3494507ddeb 100644 (file)
@@ -614,6 +614,7 @@ enum {
        CS4208_MAC_AUTO,
        CS4208_MBA6,
        CS4208_MBP11,
+       CS4208_MACMINI,
        CS4208_GPIO0,
 };
 
@@ -621,6 +622,7 @@ static const struct hda_model_fixup cs4208_models[] = {
        { .id = CS4208_GPIO0, .name = "gpio0" },
        { .id = CS4208_MBA6, .name = "mba6" },
        { .id = CS4208_MBP11, .name = "mbp11" },
+       { .id = CS4208_MACMINI, .name = "macmini" },
        {}
 };
 
@@ -632,6 +634,7 @@ static const struct snd_pci_quirk cs4208_fixup_tbl[] = {
 /* codec SSID matching */
 static const struct snd_pci_quirk cs4208_mac_fixup_tbl[] = {
        SND_PCI_QUIRK(0x106b, 0x5e00, "MacBookPro 11,2", CS4208_MBP11),
+       SND_PCI_QUIRK(0x106b, 0x6c00, "MacMini 7,1", CS4208_MACMINI),
        SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6),
        SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6),
        SND_PCI_QUIRK(0x106b, 0x7b00, "MacBookPro 12,1", CS4208_MBP11),
@@ -666,6 +669,24 @@ static void cs4208_fixup_mac(struct hda_codec *codec,
        snd_hda_apply_fixup(codec, action);
 }
 
+/* MacMini 7,1 has the inverted jack detection */
+static void cs4208_fixup_macmini(struct hda_codec *codec,
+                                const struct hda_fixup *fix, int action)
+{
+       static const struct hda_pintbl pincfgs[] = {
+               { 0x18, 0x00ab9150 }, /* mic (audio-in) jack: disable detect */
+               { 0x21, 0x004be140 }, /* SPDIF: disable detect */
+               { }
+       };
+
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               /* HP pin (0x10) has an inverted detection */
+               codec->inv_jack_detect = 1;
+               /* disable the bogus Mic and SPDIF jack detections */
+               snd_hda_apply_pincfgs(codec, pincfgs);
+       }
+}
+
 static int cs4208_spdif_sw_put(struct snd_kcontrol *kcontrol,
                               struct snd_ctl_elem_value *ucontrol)
 {
@@ -709,6 +730,12 @@ static const struct hda_fixup cs4208_fixups[] = {
                .chained = true,
                .chain_id = CS4208_GPIO0,
        },
+       [CS4208_MACMINI] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs4208_fixup_macmini,
+               .chained = true,
+               .chain_id = CS4208_GPIO0,
+       },
        [CS4208_GPIO0] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cs4208_fixup_gpio0,
index 426a29a1c19bbda5841bd9d4574bf66e83cb8b76..8ee78dbd4c6054065186501b59ecd957c25285a8 100644 (file)
@@ -448,7 +448,8 @@ static int hdmi_eld_ctl_get(struct snd_kcontrol *kcontrol,
        eld = &per_pin->sink_eld;
 
        mutex_lock(&per_pin->lock);
-       if (eld->eld_size > ARRAY_SIZE(ucontrol->value.bytes.data)) {
+       if (eld->eld_size > ARRAY_SIZE(ucontrol->value.bytes.data) ||
+           eld->eld_size > ELD_MAX_SIZE) {
                mutex_unlock(&per_pin->lock);
                snd_BUG();
                return -EINVAL;
@@ -1193,7 +1194,7 @@ static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid)
 static void jack_callback(struct hda_codec *codec,
                          struct hda_jack_callback *jack)
 {
-       check_presence_and_report(codec, jack->tbl->nid);
+       check_presence_and_report(codec, jack->nid);
 }
 
 static void hdmi_intrinsic_event(struct hda_codec *codec, unsigned int res)
@@ -3653,6 +3654,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP",     patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP",  patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI",      patch_nvhdmi_2ch),
 HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP",   patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",   patch_via_hdmi),
index 8143c0e24a27ea99bd212bfd33f2bdf805424885..1f357cd72d9c3dedf5bcf42fa3d62ceaa092d3f6 100644 (file)
@@ -282,7 +282,7 @@ static void alc_update_knob_master(struct hda_codec *codec,
        uctl = kzalloc(sizeof(*uctl), GFP_KERNEL);
        if (!uctl)
                return;
-       val = snd_hda_codec_read(codec, jack->tbl->nid, 0,
+       val = snd_hda_codec_read(codec, jack->nid, 0,
                                 AC_VERB_GET_VOLUME_KNOB_CONTROL, 0);
        val &= HDA_AMP_VOLMASK;
        uctl->value.integer.value[0] = val;
@@ -327,6 +327,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0292:
                alc_update_coef_idx(codec, 0x4, 1<<15, 0);
                break;
+       case 0x10ec0225:
        case 0x10ec0233:
        case 0x10ec0255:
        case 0x10ec0256:
@@ -900,6 +901,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = {
        { 0x10ec0899, 0x1028, 0, "ALC3861" },
        { 0x10ec0298, 0x1028, 0, "ALC3266" },
        { 0x10ec0256, 0x1028, 0, "ALC3246" },
+       { 0x10ec0225, 0x1028, 0, "ALC3253" },
        { 0x10ec0670, 0x1025, 0, "ALC669X" },
        { 0x10ec0676, 0x1025, 0, "ALC679X" },
        { 0x10ec0282, 0x1043, 0, "ALC3229" },
@@ -1785,7 +1787,6 @@ enum {
        ALC882_FIXUP_NO_PRIMARY_HP,
        ALC887_FIXUP_ASUS_BASS,
        ALC887_FIXUP_BASS_CHMAP,
-       ALC882_FIXUP_DISABLE_AAMIX,
 };
 
 static void alc889_fixup_coef(struct hda_codec *codec,
@@ -1947,8 +1948,6 @@ static void alc882_fixup_no_primary_hp(struct hda_codec *codec,
 
 static void alc_fixup_bass_chmap(struct hda_codec *codec,
                                 const struct hda_fixup *fix, int action);
-static void alc_fixup_disable_aamix(struct hda_codec *codec,
-                                   const struct hda_fixup *fix, int action);
 
 static const struct hda_fixup alc882_fixups[] = {
        [ALC882_FIXUP_ABIT_AW9D_MAX] = {
@@ -2186,10 +2185,6 @@ static const struct hda_fixup alc882_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc_fixup_bass_chmap,
        },
-       [ALC882_FIXUP_DISABLE_AAMIX] = {
-               .type = HDA_FIXUP_FUNC,
-               .v.func = alc_fixup_disable_aamix,
-       },
 };
 
 static const struct snd_pci_quirk alc882_fixup_tbl[] = {
@@ -2228,6 +2223,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT),
        SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP),
        SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP),
+       SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP),
 
        /* All Apple entries are in codec SSIDs */
        SND_PCI_QUIRK(0x106b, 0x00a0, "MacBookPro 3,1", ALC889_FIXUP_MBP_VREF),
@@ -2257,7 +2253,6 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
        SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
        SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
-       SND_PCI_QUIRK(0x1458, 0xa182, "Gigabyte Z170X-UD3", ALC882_FIXUP_DISABLE_AAMIX),
        SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
        SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
        SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
@@ -2651,6 +2646,7 @@ enum {
        ALC269_TYPE_ALC298,
        ALC269_TYPE_ALC255,
        ALC269_TYPE_ALC256,
+       ALC269_TYPE_ALC225,
 };
 
 /*
@@ -2680,6 +2676,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
        case ALC269_TYPE_ALC298:
        case ALC269_TYPE_ALC255:
        case ALC269_TYPE_ALC256:
+       case ALC269_TYPE_ALC225:
                ssids = alc269_ssids;
                break;
        default:
@@ -3658,6 +3655,16 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
                WRITE_COEF(0xb7, 0x802b),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x4a, 1<<8, 0),
+               UPDATE_COEFEX(0x57, 0x05, 1<<14, 0),
+               UPDATE_COEF(0x63, 3<<14, 3<<14),
+               UPDATE_COEF(0x4a, 3<<4, 2<<4),
+               UPDATE_COEF(0x4a, 3<<10, 3<<10),
+               UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+               UPDATE_COEF(0x4a, 3<<10, 0),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3682,6 +3689,9 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0668);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to unplugged mode.\n");
 }
@@ -3727,6 +3737,13 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
                UPDATE_COEF(0xc3, 0, 1<<12),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEFEX(0x57, 0x05, 1<<14, 1<<14),
+               UPDATE_COEF(0x4a, 3<<4, 2<<4),
+               UPDATE_COEF(0x63, 3<<14, 0),
+               {}
+       };
+
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3772,12 +3789,22 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
                alc_process_coef_fw(codec, coef0688);
                snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
                break;
+       case 0x10ec0225:
+               alc_update_coef_idx(codec, 0x45, 0x3f<<10, 0x31<<10);
+               snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
+               alc_process_coef_fw(codec, coef0225);
+               snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
+               break;
        }
        codec_dbg(codec, "Headset jack set to mic-in mode.\n");
 }
 
 static void alc_headset_mode_default(struct hda_codec *codec)
 {
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+               {}
+       };
        static struct coef_fw coef0255[] = {
                WRITE_COEF(0x45, 0xc089),
                WRITE_COEF(0x45, 0xc489),
@@ -3819,6 +3846,9 @@ static void alc_headset_mode_default(struct hda_codec *codec)
        };
 
        switch (codec->core.vendor_id) {
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        case 0x10ec0255:
        case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
@@ -3884,6 +3914,13 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0000),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x35<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               UPDATE_COEF(0x4a, 7<<6, 7<<6),
+               UPDATE_COEF(0x4a, 3<<4, 3<<4),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3912,6 +3949,9 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0688);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to iPhone-style headset mode.\n");
 }
@@ -3955,6 +3995,13 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0000),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x39<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               UPDATE_COEF(0x4a, 7<<6, 7<<6),
+               UPDATE_COEF(0x4a, 3<<4, 3<<4),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3983,6 +4030,9 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0688);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to Nokia-style headset mode.\n");
 }
@@ -4014,6 +4064,11 @@ static void alc_determine_headset_type(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0c00),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -4058,6 +4113,12 @@ static void alc_determine_headset_type(struct hda_codec *codec)
                val = alc_read_coef_idx(codec, 0xbe);
                is_ctia = (val & 0x1c02) == 0x1c02;
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               msleep(800);
+               val = alc_read_coef_idx(codec, 0x46);
+               is_ctia = (val & 0x00f0) == 0x00f0;
+               break;
        }
 
        codec_dbg(codec, "Headset jack detected iPhone-style headset: %s\n",
@@ -4695,6 +4756,9 @@ enum {
        ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
        ALC293_FIXUP_LENOVO_SPK_NOISE,
        ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
+       ALC255_FIXUP_DELL_SPK_NOISE,
+       ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+       ALC280_FIXUP_HP_HEADSET_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5314,6 +5378,29 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc233_fixup_lenovo_line2_mic_hotkey,
        },
+       [ALC255_FIXUP_DELL_SPK_NOISE] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_disable_aamix,
+               .chained = true,
+               .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
+       [ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_VERBS,
+               .v.verbs = (const struct hda_verb[]) {
+                       /* Disable pass-through path for FRONT 14h */
+                       { 0x20, AC_VERB_SET_COEF_INDEX, 0x36 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 },
+                       {}
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
+       [ALC280_FIXUP_HP_HEADSET_MIC] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_disable_aamix,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MIC,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5356,6 +5443,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
        SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
        SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+       SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -5416,6 +5504,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
        SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
        SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+       SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
        SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -5560,6 +5649,9 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
        {}
 };
+#define ALC225_STANDARD_PINS \
+       {0x12, 0xb7a60130}, \
+       {0x21, 0x04211020}
 
 #define ALC256_STANDARD_PINS \
        {0x12, 0x90a60140}, \
@@ -5581,6 +5673,12 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {0x21, 0x03211020}
 
 static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+       SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC225_STANDARD_PINS,
+               {0x14, 0x901701a0}),
+       SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC225_STANDARD_PINS,
+               {0x14, 0x901701b0}),
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
                {0x14, 0x90170110},
                {0x21, 0x02211020}),
@@ -5906,6 +6004,9 @@ static int patch_alc269(struct hda_codec *codec)
                spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
                alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
                break;
+       case 0x10ec0225:
+               spec->codec_variant = ALC269_TYPE_ALC225;
+               break;
        }
 
        if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
@@ -6566,6 +6667,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
        SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A),
+       SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
        SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
        SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
        SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),
@@ -6795,6 +6897,7 @@ static int patch_alc680(struct hda_codec *codec)
  */
 static const struct hda_device_id snd_hda_id_realtek[] = {
        HDA_CODEC_ENTRY(0x10ec0221, "ALC221", patch_alc269),
+       HDA_CODEC_ENTRY(0x10ec0225, "ALC225", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0231, "ALC231", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269),
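
Most of the ALC225 support above is tables of WRITE_COEF/UPDATE_COEF steps; an UPDATE_COEF entry is a masked read-modify-write of one codec coefficient. A minimal model of that operation follows -- the register file here is a plain array, not the real verb interface, and the masking detail is an assumption about the intent, not a copy of the driver helper:

#include <stdio.h>

static unsigned short coef[0x80];      /* fake coefficient registers */

static unsigned short read_coef(unsigned int idx) { return coef[idx]; }
static void write_coef(unsigned int idx, unsigned short v) { coef[idx] = v; }

/* clear the mask bits, OR in the new bits, write back */
static void update_coef(unsigned int idx, unsigned short mask,
                        unsigned short val)
{
    write_coef(idx, (read_coef(idx) & ~mask) | (val & mask));
}

int main(void)
{
    coef[0x4a] = 0xffff;
    update_coef(0x4a, 3 << 4, 2 << 4);  /* like UPDATE_COEF(0x4a, 3<<4, 2<<4) */
    printf("coef[0x4a] = 0x%04x\n", coef[0x4a]);
    return 0;
}
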
index 2c7c5eb8b1e9514779b78bc278412803862062e9..37b70f8e878f715968c7496e01eeb493f8a2b7a4 100644 (file)
@@ -493,9 +493,9 @@ static void jack_update_power(struct hda_codec *codec,
        if (!spec->num_pwrs)
                return;
 
-       if (jack && jack->tbl->nid) {
-               stac_toggle_power_map(codec, jack->tbl->nid,
-                                     snd_hda_jack_detect(codec, jack->tbl->nid),
+       if (jack && jack->nid) {
+               stac_toggle_power_map(codec, jack->nid,
+                                     snd_hda_jack_detect(codec, jack->nid),
                                      true);
                return;
        }
index 3191e0a7d273213515ccd3e049e66c4f6ddc5f67..d1fb035f44db8fd7026d97e1148f0c09eb4d1679 100644 (file)
@@ -635,6 +635,7 @@ static int acp_dma_open(struct snd_pcm_substream *substream)
                                            SNDRV_PCM_HW_PARAM_PERIODS);
        if (ret < 0) {
                dev_err(prtd->platform->dev, "set integer constraint failed\n");
+               kfree(adata);
                return ret;
        }
 
index 33143fe1de0bdeaa0c6b04bc9bfd159b49bf658b..91785318b2834f325b2ebfb55476ce591b5e377b 100644 (file)
@@ -1929,6 +1929,25 @@ static struct {
        { 1000000, 13500000, 0,  1 },
 };
 
+static const unsigned int pseudo_fref_max[ARIZONA_FLL_MAX_FRATIO] = {
+       13500000,
+        6144000,
+        6144000,
+        3072000,
+        3072000,
+        2822400,
+        2822400,
+        1536000,
+        1536000,
+        1536000,
+        1536000,
+        1536000,
+        1536000,
+        1536000,
+        1536000,
+         768000,
+};
+
 static struct {
        unsigned int min;
        unsigned int max;
@@ -2042,16 +2061,32 @@ static int arizona_calc_fratio(struct arizona_fll *fll,
        /* Adjust FRATIO/refdiv to avoid integer mode if possible */
        refdiv = cfg->refdiv;
 
+       arizona_fll_dbg(fll, "pseudo: initial ratio=%u fref=%u refdiv=%u\n",
+                       init_ratio, Fref, refdiv);
+
        while (div <= ARIZONA_FLL_MAX_REFDIV) {
                for (ratio = init_ratio; ratio <= ARIZONA_FLL_MAX_FRATIO;
                     ratio++) {
                        if ((ARIZONA_FLL_VCO_CORNER / 2) /
-                           (fll->vco_mult * ratio) < Fref)
+                           (fll->vco_mult * ratio) < Fref) {
+                               arizona_fll_dbg(fll, "pseudo: hit VCO corner\n");
                                break;
+                       }
+
+                       if (Fref > pseudo_fref_max[ratio - 1]) {
+                               arizona_fll_dbg(fll,
+                                       "pseudo: exceeded max fref(%u) for ratio=%u\n",
+                                       pseudo_fref_max[ratio - 1],
+                                       ratio);
+                               break;
+                       }
 
                        if (target % (ratio * Fref)) {
                                cfg->refdiv = refdiv;
                                cfg->fratio = ratio - 1;
+                               arizona_fll_dbg(fll,
+                                       "pseudo: found fref=%u refdiv=%d(%d) ratio=%d\n",
+                                       Fref, refdiv, div, ratio);
                                return ratio;
                        }
                }
@@ -2060,6 +2095,9 @@ static int arizona_calc_fratio(struct arizona_fll *fll,
                        if (target % (ratio * Fref)) {
                                cfg->refdiv = refdiv;
                                cfg->fratio = ratio - 1;
+                               arizona_fll_dbg(fll,
+                                       "pseudo: found fref=%u refdiv=%d(%d) ratio=%d\n",
+                                       Fref, refdiv, div, ratio);
                                return ratio;
                        }
                }
@@ -2068,6 +2106,9 @@ static int arizona_calc_fratio(struct arizona_fll *fll,
                Fref /= 2;
                refdiv++;
                init_ratio = arizona_find_fratio(Fref, NULL);
+               arizona_fll_dbg(fll,
+                               "pseudo: change fref=%u refdiv=%d(%d) ratio=%u\n",
+                               Fref, refdiv, div, init_ratio);
        }
 
        arizona_fll_warn(fll, "Falling back to integer mode operation\n");
index bc08f0c5a5f69fe4a184ac86e611e4df56eabb5c..1bd31644a782eac26042fccf47bfbc65e380b63f 100644 (file)
@@ -266,6 +266,8 @@ static int rt286_jack_detect(struct rt286_priv *rt286, bool *hp, bool *mic)
                } else {
                        *mic = false;
                        regmap_write(rt286->regmap, RT286_SET_MIC1, 0x20);
+                       regmap_update_bits(rt286->regmap,
+                               RT286_CBJ_CTRL1, 0x0400, 0x0000);
                }
        } else {
                regmap_read(rt286->regmap, RT286_GET_HP_SENSE, &buf);
@@ -470,24 +472,6 @@ static int rt286_set_dmic1_event(struct snd_soc_dapm_widget *w,
        return 0;
 }
 
-static int rt286_vref_event(struct snd_soc_dapm_widget *w,
-                            struct snd_kcontrol *kcontrol, int event)
-{
-       struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm);
-
-       switch (event) {
-       case SND_SOC_DAPM_PRE_PMU:
-               snd_soc_update_bits(codec,
-                       RT286_CBJ_CTRL1, 0x0400, 0x0000);
-               mdelay(50);
-               break;
-       default:
-               return 0;
-       }
-
-       return 0;
-}
-
 static int rt286_ldo2_event(struct snd_soc_dapm_widget *w,
                             struct snd_kcontrol *kcontrol, int event)
 {
@@ -536,7 +520,7 @@ static const struct snd_soc_dapm_widget rt286_dapm_widgets[] = {
        SND_SOC_DAPM_SUPPLY_S("HV", 1, RT286_POWER_CTRL1,
                12, 1, NULL, 0),
        SND_SOC_DAPM_SUPPLY("VREF", RT286_POWER_CTRL1,
-               0, 1, rt286_vref_event, SND_SOC_DAPM_PRE_PMU),
+               0, 1, NULL, 0),
        SND_SOC_DAPM_SUPPLY_S("LDO1", 1, RT286_POWER_CTRL2,
                2, 0, NULL, 0),
        SND_SOC_DAPM_SUPPLY_S("LDO2", 2, RT286_POWER_CTRL1,
@@ -910,8 +894,6 @@ static int rt286_set_bias_level(struct snd_soc_codec *codec,
 
        case SND_SOC_BIAS_ON:
                mdelay(10);
-               snd_soc_update_bits(codec,
-                       RT286_CBJ_CTRL1, 0x0400, 0x0400);
                snd_soc_update_bits(codec,
                        RT286_DC_GAIN, 0x200, 0x0);
 
@@ -920,8 +902,6 @@ static int rt286_set_bias_level(struct snd_soc_codec *codec,
        case SND_SOC_BIAS_STANDBY:
                snd_soc_write(codec,
                        RT286_SET_AUDIO_POWER, AC_PWRST_D3);
-               snd_soc_update_bits(codec,
-                       RT286_CBJ_CTRL1, 0x0400, 0x0000);
                break;
 
        default:
index c61d38b585fb06de6b6ca0f41370c3b78c10113c..93e8c9017633f97ed264cd9e54c4aaab606d54f3 100644 (file)
@@ -776,7 +776,7 @@ static const struct snd_kcontrol_new rt5645_snd_controls[] = {
 
        /* IN1/IN2 Control */
        SOC_SINGLE_TLV("IN1 Boost", RT5645_IN1_CTRL1,
-               RT5645_BST_SFT1, 8, 0, bst_tlv),
+               RT5645_BST_SFT1, 12, 0, bst_tlv),
        SOC_SINGLE_TLV("IN2 Boost", RT5645_IN2_CTRL,
                RT5645_BST_SFT2, 8, 0, bst_tlv),
 
index 820d8fa62b5e5682fcdf52c823fa70f2f8b27631..fb8ea05c0de1d9ddb60134f68c1dc3b180a39c29 100644 (file)
@@ -3985,7 +3985,6 @@ static int rt5659_i2c_probe(struct i2c_client *i2c,
        if (rt5659 == NULL)
                return -ENOMEM;
 
-       rt5659->i2c = i2c;
        i2c_set_clientdata(i2c, rt5659);
 
        if (pdata)
@@ -4157,24 +4156,17 @@ static int rt5659_i2c_probe(struct i2c_client *i2c,
 
        INIT_DELAYED_WORK(&rt5659->jack_detect_work, rt5659_jack_detect_work);
 
-       if (rt5659->i2c->irq) {
-               ret = request_threaded_irq(rt5659->i2c->irq, NULL, rt5659_irq,
-                       IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
+       if (i2c->irq) {
+               ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+                       rt5659_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
                        | IRQF_ONESHOT, "rt5659", rt5659);
                if (ret)
                        dev_err(&i2c->dev, "Failed to request IRQ: %d\n", ret);
 
        }
 
-       ret = snd_soc_register_codec(&i2c->dev, &soc_codec_dev_rt5659,
+       return snd_soc_register_codec(&i2c->dev, &soc_codec_dev_rt5659,
                        rt5659_dai, ARRAY_SIZE(rt5659_dai));
-
-       if (ret) {
-               if (rt5659->i2c->irq)
-                       free_irq(rt5659->i2c->irq, rt5659);
-       }
-
-       return 0;
 }
 
 static int rt5659_i2c_remove(struct i2c_client *i2c)
@@ -4191,24 +4183,29 @@ void rt5659_i2c_shutdown(struct i2c_client *client)
        regmap_write(rt5659->regmap, RT5659_RESET, 0);
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id rt5659_of_match[] = {
        { .compatible = "realtek,rt5658", },
        { .compatible = "realtek,rt5659", },
-       {},
+       { },
 };
+MODULE_DEVICE_TABLE(of, rt5659_of_match);
+#endif
 
+#ifdef CONFIG_ACPI
 static struct acpi_device_id rt5659_acpi_match[] = {
-               { "10EC5658", 0},
-               { "10EC5659", 0},
-               { },
+       { "10EC5658", 0, },
+       { "10EC5659", 0, },
+       { },
 };
 MODULE_DEVICE_TABLE(acpi, rt5659_acpi_match);
+#endif
 
 struct i2c_driver rt5659_i2c_driver = {
        .driver = {
                .name = "rt5659",
                .owner = THIS_MODULE,
-               .of_match_table = rt5659_of_match,
+               .of_match_table = of_match_ptr(rt5659_of_match),
                .acpi_match_table = ACPI_PTR(rt5659_acpi_match),
        },
        .probe = rt5659_i2c_probe,
index 8f07ee903eaadf29a769f71b9c219cf9ac5bb3b6..d31c9e5bcec8adf93731532b1ef036da70ac959c 100644 (file)
@@ -1792,7 +1792,6 @@ struct rt5659_priv {
        struct snd_soc_codec *codec;
        struct rt5659_platform_data pdata;
        struct regmap *regmap;
-       struct i2c_client *i2c;
        struct gpio_desc *gpiod_ldo1_en;
        struct gpio_desc *gpiod_reset;
        struct snd_soc_jack *hs_jack;
index 21ca3a5e9f6603299f15a9bd5da3b64fa93e0f95..d374c18d4db7f9939fbdc3a5891bbdcb4cbe9e76 100644 (file)
@@ -31,7 +31,10 @@ static int sigmadsp_write_i2c(void *control_data,
 
        kfree(buf);
 
-       return ret;
+       if (ret < 0)
+               return ret;
+
+       return 0;
 }
 
 static int sigmadsp_read_i2c(void *control_data,
index 6088d30962a953cab19ee561ee288cca142dfccf..97c0f1e2388637dcdc686da1f1a5cf06037a6944 100644 (file)
@@ -2382,6 +2382,7 @@ error:
 
 static int wm5110_remove(struct platform_device *pdev)
 {
+       snd_soc_unregister_platform(&pdev->dev);
        snd_soc_unregister_codec(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
 
index ff237726775a16a4e66b8b104aaa878d9f187dab..d7f444f874604d7d8a6df4a05aa634fe5448fdfd 100644 (file)
@@ -240,13 +240,13 @@ SOC_DOUBLE_R("Capture Volume ZC Switch", WM8960_LINVOL, WM8960_RINVOL,
 SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL,
        7, 1, 1),
 
-SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT3 Volume",
+SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT3 Volume",
               WM8960_INBMIX1, 4, 7, 0, lineinboost_tlv),
-SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT2 Volume",
+SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT2 Volume",
               WM8960_INBMIX1, 1, 7, 0, lineinboost_tlv),
-SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT3 Volume",
+SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT3 Volume",
               WM8960_INBMIX2, 4, 7, 0, lineinboost_tlv),
-SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT2 Volume",
+SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT2 Volume",
               WM8960_INBMIX2, 1, 7, 0, lineinboost_tlv),
 SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT1 Volume",
                WM8960_RINPATH, 4, 3, 0, micboost_tlv),
@@ -643,29 +643,31 @@ static int wm8960_configure_clocking(struct snd_soc_codec *codec)
                return -EINVAL;
        }
 
-       /* check if the sysclk frequency is available. */
-       for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) {
-               if (sysclk_divs[i] == -1)
-                       continue;
-               sysclk = freq_out / sysclk_divs[i];
-               for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) {
-                       if (sysclk == dac_divs[j] * lrclk) {
+       if (wm8960->clk_id != WM8960_SYSCLK_PLL) {
+               /* check if the sysclk frequency is available. */
+               for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) {
+                       if (sysclk_divs[i] == -1)
+                               continue;
+                       sysclk = freq_out / sysclk_divs[i];
+                       for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) {
+                               if (sysclk != dac_divs[j] * lrclk)
+                                       continue;
                                for (k = 0; k < ARRAY_SIZE(bclk_divs); ++k)
                                        if (sysclk == bclk * bclk_divs[k] / 10)
                                                break;
                                if (k != ARRAY_SIZE(bclk_divs))
                                        break;
                        }
+                       if (j != ARRAY_SIZE(dac_divs))
+                               break;
                }
-               if (j != ARRAY_SIZE(dac_divs))
-                       break;
-       }
 
-       if (i != ARRAY_SIZE(sysclk_divs)) {
-               goto configure_clock;
-       } else if (wm8960->clk_id != WM8960_SYSCLK_AUTO) {
-               dev_err(codec->dev, "failed to configure clock\n");
-               return -EINVAL;
+               if (i != ARRAY_SIZE(sysclk_divs)) {
+                       goto configure_clock;
+               } else if (wm8960->clk_id != WM8960_SYSCLK_AUTO) {
+                       dev_err(codec->dev, "failed to configure clock\n");
+                       return -EINVAL;
+               }
        }
        /* get a available pll out frequency and set pll */
        for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) {
index ce664c239be32fda4d269aea1722bb14631e3b22..bff258d7bcea1f380403df5b9380ac7c9aabf835 100644 (file)
@@ -645,6 +645,8 @@ static int dw_i2s_probe(struct platform_device *pdev)
 
        dev->dev = &pdev->dev;
 
+       dev->i2s_reg_comp1 = I2S_COMP_PARAM_1;
+       dev->i2s_reg_comp2 = I2S_COMP_PARAM_2;
        if (pdata) {
                dev->capability = pdata->cap;
                clk_id = NULL;
@@ -652,9 +654,6 @@ static int dw_i2s_probe(struct platform_device *pdev)
                if (dev->quirks & DW_I2S_QUIRK_COMP_REG_OFFSET) {
                        dev->i2s_reg_comp1 = pdata->i2s_reg_comp1;
                        dev->i2s_reg_comp2 = pdata->i2s_reg_comp2;
-               } else {
-                       dev->i2s_reg_comp1 = I2S_COMP_PARAM_1;
-                       dev->i2s_reg_comp2 = I2S_COMP_PARAM_2;
                }
                ret = dw_configure_dai_by_pd(dev, dw_i2s_dai, res, pdata);
        } else {
index 40dfd8a3648408a2cc76bb6e4c4c3872c8e28ac3..ed8de1035cda159d0d186f2cded0fb7a97fbceb4 100644 (file)
@@ -112,20 +112,6 @@ struct fsl_ssi_rxtx_reg_val {
        struct fsl_ssi_reg_val tx;
 };
 
-static const struct reg_default fsl_ssi_reg_defaults[] = {
-       {CCSR_SSI_SCR,     0x00000000},
-       {CCSR_SSI_SIER,    0x00003003},
-       {CCSR_SSI_STCR,    0x00000200},
-       {CCSR_SSI_SRCR,    0x00000200},
-       {CCSR_SSI_STCCR,   0x00040000},
-       {CCSR_SSI_SRCCR,   0x00040000},
-       {CCSR_SSI_SACNT,   0x00000000},
-       {CCSR_SSI_STMSK,   0x00000000},
-       {CCSR_SSI_SRMSK,   0x00000000},
-       {CCSR_SSI_SACCEN,  0x00000000},
-       {CCSR_SSI_SACCDIS, 0x00000000},
-};
-
 static bool fsl_ssi_readable_reg(struct device *dev, unsigned int reg)
 {
        switch (reg) {
@@ -190,8 +176,7 @@ static const struct regmap_config fsl_ssi_regconfig = {
        .val_bits = 32,
        .reg_stride = 4,
        .val_format_endian = REGMAP_ENDIAN_NATIVE,
-       .reg_defaults = fsl_ssi_reg_defaults,
-       .num_reg_defaults = ARRAY_SIZE(fsl_ssi_reg_defaults),
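+       /* raw register count up to SACCDIS, derived from its byte offset */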
+       .num_reg_defaults_raw = CCSR_SSI_SACCDIS / sizeof(uint32_t) + 1,
        .readable_reg = fsl_ssi_readable_reg,
        .volatile_reg = fsl_ssi_volatile_reg,
        .precious_reg = fsl_ssi_precious_reg,
@@ -201,6 +186,7 @@ static const struct regmap_config fsl_ssi_regconfig = {
 
 struct fsl_ssi_soc_data {
        bool imx;
+       bool imx21regs; /* imx21-class SSI - no SACC{ST,EN,DIS} regs */
        bool offline_config;
        u32 sisr_write_mask;
 };
@@ -303,6 +289,7 @@ static struct fsl_ssi_soc_data fsl_ssi_mpc8610 = {
 
 static struct fsl_ssi_soc_data fsl_ssi_imx21 = {
        .imx = true,
+       .imx21regs = true,
        .offline_config = true,
        .sisr_write_mask = 0,
 };
@@ -586,8 +573,12 @@ static void fsl_ssi_setup_ac97(struct fsl_ssi_private *ssi_private)
         */
        regmap_write(regs, CCSR_SSI_SACNT,
                        CCSR_SSI_SACNT_AC97EN | CCSR_SSI_SACNT_FV);
-       regmap_write(regs, CCSR_SSI_SACCDIS, 0xff);
-       regmap_write(regs, CCSR_SSI_SACCEN, 0x300);
+
+       /* no SACC{ST,EN,DIS} regs on imx21-class SSI */
+       if (!ssi_private->soc->imx21regs) {
+               regmap_write(regs, CCSR_SSI_SACCDIS, 0xff);
+               regmap_write(regs, CCSR_SSI_SACCEN, 0x300);
+       }
 
        /*
         * Enable SSI, Transmit and Receive. AC97 has to communicate with the
@@ -1397,6 +1388,7 @@ static int fsl_ssi_probe(struct platform_device *pdev)
        struct resource *res;
        void __iomem *iomem;
        char name[64];
+       struct regmap_config regconfig = fsl_ssi_regconfig;
 
        of_id = of_match_device(fsl_ssi_ids, &pdev->dev);
        if (!of_id || !of_id->data)
@@ -1444,15 +1436,25 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                return PTR_ERR(iomem);
        ssi_private->ssi_phys = res->start;
 
+       if (ssi_private->soc->imx21regs) {
+               /*
+                * According to the datasheet, imx21-class SSIs
+                * don't have SACC{ST,EN,DIS} regs.
+                */
+               regconfig.max_register = CCSR_SSI_SRMSK;
+               regconfig.num_reg_defaults_raw =
+                       CCSR_SSI_SRMSK / sizeof(uint32_t) + 1;
+       }
+
        ret = of_property_match_string(np, "clock-names", "ipg");
        if (ret < 0) {
                ssi_private->has_ipg_clk_name = false;
                ssi_private->regs = devm_regmap_init_mmio(&pdev->dev, iomem,
-                       &fsl_ssi_regconfig);
+                       &regconfig);
        } else {
                ssi_private->has_ipg_clk_name = true;
                ssi_private->regs = devm_regmap_init_mmio_clk(&pdev->dev,
-                       "ipg", iomem, &fsl_ssi_regconfig);
+                       "ipg", iomem, &regconfig);
        }
        if (IS_ERR(ssi_private->regs)) {
                dev_err(&pdev->dev, "Failed to init register map\n");
index a407e833c612523e90c07a732e9cc472b64252ee..fb896b2c9ba32a058eb097d97e7bd8732667ca47 100644 (file)
@@ -72,8 +72,6 @@ static int imx_spdif_audio_probe(struct platform_device *pdev)
                goto end;
        }
 
-       platform_set_drvdata(pdev, data);
-
 end:
        of_node_put(spdif_np);
 
index 1ded8811598ef4b05333e7b8e2204f72f0b8e142..2389ab47e25f68265c720ba3f4c4b52e5c321e59 100644 (file)
@@ -99,7 +99,7 @@ static int asoc_simple_card_hw_params(struct snd_pcm_substream *substream,
                if (ret && ret != -ENOTSUPP)
                        goto err;
        }
-
+       return 0;
 err:
        return ret;
 }
index 803f95e40679def6a185c3131f838b210914b6c2..7d7c872c280dbd62b0b1b66209e1e1cfe75791ee 100644 (file)
@@ -30,11 +30,15 @@ config SND_SST_IPC_ACPI
 config SND_SOC_INTEL_SST
        tristate
        select SND_SOC_INTEL_SST_ACPI if ACPI
+       select SND_SOC_INTEL_SST_MATCH if ACPI
        depends on (X86 || COMPILE_TEST)
 
 config SND_SOC_INTEL_SST_ACPI
        tristate
 
+config SND_SOC_INTEL_SST_MATCH
+       tristate
+
 config SND_SOC_INTEL_HASWELL
        tristate
 
@@ -57,7 +61,7 @@ config SND_SOC_INTEL_HASWELL_MACH
 config SND_SOC_INTEL_BYT_RT5640_MACH
        tristate "ASoC Audio driver for Intel Baytrail with RT5640 codec"
        depends on X86_INTEL_LPSS && I2C
-       depends on DW_DMAC_CORE=y && (SND_SOC_INTEL_BYTCR_RT5640_MACH = n)
+       depends on DW_DMAC_CORE=y && (SND_SST_IPC_ACPI = n)
        select SND_SOC_INTEL_SST
        select SND_SOC_INTEL_BAYTRAIL
        select SND_SOC_RT5640
@@ -69,7 +73,7 @@ config SND_SOC_INTEL_BYT_RT5640_MACH
 config SND_SOC_INTEL_BYT_MAX98090_MACH
        tristate "ASoC Audio driver for Intel Baytrail with MAX98090 codec"
        depends on X86_INTEL_LPSS && I2C
-       depends on DW_DMAC_CORE=y
+       depends on DW_DMAC_CORE=y && (SND_SST_IPC_ACPI = n)
        select SND_SOC_INTEL_SST
        select SND_SOC_INTEL_BAYTRAIL
        select SND_SOC_MAX98090
@@ -97,6 +101,7 @@ config SND_SOC_INTEL_BYTCR_RT5640_MACH
        select SND_SOC_RT5640
        select SND_SST_MFLD_PLATFORM
        select SND_SST_IPC_ACPI
+       select SND_SOC_INTEL_SST_MATCH if ACPI
        help
           This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR
           platforms with RT5640 audio codec.
@@ -109,6 +114,7 @@ config SND_SOC_INTEL_BYTCR_RT5651_MACH
        select SND_SOC_RT5651
        select SND_SST_MFLD_PLATFORM
        select SND_SST_IPC_ACPI
+       select SND_SOC_INTEL_SST_MATCH if ACPI
        help
           This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR
           platforms with RT5651 audio codec.
@@ -121,6 +127,7 @@ config SND_SOC_INTEL_CHT_BSW_RT5672_MACH
         select SND_SOC_RT5670
         select SND_SST_MFLD_PLATFORM
         select SND_SST_IPC_ACPI
+       select SND_SOC_INTEL_SST_MATCH if ACPI
         help
           This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell
           platforms with RT5672 audio codec.
@@ -133,6 +140,7 @@ config SND_SOC_INTEL_CHT_BSW_RT5645_MACH
        select SND_SOC_RT5645
        select SND_SST_MFLD_PLATFORM
        select SND_SST_IPC_ACPI
+       select SND_SOC_INTEL_SST_MATCH if ACPI
        help
          This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell
          platforms with RT5645/5650 audio codec.
@@ -145,6 +153,7 @@ config SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH
        select SND_SOC_TS3A227E
        select SND_SST_MFLD_PLATFORM
        select SND_SST_IPC_ACPI
+       select SND_SOC_INTEL_SST_MATCH if ACPI
        help
      This adds support for the ASoC machine driver for Intel(R) Cherrytrail & Braswell
      platforms with the MAX98090 audio codec; it can also support the TI jack chip as an aux device.
index 55c33dc76ce44e2bd3d24de843ff552bb9bfaa30..52ed434cbca6a9e0a08ebc64bb8ee84111f479d9 100644 (file)
@@ -528,6 +528,7 @@ static struct snd_soc_dai_driver sst_platform_dai[] = {
        .ops = &sst_compr_dai_ops,
        .playback = {
                .stream_name = "Compress Playback",
+               .channels_min = 1,
        },
 },
 /* BE CPU  Dais */
index 7396ddb427d8f95a3491d144f597fcb9f2e382d3..2cbcbe4126611d0d1660d86279329da5bec504b1 100644 (file)
@@ -212,7 +212,10 @@ static int skylake_dmic_fixup(struct snd_soc_pcm_runtime *rtd,
 {
        struct snd_interval *channels = hw_param_interval(params,
                                                SNDRV_PCM_HW_PARAM_CHANNELS);
-       channels->min = channels->max = 4;
+       if (params_channels(params) == 2)
+               channels->min = channels->max = 2;
+       else
+               channels->min = channels->max = 4;
 
        return 0;
 }
index 668fdeee195e2f0343047c41b0b5210d5bdc4c07..fbbb25c2ceed2949bd909f4b2e2e152154493227 100644 (file)
@@ -1,13 +1,10 @@
 snd-soc-sst-dsp-objs := sst-dsp.o
-ifneq ($(CONFIG_SND_SST_IPC_ACPI),)
-snd-soc-sst-acpi-objs := sst-match-acpi.o
-else
-snd-soc-sst-acpi-objs := sst-acpi.o sst-match-acpi.o
-endif
-
+snd-soc-sst-acpi-objs := sst-acpi.o
+snd-soc-sst-match-objs := sst-match-acpi.o
 snd-soc-sst-ipc-objs := sst-ipc.o
 
 snd-soc-sst-dsp-$(CONFIG_DW_DMAC_CORE) += sst-firmware.o
 
 obj-$(CONFIG_SND_SOC_INTEL_SST) += snd-soc-sst-dsp.o snd-soc-sst-ipc.o
 obj-$(CONFIG_SND_SOC_INTEL_SST_ACPI) += snd-soc-sst-acpi.o
+obj-$(CONFIG_SND_SOC_INTEL_SST_MATCH) += snd-soc-sst-match.o
index 7a85c576dad33575efed4ca9eabb1ccf64970383..2c5eda14d51070947accc019b5d212e9adf0e6e3 100644 (file)
@@ -215,6 +215,7 @@ static struct sst_acpi_desc sst_acpi_broadwell_desc = {
        .dma_size = SST_LPT_DSP_DMA_SIZE,
 };
 
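+/* the Baytrail tables below are used only when the SST IPC ACPI driver is not built */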
+#if !IS_ENABLED(CONFIG_SND_SST_IPC_ACPI)
 static struct sst_acpi_mach baytrail_machines[] = {
        { "10EC5640", "byt-rt5640", "intel/fw_sst_0f28.bin-48kHz_i2s_master", NULL, NULL, NULL },
        { "193C9890", "byt-max98090", "intel/fw_sst_0f28.bin-48kHz_i2s_master", NULL, NULL, NULL },
@@ -231,11 +232,14 @@ static struct sst_acpi_desc sst_acpi_baytrail_desc = {
        .sst_id = SST_DEV_ID_BYT,
        .resindex_dma_base = -1,
 };
+#endif
 
 static const struct acpi_device_id sst_acpi_match[] = {
        { "INT33C8", (unsigned long)&sst_acpi_haswell_desc },
        { "INT3438", (unsigned long)&sst_acpi_broadwell_desc },
+#if !IS_ENABLED(CONFIG_SND_SST_IPC_ACPI)
        { "80860F28", (unsigned long)&sst_acpi_baytrail_desc },
+#endif
        { }
 };
 MODULE_DEVICE_TABLE(acpi, sst_acpi_match);
index dd077e116d259b6b60f509f2b3cc10b020a2cb12..3b4539d21492484ac2ba15cec78a36f4e135aee6 100644 (file)
@@ -41,3 +41,6 @@ struct sst_acpi_mach *sst_acpi_find_machine(struct sst_acpi_mach *machines)
        return NULL;
 }
 EXPORT_SYMBOL_GPL(sst_acpi_find_machine);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
index de6dac496a0d8b611d4662864597e58ee20330f3..4629372d7c8e0b728c9ec0467ae34208532fa48b 100644 (file)
@@ -688,14 +688,14 @@ int skl_unbind_modules(struct skl_sst *ctx,
        /* get src queue index */
        src_index = skl_get_queue_index(src_mcfg->m_out_pin, dst_id, out_max);
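+       /* a missing queue index means the modules are not bound; not an error */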
        if (src_index < 0)
-               return -EINVAL;
+               return 0;
 
        msg.src_queue = src_index;
 
        /* get dst queue index */
        dst_index  = skl_get_queue_index(dst_mcfg->m_in_pin, src_id, in_max);
        if (dst_index < 0)
-               return -EINVAL;
+               return 0;
 
        msg.dst_queue = dst_index;
 
@@ -747,7 +747,7 @@ int skl_bind_modules(struct skl_sst *ctx,
 
        skl_dump_bind_info(ctx, src_mcfg, dst_mcfg);
 
-       if (src_mcfg->m_state < SKL_MODULE_INIT_DONE &&
+       if (src_mcfg->m_state < SKL_MODULE_INIT_DONE ||
                dst_mcfg->m_state < SKL_MODULE_INIT_DONE)
                return 0;
 
index f3553258091a2bd7a78f2485aaee7c46fc667758..b6e6b61d10ec22349650ee5c1db3074068a180e4 100644 (file)
@@ -863,6 +863,7 @@ static int skl_get_delay_from_lpib(struct hdac_ext_bus *ebus,
                else
                        delay += hstream->bufsize;
        }
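+       /* a delay equal to the whole ring buffer really means no delay */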
+       delay = (hstream->bufsize == delay) ? 0 : delay;
 
        if (delay >= hstream->period_bytes) {
                dev_info(bus->dev,
index 4624556f486de34c396a327a0d279a57a1fdbf96..a294fee431f07363f965a81b4c9ef42eb3a42f58 100644 (file)
@@ -54,12 +54,9 @@ static int is_skl_dsp_widget_type(struct snd_soc_dapm_widget *w)
 
 /*
  * Each pipeline needs memory to be allocated. Check if we have free memory
- * from available pool. Then only add this to pool
- * This is freed when pipe is deleted
- * Note: DSP does actual memory management we only keep track for complete
- * pool
+ * from the available pool.
  */
-static bool skl_tplg_alloc_pipe_mem(struct skl *skl,
+static bool skl_is_pipe_mem_avail(struct skl *skl,
                                struct skl_module_cfg *mconfig)
 {
        struct skl_sst *ctx = skl->skl_sst;
@@ -74,10 +71,20 @@ static bool skl_tplg_alloc_pipe_mem(struct skl *skl,
                                "exceeds ppl memory available %d mem %d\n",
                                skl->resource.max_mem, skl->resource.mem);
                return false;
+       } else {
+               return true;
        }
+}
 
+/*
+ * Add the memory to the pool. It is freed when the pipe is deleted.
+ * Note: the DSP does the actual memory management; we only keep track
+ * of the complete pool.
+ */
+static void skl_tplg_alloc_pipe_mem(struct skl *skl,
+                               struct skl_module_cfg *mconfig)
+{
        skl->resource.mem += mconfig->pipe->memory_pages;
-       return true;
 }
 
 /*
@@ -85,10 +92,10 @@ static bool skl_tplg_alloc_pipe_mem(struct skl *skl,
  * quantified in MCPS (Million Clocks Per Second) required for module/pipe
  *
  * Each pipeline needs mcps to be allocated. Check if we have mcps for this
- * pipe. This adds the mcps to driver counter
- * This is removed on pipeline delete
+ * pipe.
  */
-static bool skl_tplg_alloc_pipe_mcps(struct skl *skl,
+
+static bool skl_is_pipe_mcps_avail(struct skl *skl,
                                struct skl_module_cfg *mconfig)
 {
        struct skl_sst *ctx = skl->skl_sst;
@@ -98,13 +105,18 @@ static bool skl_tplg_alloc_pipe_mcps(struct skl *skl,
                        "%s: module_id %d instance %d\n", __func__,
                        mconfig->id.module_id, mconfig->id.instance_id);
                dev_err(ctx->dev,
-                       "exceeds ppl memory available %d > mem %d\n",
+                       "exceeds ppl mcps available %d > mem %d\n",
                        skl->resource.max_mcps, skl->resource.mcps);
                return false;
+       } else {
+               return true;
        }
+}
 
+static void skl_tplg_alloc_pipe_mcps(struct skl *skl,
+                               struct skl_module_cfg *mconfig)
+{
        skl->resource.mcps += mconfig->mcps;
-       return true;
 }
 
 /*
@@ -411,7 +423,7 @@ skl_tplg_init_pipe_modules(struct skl *skl, struct skl_pipe *pipe)
                mconfig = w->priv;
 
                /* check resource available */
-               if (!skl_tplg_alloc_pipe_mcps(skl, mconfig))
+               if (!skl_is_pipe_mcps_avail(skl, mconfig))
                        return -ENOMEM;
 
                if (mconfig->is_loadable && ctx->dsp->fw_ops.load_mod) {
@@ -435,6 +447,7 @@ skl_tplg_init_pipe_modules(struct skl *skl, struct skl_pipe *pipe)
                ret = skl_tplg_set_module_params(w, ctx);
                if (ret < 0)
                        return ret;
+               skl_tplg_alloc_pipe_mcps(skl, mconfig);
        }
 
        return 0;
@@ -477,10 +490,10 @@ static int skl_tplg_mixer_dapm_pre_pmu_event(struct snd_soc_dapm_widget *w,
        struct skl_sst *ctx = skl->skl_sst;
 
        /* check resource available */
-       if (!skl_tplg_alloc_pipe_mcps(skl, mconfig))
+       if (!skl_is_pipe_mcps_avail(skl, mconfig))
                return -EBUSY;
 
-       if (!skl_tplg_alloc_pipe_mem(skl, mconfig))
+       if (!skl_is_pipe_mem_avail(skl, mconfig))
                return -ENOMEM;
 
        /*
@@ -526,11 +539,15 @@ static int skl_tplg_mixer_dapm_pre_pmu_event(struct snd_soc_dapm_widget *w,
                src_module = dst_module;
        }
 
+       skl_tplg_alloc_pipe_mem(skl, mconfig);
+       skl_tplg_alloc_pipe_mcps(skl, mconfig);
+
        return 0;
 }
 
 static int skl_tplg_bind_sinks(struct snd_soc_dapm_widget *w,
                                struct skl *skl,
+                               struct snd_soc_dapm_widget *src_w,
                                struct skl_module_cfg *src_mconfig)
 {
        struct snd_soc_dapm_path *p;
@@ -547,6 +564,10 @@ static int skl_tplg_bind_sinks(struct snd_soc_dapm_widget *w,
                dev_dbg(ctx->dev, "%s: sink widget=%s\n", __func__, p->sink->name);
 
                next_sink = p->sink;
+
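+               /* keep recursing through non-DSP widgets until a DSP sink is found */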
+               if (!is_skl_dsp_widget_type(p->sink))
+                       return skl_tplg_bind_sinks(p->sink, skl, src_w, src_mconfig);
+
                /*
                 * here we will check widgets in sink pipelines, so that
                 * can be any widgets type and we are only interested if
@@ -576,7 +597,7 @@ static int skl_tplg_bind_sinks(struct snd_soc_dapm_widget *w,
        }
 
        if (!sink)
-               return skl_tplg_bind_sinks(next_sink, skl, src_mconfig);
+               return skl_tplg_bind_sinks(next_sink, skl, src_w, src_mconfig);
 
        return 0;
 }
@@ -605,7 +626,7 @@ static int skl_tplg_pga_dapm_pre_pmu_event(struct snd_soc_dapm_widget *w,
         * if sink is not started, start sink pipe first, then start
         * this pipe
         */
-       ret = skl_tplg_bind_sinks(w, skl, src_mconfig);
+       ret = skl_tplg_bind_sinks(w, skl, w, src_mconfig);
        if (ret)
                return ret;
 
@@ -773,10 +794,7 @@ static int skl_tplg_mixer_dapm_post_pmd_event(struct snd_soc_dapm_widget *w,
                        continue;
                }
 
-               ret = skl_unbind_modules(ctx, src_module, dst_module);
-               if (ret < 0)
-                       return ret;
-
+               skl_unbind_modules(ctx, src_module, dst_module);
                src_module = dst_module;
        }
 
@@ -814,9 +832,6 @@ static int skl_tplg_pga_dapm_post_pmd_event(struct snd_soc_dapm_widget *w,
                         * This is a connecter and if path is found that means
                         * unbind between source and sink has not happened yet
                         */
-                       ret = skl_stop_pipe(ctx, sink_mconfig->pipe);
-                       if (ret < 0)
-                               return ret;
                        ret = skl_unbind_modules(ctx, src_mconfig,
                                                        sink_mconfig);
                }
@@ -842,6 +857,12 @@ static int skl_tplg_vmixer_event(struct snd_soc_dapm_widget *w,
        case SND_SOC_DAPM_PRE_PMU:
                return skl_tplg_mixer_dapm_pre_pmu_event(w, skl);
 
+       case SND_SOC_DAPM_POST_PMU:
+               return skl_tplg_mixer_dapm_post_pmu_event(w, skl);
+
+       case SND_SOC_DAPM_PRE_PMD:
+               return skl_tplg_mixer_dapm_pre_pmd_event(w, skl);
+
        case SND_SOC_DAPM_POST_PMD:
                return skl_tplg_mixer_dapm_post_pmd_event(w, skl);
        }
@@ -916,6 +937,13 @@ static int skl_tplg_tlv_control_get(struct snd_kcontrol *kcontrol,
                skl_get_module_params(skl->skl_sst, (u32 *)bc->params,
                                      bc->max, bc->param_id, mconfig);
 
+       /* decrement size for TLV header */
+       size -= 2 * sizeof(u32);
+
+       /* check size as we don't want to send kernel data */
+       if (size > bc->max)
+               size = bc->max;
+
        if (bc->params) {
                if (copy_to_user(data, &bc->param_id, sizeof(u32)))
                        return -EFAULT;
@@ -1510,6 +1538,7 @@ int skl_tplg_init(struct snd_soc_platform *platform, struct hdac_ext_bus *ebus)
                                        &skl_tplg_ops, fw, 0);
        if (ret < 0) {
                dev_err(bus->dev, "tplg component load failed %d\n", ret);
+               release_firmware(fw);
                return -EINVAL;
        }
 
index 443a15de94b5fbc3813f126da74e4bb3781f6e15..092705e73db497fb1d75c1c9b45a3c2aaaa34507 100644 (file)
@@ -614,8 +614,6 @@ static int skl_probe(struct pci_dev *pci,
                goto out_unregister;
 
        /*configure PM */
-       pm_runtime_set_autosuspend_delay(bus->dev, SKL_SUSPEND_DELAY);
-       pm_runtime_use_autosuspend(bus->dev);
        pm_runtime_put_noidle(bus->dev);
        pm_runtime_allow(bus->dev);
 
index 15c04e2eae34a010d90abd645c8cdfb3bf07df41..9769676753878b833513d0e863dab776bdaae635 100644 (file)
@@ -9,7 +9,7 @@ config SND_SOC_MEDIATEK
 
 config SND_SOC_MT8173_MAX98090
        tristate "ASoC Audio driver for MT8173 with MAX98090 codec"
-       depends on SND_SOC_MEDIATEK
+       depends on SND_SOC_MEDIATEK && I2C
        select SND_SOC_MAX98090
        help
          This adds ASoC driver for Mediatek MT8173 boards
@@ -19,7 +19,7 @@ config SND_SOC_MT8173_MAX98090
 
 config SND_SOC_MT8173_RT5650_RT5676
        tristate "ASoC Audio driver for MT8173 with RT5650 RT5676 codecs"
-       depends on SND_SOC_MEDIATEK
+       depends on SND_SOC_MEDIATEK && I2C
        select SND_SOC_RT5645
        select SND_SOC_RT5677
        help
index c866ade28ad0a6a0005a9ec97e09c05bfa63a783..a6c7b8d87cd2f15198334840b9013980f87313c0 100644 (file)
@@ -381,9 +381,19 @@ static int mxs_saif_startup(struct snd_pcm_substream *substream,
        __raw_writel(BM_SAIF_CTRL_CLKGATE,
                saif->base + SAIF_CTRL + MXS_CLR_ADDR);
 
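+       /* prepare the clock here; trigger enables it and shutdown unprepares it */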
+       clk_prepare(saif->clk);
+
        return 0;
 }
 
+static void mxs_saif_shutdown(struct snd_pcm_substream *substream,
+                             struct snd_soc_dai *cpu_dai)
+{
+       struct mxs_saif *saif = snd_soc_dai_get_drvdata(cpu_dai);
+
+       clk_unprepare(saif->clk);
+}
+
 /*
  * Should only be called when port is inactive,
  * although it can be called multiple times by upper layers.
@@ -424,8 +434,6 @@ static int mxs_saif_hw_params(struct snd_pcm_substream *substream,
                return ret;
        }
 
-       /* prepare clk in hw_param, enable in trigger */
-       clk_prepare(saif->clk);
        if (saif != master_saif) {
                /*
                * Set an initial clock rate for the saif internal logic to work
@@ -611,6 +619,7 @@ static int mxs_saif_trigger(struct snd_pcm_substream *substream, int cmd,
 
 static const struct snd_soc_dai_ops mxs_saif_dai_ops = {
        .startup = mxs_saif_startup,
+       .shutdown = mxs_saif_shutdown,
        .trigger = mxs_saif_trigger,
        .prepare = mxs_saif_prepare,
        .hw_params = mxs_saif_hw_params,
index 79688aa1941a5c4168c2c52924c79722ce6172c7..4aeb8e1a7160b812a6ab119cfec657b9f55b16cb 100644 (file)
@@ -440,18 +440,18 @@ static irqreturn_t lpass_platform_lpaif_irq(int irq, void *data)
 }
 
 static int lpass_platform_alloc_buffer(struct snd_pcm_substream *substream,
-               struct snd_soc_pcm_runtime *soc_runtime)
+               struct snd_soc_pcm_runtime *rt)
 {
        struct snd_dma_buffer *buf = &substream->dma_buffer;
        size_t size = lpass_platform_pcm_hardware.buffer_bytes_max;
 
        buf->dev.type = SNDRV_DMA_TYPE_DEV;
-       buf->dev.dev = soc_runtime->dev;
+       buf->dev.dev = rt->platform->dev;
        buf->private_data = NULL;
-       buf->area = dma_alloc_coherent(soc_runtime->dev, size, &buf->addr,
+       buf->area = dma_alloc_coherent(rt->platform->dev, size, &buf->addr,
                        GFP_KERNEL);
        if (!buf->area) {
-               dev_err(soc_runtime->dev, "%s: Could not allocate DMA buffer\n",
+               dev_err(rt->platform->dev, "%s: Could not allocate DMA buffer\n",
                                __func__);
                return -ENOMEM;
        }
@@ -461,12 +461,12 @@ static int lpass_platform_alloc_buffer(struct snd_pcm_substream *substream,
 }
 
 static void lpass_platform_free_buffer(struct snd_pcm_substream *substream,
-               struct snd_soc_pcm_runtime *soc_runtime)
+               struct snd_soc_pcm_runtime *rt)
 {
        struct snd_dma_buffer *buf = &substream->dma_buffer;
 
        if (buf->area) {
-               dma_free_coherent(soc_runtime->dev, buf->bytes, buf->area,
+               dma_free_coherent(rt->dev, buf->bytes, buf->area,
                                buf->addr);
        }
        buf->area = NULL;
@@ -499,9 +499,6 @@ static int lpass_platform_pcm_new(struct snd_soc_pcm_runtime *soc_runtime)
 
        snd_soc_pcm_set_drvdata(soc_runtime, data);
 
-       soc_runtime->dev->coherent_dma_mask = DMA_BIT_MASK(32);
-       soc_runtime->dev->dma_mask = &soc_runtime->dev->coherent_dma_mask;
-
        ret = lpass_platform_alloc_buffer(substream, soc_runtime);
        if (ret)
                return ret;
index 5a2812fa8946071014d54f9c18ae8311be56fe4e..0d37079879002d472c538b69ccc3321c0d7772ad 100644 (file)
@@ -310,7 +310,7 @@ struct dapm_kcontrol_data {
 };
 
 static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget,
-       struct snd_kcontrol *kcontrol)
+       struct snd_kcontrol *kcontrol, const char *ctrl_name)
 {
        struct dapm_kcontrol_data *data;
        struct soc_mixer_control *mc;
@@ -333,7 +333,7 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget,
                if (mc->autodisable) {
                        struct snd_soc_dapm_widget template;
 
-                       name = kasprintf(GFP_KERNEL, "%s %s", kcontrol->id.name,
+                       name = kasprintf(GFP_KERNEL, "%s %s", ctrl_name,
                                         "Autodisable");
                        if (!name) {
                                ret = -ENOMEM;
@@ -371,7 +371,7 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget,
                if (e->autodisable) {
                        struct snd_soc_dapm_widget template;
 
-                       name = kasprintf(GFP_KERNEL, "%s %s", kcontrol->id.name,
+                       name = kasprintf(GFP_KERNEL, "%s %s", ctrl_name,
                                         "Autodisable");
                        if (!name) {
                                ret = -ENOMEM;
@@ -871,7 +871,7 @@ static int dapm_create_or_share_kcontrol(struct snd_soc_dapm_widget *w,
 
                kcontrol->private_free = dapm_kcontrol_free;
 
-               ret = dapm_kcontrol_data_alloc(w, kcontrol);
+               ret = dapm_kcontrol_data_alloc(w, kcontrol, name);
                if (ret) {
                        snd_ctl_free_one(kcontrol);
                        goto exit_free;
index e898b427be7ee34961451b2d8b177848415436ed..1af4f23697a781416f89794050ab41fb631f0171 100644 (file)
@@ -1810,7 +1810,8 @@ int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream)
                    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) &&
                    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) &&
                    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED) &&
-                   (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP))
+                   (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
+                   (be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND))
                        continue;
 
                dev_dbg(be->dev, "ASoC: hw_free BE %s\n",
index d75deba5617db05dc02407b6c669d61ee1695a5e..dfcd38647606445d7dab130178c7ca4f765ff33c 100644 (file)
@@ -22,6 +22,7 @@ config SND_SUN_AMD7930
 config SND_SUN_CS4231
        tristate "Sun CS4231"
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for CS4231 sound device on Sun.
 
index 39522367897caedb4be23f1f92579d31a4d1fe38..fac7e6eb9529c26d2bb3e30ceaba11b42f55d15b 100644 (file)
@@ -221,6 +221,8 @@ static int snd_at73c213_pcm_open(struct snd_pcm_substream *substream)
        runtime->hw = snd_at73c213_playback_hw;
        chip->substream = substream;
 
+       clk_enable(chip->ssc->clk);
+
        return 0;
 }
 
@@ -228,6 +230,7 @@ static int snd_at73c213_pcm_close(struct snd_pcm_substream *substream)
 {
        struct snd_at73c213 *chip = snd_pcm_substream_chip(substream);
        chip->substream = NULL;
+       clk_disable(chip->ssc->clk);
        return 0;
 }
 
@@ -897,6 +900,8 @@ static int snd_at73c213_dev_init(struct snd_card *card,
        chip->card = card;
        chip->irq = -1;
 
+       clk_enable(chip->ssc->clk);
+
        retval = request_irq(irq, snd_at73c213_interrupt, 0, "at73c213", chip);
        if (retval) {
                dev_dbg(&chip->spi->dev, "unable to request irq %d\n", irq);
@@ -935,6 +940,8 @@ out_irq:
        free_irq(chip->irq, chip);
        chip->irq = -1;
 out:
+       clk_disable(chip->ssc->clk);
+
        return retval;
 }
 
@@ -1012,7 +1019,9 @@ static int snd_at73c213_remove(struct spi_device *spi)
        int retval;
 
        /* Stop playback. */
+       clk_enable(chip->ssc->clk);
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+       clk_disable(chip->ssc->clk);
 
        /* Mute sound. */
        retval = snd_at73c213_write_reg(chip, DAC_LMPG, 0x3f);
@@ -1080,6 +1089,7 @@ static int snd_at73c213_suspend(struct device *dev)
        struct snd_at73c213 *chip = card->private_data;
 
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+       clk_disable(chip->ssc->clk);
        clk_disable(chip->board->dac_clk);
 
        return 0;
@@ -1091,6 +1101,7 @@ static int snd_at73c213_resume(struct device *dev)
        struct snd_at73c213 *chip = card->private_data;
 
        clk_enable(chip->board->dac_clk);
+       clk_enable(chip->ssc->clk);
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXEN));
 
        return 0;
index cc39f63299ef0a1fe58572ab23139f46725f9587..007cf583112154bc56616234d25e92f4e5e8212c 100644 (file)
@@ -2455,7 +2455,6 @@ int snd_usbmidi_create(struct snd_card *card,
        else
                err = snd_usbmidi_create_endpoints(umidi, endpoints);
        if (err < 0) {
-               snd_usbmidi_free(umidi);
                return err;
        }
 
index 23ea6d800c4c5283adc27d88fd26e74feaf066e0..4f6ce1cac8e20ef8b504073a6dfcc6236c0cf90c 100644 (file)
@@ -1121,6 +1121,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        switch (chip->usb_id) {
        case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
        case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+       case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
        case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
        case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
        case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
@@ -1205,8 +1206,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev)
         * "Playback Design" products need a 50ms delay after setting the
         * USB interface.
         */
-       if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+       switch (le16_to_cpu(dev->descriptor.idVendor)) {
+       case 0x23ba: /* Playback Design */
+       case 0x0644: /* TEAC Corp. */
                mdelay(50);
+               break;
+       }
 }
 
 void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1226,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
            (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
                mdelay(20);
 
+       /*
+        * "TEAC Corp." products need a 20ms delay after each
+        * class compliant request
+        */
+       if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+           (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+               mdelay(20);
+
        /* Marantz/Denon devices with USB DAC functionality need a delay
         * after each class compliant request
         */
@@ -1269,7 +1282,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */
        case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */
        case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */
-       case USB_ID(0x22d8, 0x0416): /* OPPO HA-1*/
+       case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */
                if (fp->altsetting == 2)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
@@ -1278,6 +1291,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
        case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */
        case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */
+       case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */
                if (fp->altsetting == 3)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
index ea69ce35e902a828b40839cfd7beb14bb9ff29c8..c3bd294a63d1f7e4a75b2a950bbe398f33703cd6 100644 (file)
@@ -3746,7 +3746,7 @@ static const struct flag flags[] = {
        { "NET_TX_SOFTIRQ", 2 },
        { "NET_RX_SOFTIRQ", 3 },
        { "BLOCK_SOFTIRQ", 4 },
-       { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+       { "IRQ_POLL_SOFTIRQ", 5 },
        { "TASKLET_SOFTIRQ", 6 },
        { "SCHED_SOFTIRQ", 7 },
        { "HRTIMER_SOFTIRQ", 8 },
index 0a22407e1d7d8abb092ff3d24b6829a27c234bbe..5d34815c7ccb8e02ac6d4db0d82d4712af73f343 100644 (file)
@@ -77,6 +77,9 @@ include config/utilities.mak
 # Define NO_AUXTRACE if you do not want AUX area tracing support
 #
 # Define NO_LIBBPF if you do not want BPF support
+#
+# Define FEATURES_DUMP to provide a feature detection dump file
+# and bypass the feature detection
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -166,6 +169,15 @@ ifeq ($(config),1)
 include config/Makefile
 endif
 
+# The FEATURE_DUMP_EXPORT holds the location of the actual
+# FEATURE_DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject)
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(FEATURES_DUMP)
+endif
+
 export prefix bindir sharedir sysconfdir DESTDIR
 
 # sparse is architecture-neutral, which means that we need to tell it
@@ -436,7 +448,7 @@ $(LIBAPI)-clean:
        $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
 $(LIBBPF): fixdep FORCE
-       $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
+       $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
 $(LIBBPF)-clean:
        $(call QUIET_CLEAN, libbpf)
@@ -610,6 +622,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
        $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
        $(python-clean)
 
+#
+# Copy FEATURE-DUMP into the $(FEATURE_DUMP_COPY)
+# file if defined, with no further action.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+       @cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+       @echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+       @echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
+
 #
 # Trick: if ../../.git does not exist - we are building out of tree for example,
 # then force version regeneration:
index 3e89ba825f6bf3b3f8b15b9110564c63283998b4..7f064eb371589aa887e112cddb54145d6268141b 100644 (file)
@@ -17,7 +17,7 @@ static pid_t spawn(void)
        if (pid)
                return pid;
 
-       while(1);
+       while(1)
                sleep(5);
        return 0;
 }
index e5959c136a1907ee9905f044ae2e7786cb47d075..511141b102e8464fe3e102007f792220306ad3b9 100644 (file)
@@ -181,7 +181,11 @@ LDFLAGS += -Wl,-z,noexecstack
 
 EXTLIBS = -lpthread -lrt -lm -ldl
 
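+# When FEATURES_DUMP is set, include the pre-generated dump instead of re-running detection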
+ifeq ($(FEATURES_DUMP),)
 include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
 
 ifeq ($(feature-stackprotector-all), 1)
   CFLAGS += -fstack-protector-all
index df38decc48c32df72e9dfa86ef3f7e7b035322e7..f918015512af96d1173891dac2e8f460ff65ca85 100644 (file)
@@ -5,7 +5,7 @@ ifeq ($(MAKECMDGOALS),)
 # no target specified, trigger the whole suite
 all:
        @echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile
-       @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf
+       @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
 else
 # run only specific test over 'Makefile'
 %:
@@ -13,6 +13,26 @@ else
 endif
 else
 PERF := .
+PERF_O := $(PERF)
+O_OPT :=
+
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  PERF_O := $(FULL_O)
+  ifeq ($(SET_O),1)
+    O_OPT := 'O=$(FULL_O)'
+  endif
+  K_O_OPT := 'O=$(FULL_O)'
+endif
+
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(cores),0)
+    cores := 1
+  endif
+  PARALLEL_OPT="-j$(cores)"
+endif
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -156,11 +176,11 @@ test_make_doc    := $(test_ok)
 test_make_help_O := $(test_ok)
 test_make_doc_O  := $(test_ok)
 
-test_make_python_perf_so := test -f $(PERF)/python/perf.so
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
 
-test_make_perf_o           := test -f $(PERF)/perf.o
-test_make_util_map_o       := test -f $(PERF)/util/map.o
-test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o
+test_make_perf_o           := test -f $(PERF_O)/perf.o
+test_make_util_map_o       := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
 
 define test_dest_files
   for file in $(1); do                         \
@@ -227,7 +247,7 @@ test_make_perf_o_O            := test -f $$TMP_O/perf.o
 test_make_util_map_o_O        := test -f $$TMP_O/util/map.o
 test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
 
-test_default = test -x $(PERF)/perf
+test_default = test -x $(PERF_O)/perf
 test = $(if $(test_$1),$(test_$1),$(test_default))
 
 test_default_O = test -x $$TMP_O/perf
@@ -247,12 +267,12 @@ endif
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
+clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
 
 $(run):
        $(call clean)
        @TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
+       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1; \
        echo "  test: $(call test,$@)" >> $@ 2>&1; \
@@ -263,7 +283,7 @@ $(run_O):
        $(call clean)
        @TMP_O=$$(mktemp -d); \
        TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
+       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1 && \
        echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
@@ -276,17 +296,22 @@ tarpkg:
        ( eval $$cmd ) >> $@ 2>&1 && \
        rm -f $@
 
+KERNEL_O := ../..
+ifneq ($(O),)
+  KERNEL_O := $(O)
+endif
+
 make_kernelsrc:
-       @echo "- make -C <kernelsrc> tools/perf"
+       @echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
        $(call clean); \
-       (make -C ../.. tools/perf) > $@ 2>&1 && \
-       test -x perf && rm -f $@ || (cat $@ ; false)
+       (make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
+       test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
-       @echo "- make -C <kernelsrc>/tools perf"
+       @echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
        $(call clean); \
-       (make -C ../../tools perf) > $@ 2>&1 && \
-       test -x perf && rm -f $@ || (cat $@ ; false)
+       (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
+       test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
        @echo OK
index d4d7cc27252f1184bf2d678b6460f138099efd28..718bd46d47fa7bc88674a192dd1a8e8ab1fb7ca9 100644 (file)
@@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
                                nd = browser->curr_hot;
                        break;
                case K_UNTAB:
-                       if (nd != NULL)
+                       if (nd != NULL) {
                                nd = rb_next(nd);
                                if (nd == NULL)
                                        nd = rb_first(&browser->entries);
-                       else
+                       else
                                nd = browser->curr_hot;
                        break;
                case K_F1:
index c226303e3da045743fe676c7c2ffc0ff13eac674..68a7612019dc3c3be315604693b68170183044d6 100644 (file)
@@ -131,6 +131,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
                        symlen = unresolved_col_width + 4 + 2;
                        hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
                                           symlen);
+                       hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+                                          symlen);
                }
 
                if (h->mem_info->iaddr.sym) {
index 81a2eb77ba7ff56f7558328437fa32e8345051cd..05d815851be19bd40e00672c913a83ad2003175a 100644 (file)
@@ -2068,6 +2068,15 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
                err = -ENOMEM;
                goto err_free_queues;
        }
+
+       /*
+        * Since this thread will not be kept in any rbtree nor in a
+        * list, initialize its list node so that at thread__put() the
+        * current thread lifetime assumption is kept and we don't segfault
+        * at list_del_init().
+        */
+       INIT_LIST_HEAD(&pt->unknown_thread->node);
+
        err = thread__set_comm(pt->unknown_thread, "unknown", 0);
        if (err)
                goto err_delete_thread;
index 4f7b0efdde2fa0c0c9f7cab32c6a20809ea47a4f..813d9b272c813b0dd749470f6209aa3a0a0607b4 100644 (file)
@@ -399,6 +399,9 @@ static void tracepoint_error(struct parse_events_error *e, int err,
 {
        char help[BUFSIZ];
 
+       if (!e)
+               return;
+
        /*
         * We get error directly from syscall errno ( > 0),
         * or from encoded pointer's error ( < 0).
index 2be10fb27172727fe15d5f3822ff194a5ad96d95..4ce5c5e18f48cd43777d2eb9c77aea35944440b8 100644 (file)
@@ -686,8 +686,9 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
                pf->fb_ops = NULL;
 #if _ELFUTILS_PREREQ(0, 142)
        } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
-                  pf->cfi != NULL) {
-               if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
+                  (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
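+               /* try the .eh_frame CFI first and fall back to .debug_frame */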
+               if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
+                    (dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, &frame) != 0)) ||
                    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
                        pr_warning("Failed to get call frame on 0x%jx\n",
                                   (uintmax_t)pf->addr);
@@ -1015,8 +1016,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
        return DWARF_CB_OK;
 }
 
-/* Find probe points from debuginfo */
-static int debuginfo__find_probes(struct debuginfo *dbg,
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
                                  struct probe_finder *pf)
 {
        struct perf_probe_point *pp = &pf->pev->point;
@@ -1025,27 +1025,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
        Dwarf_Die *diep;
        int ret = 0;
 
-#if _ELFUTILS_PREREQ(0, 142)
-       Elf *elf;
-       GElf_Ehdr ehdr;
-       GElf_Shdr shdr;
-
-       /* Get the call frame information from this dwarf */
-       elf = dwarf_getelf(dbg->dbg);
-       if (elf == NULL)
-               return -EINVAL;
-
-       if (gelf_getehdr(elf, &ehdr) == NULL)
-               return -EINVAL;
-
-       if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
-           shdr.sh_type == SHT_PROGBITS) {
-               pf->cfi = dwarf_getcfi_elf(elf);
-       } else {
-               pf->cfi = dwarf_getcfi(dbg->dbg);
-       }
-#endif
-
        off = 0;
        pf->lcache = intlist__new(NULL);
        if (!pf->lcache)
@@ -1108,6 +1087,39 @@ found:
        return ret;
 }
 
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+                                 struct probe_finder *pf)
+{
+       int ret = 0;
+
+#if _ELFUTILS_PREREQ(0, 142)
+       Elf *elf;
+       GElf_Ehdr ehdr;
+       GElf_Shdr shdr;
+
+       if (pf->cfi_eh || pf->cfi_dbg)
+               return debuginfo__find_probe_location(dbg, pf);
+
+       /* Get the call frame information from this dwarf */
+       elf = dwarf_getelf(dbg->dbg);
+       if (elf == NULL)
+               return -EINVAL;
+
+       if (gelf_getehdr(elf, &ehdr) == NULL)
+               return -EINVAL;
+
+       if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+           shdr.sh_type == SHT_PROGBITS)
+               pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+       pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+#endif
+
+       ret = debuginfo__find_probe_location(dbg, pf);
+       return ret;
+}
+
 struct local_vars_finder {
        struct probe_finder *pf;
        struct perf_probe_arg *args;
index bed82716e1b44960a0ebc435d0ba1e94ed30730d..0aec7704e39540b51a449cdf395b6a16ab682fba 100644 (file)
@@ -76,7 +76,10 @@ struct probe_finder {
 
        /* For variable searching */
 #if _ELFUTILS_PREREQ(0, 142)
-       Dwarf_CFI               *cfi;           /* Call Frame Information */
+       /* Call Frame Information from .eh_frame */
+       Dwarf_CFI               *cfi_eh;
+       /* Call Frame Information from .debug_frame */
+       Dwarf_CFI               *cfi_dbg;
 #endif
        Dwarf_Op                *fb_ops;        /* Frame base attribute */
        struct perf_probe_arg   *pvar;          /* Current target variable */
index d5636ba94b20f722d3f521614e8fae08b620fad4..40b7a0d0905b8d7f01972c6e38e225f108b15133 100644 (file)
@@ -1149,7 +1149,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
 
                machine = machines__find(machines, pid);
                if (!machine)
-                       machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
+                       machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
                return machine;
        }
 
index 2f901d15e06370d26d579869d351811f4d31f940..afb0c45eba34ba8db7a6cb6d258326f545f7aca1 100644 (file)
@@ -310,7 +310,16 @@ int perf_stat_process_counter(struct perf_stat_config *config,
        int i, ret;
 
        aggr->val = aggr->ena = aggr->run = 0;
-       init_stats(ps->res_stats);
+
+       /*
+        * We calculate the counter's data every interval,
+        * and the display code shows the ps->res_stats
+        * average. We need to zero the stats in interval
+        * mode, otherwise the overall running average is
+        * shown for each interval instead of that
+        * interval's own value.
+        */
+       if (config->interval)
+               init_stats(ps->res_stats);
 
        if (counter->per_pkg)
                zero_per_pkg(counter);
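
A toy of the behaviour the comment describes; init_stats()/update_stats() here are simplified stand-ins for perf's stats helpers (the real ones track a running mean and variance):

    #include <stdio.h>

    struct stats { double sum; unsigned n; };

    static void init_stats(struct stats *s) { s->sum = 0; s->n = 0; }
    static void update_stats(struct stats *s, double v) { s->sum += v; s->n++; }
    static double avg_stats(const struct stats *s) { return s->n ? s->sum / s->n : 0; }

    int main(void)
    {
            double samples[2][2] = { { 100, 100 }, { 400, 400 } };
            struct stats s;

            for (int i = 0; i < 2; i++) {
                    /* Without this per-interval reset, interval 1 would
                     * print 250 (the overall running average) instead of
                     * its own average, 400. */
                    init_stats(&s);
                    for (int j = 0; j < 2; j++)
                            update_stats(&s, samples[i][j]);
                    printf("interval %d avg: %g\n", i, avg_stats(&s));
            }
            return 0;
    }
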
index 3b2de6eb33763b0ab1a23195529fda8d6367eb1d..ab02209a7cf3b162431bfc006287f5fbd8c68f43 100644 (file)
@@ -1466,7 +1466,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
         * Read the build id if possible. This is required for
         * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
         */
-       if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0)
+       if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
                dso__set_build_id(dso, build_id);
 
        /*
index 8ff7d620d9427490f529cf7ec63471ae1bcb3b17..33b52eaa39db293b0de4d41c7312f88b39a2c4e2 100644 (file)
@@ -209,7 +209,7 @@ static const struct flag flags[] = {
        { "NET_TX_SOFTIRQ", 2 },
        { "NET_RX_SOFTIRQ", 3 },
        { "BLOCK_SOFTIRQ", 4 },
-       { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+       { "IRQ_POLL_SOFTIRQ", 5 },
        { "TASKLET_SOFTIRQ", 6 },
        { "SCHED_SOFTIRQ", 7 },
        { "HRTIMER_SOFTIRQ", 8 },
index 7ec7df9e7fc731ab7e118425b5bd01acaab349d5..0c1a7e65bb815a692f9b20cd97b337c181699308 100644 (file)
@@ -113,7 +113,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
        struct nfit_test_resource *nfit_res = get_nfit_res(addr);
 
index 90bd2ea4103232b68e29e167873ff21a3f76b262..b3281dcd4a5d5502af9f66720dfcdccb1aca4d58 100644 (file)
@@ -217,13 +217,16 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
        return rc;
 }
 
+#define NFIT_TEST_ARS_RECORDS 4
+
 static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
                unsigned int buf_len)
 {
        if (buf_len < sizeof(*nd_cmd))
                return -EINVAL;
 
-       nd_cmd->max_ars_out = 256;
+       nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+               + NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record);
        nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
 
        return 0;
@@ -246,7 +249,8 @@ static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd,
        if (buf_len < sizeof(*nd_cmd))
                return -EINVAL;
 
-       nd_cmd->out_length = 256;
+       nd_cmd->out_length = sizeof(struct nd_cmd_ars_status);
+       /* TODO: emit error records */
        nd_cmd->num_records = 0;
        nd_cmd->address = 0;
        nd_cmd->length = -1ULL;
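
The sizing change follows the usual header-plus-flexible-array pattern for variable-length command output. A hedged sketch of the arithmetic -- the struct layouts below are illustrative stand-ins, not the real nd_cmd definitions:

    #include <stdio.h>

    struct ars_record { unsigned long long addr, len; };
    struct ars_status {
            unsigned int status;
            unsigned int out_length;
            struct ars_record records[];    /* flexible array member */
    };

    #define ARS_RECORDS 4

    int main(void)
    {
            /* room for the fixed header plus exactly ARS_RECORDS entries,
             * instead of a magic 256 that drifts out of sync with the
             * structures */
            size_t max_out = sizeof(struct ars_status)
                    + ARS_RECORDS * sizeof(struct ars_record);

            printf("max_ars_out = %zu bytes\n", max_out);
            return 0;
    }
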
index 77edcdcc016bbe9e891dbee8ce27c5824caaae63..057278448515a455a17ff2b48af2bf0902c2e7fb 100755 (executable)
@@ -88,7 +88,11 @@ test_delete()
                exit 1
        fi
 
-       rm $file
+       rm $file 2>/dev/null
+       if [ $? -ne 0 ]; then
+               chattr -i $file
+               rm $file
+       fi
 
        if [ -e $file ]; then
                echo "$file couldn't be deleted" >&2
@@ -111,6 +115,7 @@ test_zero_size_delete()
                exit 1
        fi
 
+       chattr -i $file
        printf "$attrs" > $file
 
        if [ -e $file ]; then
@@ -141,7 +146,11 @@ test_valid_filenames()
                        echo "$file could not be created" >&2
                        ret=1
                else
-                       rm $file
+                       rm $file 2>/dev/null
+                       if [ $? -ne 0 ]; then
+                               chattr -i $file
+                               rm $file
+                       fi
                fi
        done
 
@@ -174,7 +183,11 @@ test_invalid_filenames()
 
                if [ -e $file ]; then
                        echo "Creating $file should have failed" >&2
-                       rm $file
+                       rm $file 2>/dev/null
+                       if [ $? -ne 0 ]; then
+                               chattr -i $file
+                               rm $file
+                       fi
                        ret=1
                fi
        done
index 8c0764407b3c349431a6b78ca595e450e2661066..4af74f7330365e668d9e1379f6894f12d974374d 100644 (file)
@@ -1,10 +1,68 @@
+#include <errno.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <linux/fs.h>
+
+static int set_immutable(const char *path, int immutable)
+{
+       unsigned int flags;
+       int fd;
+       int rc;
+       int error;
+
+       fd = open(path, O_RDONLY);
+       if (fd < 0)
+               return fd;
+
+       rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+       if (rc < 0) {
+               error = errno;
+               close(fd);
+               errno = error;
+               return rc;
+       }
+
+       if (immutable)
+               flags |= FS_IMMUTABLE_FL;
+       else
+               flags &= ~FS_IMMUTABLE_FL;
+
+       rc = ioctl(fd, FS_IOC_SETFLAGS, &flags);
+       error = errno;
+       close(fd);
+       errno = error;
+       return rc;
+}
+
+static int get_immutable(const char *path)
+{
+       unsigned int flags;
+       int fd;
+       int rc;
+       int error;
+
+       fd = open(path, O_RDONLY);
+       if (fd < 0)
+               return fd;
+
+       rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+       if (rc < 0) {
+               error = errno;
+               close(fd);
+               errno = error;
+               return rc;
+       }
+       close(fd);
+       if (flags & FS_IMMUTABLE_FL)
+               return 1;
+       return 0;
+}
 
 int main(int argc, char **argv)
 {
@@ -27,7 +85,7 @@ int main(int argc, char **argv)
        buf[4] = 0;
 
        /* create a test variable */
-       fd = open(path, O_WRONLY | O_CREAT);
+       fd = open(path, O_WRONLY | O_CREAT, 0600);
        if (fd < 0) {
                perror("open(O_WRONLY)");
                return EXIT_FAILURE;
@@ -41,6 +99,18 @@ int main(int argc, char **argv)
 
        close(fd);
 
+       rc = get_immutable(path);
+       if (rc < 0) {
+               perror("ioctl(FS_IOC_GETFLAGS)");
+               return EXIT_FAILURE;
+       } else if (rc) {
+               rc = set_immutable(path, 0);
+               if (rc < 0) {
+                       perror("ioctl(FS_IOC_SETFLAGS)");
+                       return EXIT_FAILURE;
+               }
+       }
+
        fd = open(path, O_RDONLY);
        if (fd < 0) {
                perror("open");
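
Building on the two helpers added above, the test's delete path is the programmatic equivalent of "chattr -i": newer kernels mark efivarfs files immutable by default, so the flag must be cleared before the variable can be modified or unlinked. A sketch (make_deletable is a hypothetical wrapper, not part of the test):

    /* returns 0 when path is writable/deletable, negative on error */
    static int make_deletable(const char *path)
    {
            int rc = get_immutable(path);

            if (rc < 0)
                    return rc;                      /* couldn't read flags */
            if (rc)
                    return set_immutable(path, 0);  /* clear FS_IMMUTABLE_FL */
            return 0;                               /* already mutable */
    }
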
index 773e276ff90b982a2bf24a44388f76b6ccdf4e9e..1e1abe0ad354267507dc0514351d651e19fd1fb6 100644 (file)
@@ -39,28 +39,23 @@ instance_slam() {
 }
 
 instance_slam &
-x=`jobs -l`
-p1=`echo $x | cut -d' ' -f2`
+p1=$!
 echo $p1
 
 instance_slam &
-x=`jobs -l | tail -1`
-p2=`echo $x | cut -d' ' -f2`
+p2=$!
 echo $p2
 
 instance_slam &
-x=`jobs -l | tail -1`
-p3=`echo $x | cut -d' ' -f2`
+p3=$!
 echo $p3
 
 instance_slam &
-x=`jobs -l | tail -1`
-p4=`echo $x | cut -d' ' -f2`
+p4=$!
 echo $p4
 
 instance_slam &
-x=`jobs -l | tail -1`
-p5=`echo $x | cut -d' ' -f2`
+p5=$!
 echo $p5
 
 ls -lR >/dev/null
index e86d937cc22c04cb36aecfe4fb4af0c5deed3825..60fe3c569bd90f9cbfdc9664720c598e9430e8a3 100644 (file)
@@ -45,7 +45,17 @@ static inline int ksft_exit_fail(void)
 }
 #endif
 
-#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+       return syscall(__NR_clock_adjtime, id, tx);
+}
+
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
@@ -193,10 +203,137 @@ out:
 }
 
 
+int set_offset(long long offset, int use_nano)
+{
+       struct timex tmx = {};
+       int ret;
+
+       tmx.modes = ADJ_SETOFFSET;
+       if (use_nano) {
+               tmx.modes |= ADJ_NANO;
+
+               tmx.time.tv_sec = offset / NSEC_PER_SEC;
+               tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+               if (offset < 0 && tmx.time.tv_usec) {
+                       tmx.time.tv_sec -= 1;
+                       tmx.time.tv_usec += NSEC_PER_SEC;
+               }
+       } else {
+               tmx.time.tv_sec = offset / USEC_PER_SEC;
+               tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+               if (offset < 0 && tmx.time.tv_usec) {
+                       tmx.time.tv_sec -= 1;
+                       tmx.time.tv_usec += USEC_PER_SEC;
+               }
+       }
+
+       ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+       if (ret < 0) {
+               printf("(sec: %ld  usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+               printf("[FAIL]\n");
+               return -1;
+       }
+       return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+       struct timex tmx = {};
+       int ret;
+
+       tmx.modes = ADJ_SETOFFSET;
+       if (use_nano)
+               tmx.modes |= ADJ_NANO;
+
+       tmx.time.tv_sec = sec;
+       tmx.time.tv_usec = usec;
+       ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+       if (ret >= 0) {
+               printf("Invalid (sec: %ld  usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+               printf("[FAIL]\n");
+               return -1;
+       }
+       return 0;
+}
+
+int validate_set_offset(void)
+{
+       printf("Testing ADJ_SETOFFSET... ");
+
+       /* Test valid values */
+       if (set_offset(NSEC_PER_SEC - 1, 1))
+               return -1;
+
+       if (set_offset(-NSEC_PER_SEC + 1, 1))
+               return -1;
+
+       if (set_offset(-NSEC_PER_SEC - 1, 1))
+               return -1;
+
+       if (set_offset(5 * NSEC_PER_SEC, 1))
+               return -1;
+
+       if (set_offset(-5 * NSEC_PER_SEC, 1))
+               return -1;
+
+       if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+               return -1;
+
+       if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+               return -1;
+
+       if (set_offset(USEC_PER_SEC - 1, 0))
+               return -1;
+
+       if (set_offset(-USEC_PER_SEC + 1, 0))
+               return -1;
+
+       if (set_offset(-USEC_PER_SEC - 1, 0))
+               return -1;
+
+       if (set_offset(5 * USEC_PER_SEC, 0))
+               return -1;
+
+       if (set_offset(-5 * USEC_PER_SEC, 0))
+               return -1;
+
+       if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+               return -1;
+
+       if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+               return -1;
+
+       /* Test invalid values */
+       if (set_bad_offset(0, -1, 1))
+               return -1;
+       if (set_bad_offset(0, -1, 0))
+               return -1;
+       if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+               return -1;
+       if (set_bad_offset(0, NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, USEC_PER_SEC, 0))
+               return -1;
+       if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, -USEC_PER_SEC, 0))
+               return -1;
+
+       printf("[OK]\n");
+       return 0;
+}
+
 int main(int argc, char **argv)
 {
        if (validate_freq())
                return ksft_exit_fail();
 
+       if (validate_set_offset())
+               return ksft_exit_fail();
+
        return ksft_exit_pass();
 }
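
The negative-offset normalization in set_offset() deserves a worked example: adjtimex requires the sub-second field to be in [0, 1s) even for negative offsets, so the seconds field absorbs the sign. For an offset of -1.5s in nanosecond mode:

    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000LL

    int main(void)
    {
            long long offset = -1500000000LL;       /* -1.5 s in ns */
            long long sec = offset / NSEC_PER_SEC;  /* -1 (C truncates toward zero) */
            long long nsec = offset % NSEC_PER_SEC; /* -500000000 */

            if (offset < 0 && nsec) {
                    sec -= 1;                       /* -2 */
                    nsec += NSEC_PER_SEC;           /* +500000000 */
            }
            /* -2 s + 0.5 s = -1.5 s, with the sub-second part now in
             * [0, 1s) as the kernel expects */
            printf("sec=%lld nsec=%lld\n", sec, nsec);
            return 0;
    }
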
index 26b7926bda8849a8b931fff6750a3ed8217dae89..ba34f9e96efd6bd760870150dd9cb6ba0eacbd69 100644 (file)
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb()       mb()
-# define dma_rmb()     barrier()
-# define dma_wmb()     barrier()
-# define smp_rmb()     barrier()
-# define smp_wmb()     barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+       typeof(var) virt_store_mb_value = (value); \
+       __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+                         __ATOMIC_SEQ_CST); \
+       barrier(); \
+} while (0)
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb() abort()
-# define wmb() abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
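
A sketch of the pattern virt_store_mb() exists for -- a store that must be ordered before a subsequent load, as in the classic "advertise sleep, then re-check" handshake. Everything below (struct vq and its fields) is illustrative, not from the virtio sources:

    #include <stdbool.h>

    struct vq { bool need_wakeup; int avail; int last_seen; };

    static void prepare_to_sleep(struct vq *vq)
    {
            /* The full barrier keeps this store visible before the
             * re-read of avail below; a plain store could be reordered
             * after it, and we would sleep through a wakeup. */
            virt_store_mb(vq->need_wakeup, true);

            if (vq->avail != vq->last_seen)
                    vq->need_wakeup = false;  /* raced: work arrived, don't sleep */
            /* else: block until the other side kicks us (not shown) */
    }
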
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644 (file)
index 0000000..845960e
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+       (*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+#endif
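
These two macros force a single real memory access through a volatile cast, so the compiler can neither cache the value in a register nor tear or duplicate the access. The canonical use is a flag polled by one thread and set by another (a sketch; it relies on the macros above and says nothing about CPU-level ordering):

    #include <stdbool.h>

    static bool stop;

    static void worker(void)
    {
            /* Without READ_ONCE the compiler may hoist the load out of
             * the loop and spin on a stale register copy forever. */
            while (!READ_ONCE(stop))
                    ;       /* do work */
    }

    static void controller(void)
    {
            WRITE_ONCE(stop, true);
    }
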
index 4db7d5691ba71b33d2bda8c85d634fe3781ae6d6..0338499482159883bb3e30452d5afc0f77af577e 100644 (file)
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644 (file)
index 0000000..feaa64a
--- /dev/null
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+       -rm main.o
+       -rm ring.o ring
+       -rm virtio_ring_0_9.o virtio_ring_0_9
+       -rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644 (file)
index 0000000..34e94c4
--- /dev/null
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644 (file)
index 0000000..3a5ff43
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+       unsigned long long v = 1;
+       int r;
+
+       vmexit();
+       r = write(fd, &v, sizeof v);
+       assert(r == sizeof v);
+       vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+       unsigned long long v = 1;
+       int r;
+
+       vmexit();
+       r = read(fd, &v, sizeof v);
+       assert(r == sizeof v);
+       vmentry();
+}
+
+void kick(void)
+{
+       notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+       wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+       notify(callfd);
+}
+
+void wait_for_call(void)
+{
+       wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+       cpu_set_t cpuset;
+       int ret;
+       pthread_t self;
+       long int cpu;
+       char *endptr;
+
+       if (!arg)
+               return;
+
+       cpu = strtol(arg, &endptr, 0);
+       assert(!*endptr);
+
+       assert(cpu >= 0 && cpu < CPU_SETSIZE);
+
+       self = pthread_self();
+       CPU_ZERO(&cpuset);
+       CPU_SET(cpu, &cpuset);
+
+       ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+       assert(!ret);
+}
+
+static void run_guest(void)
+{
+       int completed_before;
+       int completed = 0;
+       int started = 0;
+       int bufs = runcycles;
+       int spurious = 0;
+       int r;
+       unsigned len;
+       void *buf;
+       int tokick = batch;
+
+       for (;;) {
+               if (do_sleep)
+                       disable_call();
+               completed_before = completed;
+               do {
+                       if (started < bufs &&
+                           started - completed < max_outstanding) {
+                               r = add_inbuf(0, NULL, "Hello, world!");
+                               if (__builtin_expect(r == 0, true)) {
+                                       ++started;
+                                       if (!--tokick) {
+                                               tokick = batch;
+                                               if (do_sleep)
+                                                       kick_available();
+                                       }
+
+                               }
+                       } else
+                               r = -1;
+
+                       /* Flush out completed bufs if any */
+                       if (get_buf(&len, &buf)) {
+                               ++completed;
+                               if (__builtin_expect(completed == bufs, false))
+                                       return;
+                               r = 0;
+                       }
+               } while (r == 0);
+               if (completed == completed_before)
+                       ++spurious;
+               assert(completed <= bufs);
+               assert(started <= bufs);
+               if (do_sleep) {
+                       if (enable_call())
+                               wait_for_call();
+               } else {
+                       poll_used();
+               }
+       }
+}
+
+static void run_host(void)
+{
+       int completed_before;
+       int completed = 0;
+       int spurious = 0;
+       int bufs = runcycles;
+       unsigned len;
+       void *buf;
+
+       for (;;) {
+               if (do_sleep) {
+                       if (enable_kick())
+                               wait_for_kick();
+               } else {
+                       poll_avail();
+               }
+               if (do_sleep)
+                       disable_kick();
+               completed_before = completed;
+               while (__builtin_expect(use_buf(&len, &buf), true)) {
+                       if (do_sleep)
+                               call_used();
+                       ++completed;
+                       if (__builtin_expect(completed == bufs, false))
+                               return;
+               }
+               if (completed == completed_before)
+                       ++spurious;
+               assert(completed <= bufs);
+               if (completed == bufs)
+                       break;
+       }
+}
+
+void *start_guest(void *arg)
+{
+       set_affinity(arg);
+       run_guest();
+       pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+       set_affinity(arg);
+       run_host();
+       pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+       {
+               .name = "help",
+               .has_arg = no_argument,
+               .val = 'h',
+       },
+       {
+               .name = "host-affinity",
+               .has_arg = required_argument,
+               .val = 'H',
+       },
+       {
+               .name = "guest-affinity",
+               .has_arg = required_argument,
+               .val = 'G',
+       },
+       {
+               .name = "ring-size",
+               .has_arg = required_argument,
+               .val = 'R',
+       },
+       {
+               .name = "run-cycles",
+               .has_arg = required_argument,
+               .val = 'C',
+       },
+       {
+               .name = "outstanding",
+               .has_arg = required_argument,
+               .val = 'o',
+       },
+       {
+               .name = "batch",
+               .has_arg = required_argument,
+               .val = 'b',
+       },
+       {
+               .name = "sleep",
+               .has_arg = no_argument,
+               .val = 's',
+       },
+       {
+               .name = "relax",
+               .has_arg = no_argument,
+               .val = 'x',
+       },
+       {
+               .name = "exit",
+               .has_arg = no_argument,
+               .val = 'e',
+       },
+       {
+       }
+};
+
+static void help(void)
+{
+       fprintf(stderr, "Usage: <test> [--help]"
+               " [--host-affinity H]"
+               " [--guest-affinity G]"
+               " [--ring-size R (default: %d)]"
+               " [--run-cycles C (default: %d)]"
+               " [--batch b]"
+               " [--outstanding o]"
+               " [--sleep]"
+               " [--relax]"
+               " [--exit]"
+               "\n",
+               ring_size,
+               runcycles);
+}
+
+int main(int argc, char **argv)
+{
+       int ret;
+       pthread_t host, guest;
+       void *tret;
+       char *host_arg = NULL;
+       char *guest_arg = NULL;
+       char *endptr;
+       long int c;
+
+       kickfd = eventfd(0, 0);
+       assert(kickfd >= 0);
+       callfd = eventfd(0, 0);
+       assert(callfd >= 0);
+
+       for (;;) {
+               int o = getopt_long(argc, argv, optstring, longopts, NULL);
+               switch (o) {
+               case -1:
+                       goto done;
+               case '?':
+                       help();
+                       exit(2);
+               case 'H':
+                       host_arg = optarg;
+                       break;
+               case 'G':
+                       guest_arg = optarg;
+                       break;
+               case 'R':
+                       ring_size = strtol(optarg, &endptr, 0);
+                       assert(ring_size && !(ring_size & (ring_size - 1)));
+                       assert(!*endptr);
+                       break;
+               case 'C':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       runcycles = c;
+                       break;
+               case 'o':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       max_outstanding = c;
+                       break;
+               case 'b':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       batch = c;
+                       break;
+               case 's':
+                       do_sleep = true;
+                       break;
+               case 'x':
+                       do_relax = true;
+                       break;
+               case 'e':
+                       do_exit = true;
+                       break;
+               default:
+                       help();
+                       exit(4);
+                       break;
+               }
+       }
+
+       /* does nothing here, used to make sure all smp APIs compile */
+       smp_acquire();
+       smp_release();
+       smp_mb();
+done:
+
+       if (batch > max_outstanding)
+               batch = max_outstanding;
+
+       if (optind < argc) {
+               help();
+               exit(4);
+       }
+       alloc_ring();
+
+       ret = pthread_create(&host, NULL, start_host, host_arg);
+       assert(!ret);
+       ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+       assert(!ret);
+
+       ret = pthread_join(guest, &tret);
+       assert(!ret);
+       ret = pthread_join(host, &tret);
+       assert(!ret);
+       return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644 (file)
index 0000000..16917ac
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+       unsigned long long t;
+
+       t = __rdtsc();
+       while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+       _Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+       if (!do_exit)
+               return;
+       
+       wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+       if (!do_exit)
+               return;
+       
+       wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+       if (do_relax)
+               cpu_relax();
+       else
+               /* prevent compiler from removing busy loops */
+               barrier();
+} 
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+    barrier(); \
+    __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+    barrier(); \
+} while (0)
+
+#endif
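
A sketch of how that release/acquire pair is meant to be used -- it mirrors the "Barrier A/B (for pairing)" comments in ring.c below. The producer orders its payload writes before the flag store; the consumer orders the flag load before the payload reads. Names here are illustrative:

    static int payload;
    static int ready;

    static void publish(int v)      /* producer side */
    {
            payload = v;            /* payload writes ...           */
            smp_release();          /* ... ordered before ...       */
            ready = 1;              /* ... the flag store           */
    }

    static int consume(void)        /* consumer side */
    {
            while (!ready)
                    busy_wait();
            smp_acquire();          /* flag load ordered before ... */
            return payload;         /* ... the payload read         */
    }
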
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644 (file)
index 0000000..c25c8d2
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+                             unsigned short next,
+                             unsigned short prev)
+{
+       return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+       unsigned short flags;
+       unsigned short index;
+       unsigned len;
+       unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+       unsigned short kick_index;
+       unsigned char reserved0[HOST_GUEST_PADDING - 2];
+       unsigned short call_index;
+       unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+       void *buf; /* descriptor is writeable, we can't get buf from there */
+       void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+       unsigned avail_idx;
+       unsigned last_used_idx;
+       unsigned num_free;
+       unsigned kicked_avail_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+       /* we do not need to track last avail index
+        * unless we have more than one in flight.
+        */
+       unsigned used_idx;
+       unsigned called_used_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+       int ret;
+       int i;
+
+       ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+       if (ret) {
+               perror("Unable to allocate ring buffer.\n");
+               exit(3);
+       }
+       event = malloc(sizeof *event);
+       if (!event) {
+               perror("Unable to allocate event buffer.\n");
+               exit(3);
+       }
+       memset(event, 0, sizeof *event);
+       guest.avail_idx = 0;
+       guest.kicked_avail_idx = -1;
+       guest.last_used_idx = 0;
+       host.used_idx = 0;
+       host.called_used_idx = -1;
+       for (i = 0; i < ring_size; ++i) {
+               struct desc desc = {
+                       .index = i,
+               };
+               ring[i] = desc;
+       }
+       guest.num_free = ring_size;
+       data = malloc(ring_size * sizeof *data);
+       if (!data) {
+               perror("Unable to allocate data buffer.\n");
+               exit(3);
+       }
+       memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+       unsigned head, index;
+
+       if (!guest.num_free)
+               return -1;
+
+       guest.num_free--;
+       head = (ring_size - 1) & (guest.avail_idx++);
+
+       /* Start with a write. On MESI architectures this helps
+        * avoid a shared state with consumer that is polling this descriptor.
+        */
+       ring[head].addr = (unsigned long)(void*)buf;
+       ring[head].len = len;
+       /* read below might bypass write above. That is OK because it's just an
+        * optimization. If this happens, we will get the cache line in a
+        * shared state which is unfortunate, but probably not worth it to
+        * add an explicit full barrier to avoid this.
+        */
+       barrier();
+       index = ring[head].index;
+       data[index].buf = buf;
+       data[index].data = datap;
+       /* Barrier A (for pairing) */
+       smp_release();
+       ring[head].flags = DESC_HW;
+
+       return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+       unsigned index;
+       void *datap;
+
+       if (ring[head].flags & DESC_HW)
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       *lenp = ring[head].len;
+       index = ring[head].index & (ring_size - 1);
+       datap = data[index].data;
+       *bufp = data[index].buf;
+       data[index].buf = NULL;
+       data[index].data = NULL;
+       guest.num_free++;
+       guest.last_used_idx++;
+       return datap;
+}
+
+void poll_used(void)
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       while (ring[head].flags & DESC_HW)
+               busy_wait();
+}
+
+void disable_call()
+{
+       /* Doing nothing to disable calls might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_call()
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       event->call_index = guest.last_used_idx;
+       /* Flush call index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier C (for pairing) */
+       smp_mb();
+       if (!need_event(event->kick_index,
+                       guest.avail_idx,
+                       guest.kicked_avail_idx))
+               return;
+
+       guest.kicked_avail_idx = guest.avail_idx;
+       kick();
+}
+
+/* host side */
+void disable_kick()
+{
+       /* Doing nothing to disable kicks might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_kick()
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       event->kick_index = host.used_idx;
+       /* Barrier C (for pairing) */
+       smp_mb();
+       return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       while (!(ring[head].flags & DESC_HW))
+               busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       if (!(ring[head].flags & DESC_HW))
+               return false;
+
+       /* make sure length read below is not speculated */
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       /* simple in-order completion: we don't need
+        * to touch index at all. This also means we
+        * can just modify the descriptor in-place.
+        */
+       ring[head].len--;
+       /* Make sure len is valid before flags.
+        * Note: alternative is to write len and flags in one access -
+        * possible on 64 bit architectures but wmb is free on Intel anyway
+        * so I have no way to test whether it's a gain.
+        */
+       /* Barrier B (for pairing) */
+       smp_release();
+       ring[head].flags = 0;
+       host.used_idx++;
+       return true;
+}
+
+void call_used(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       if (!need_event(event->call_index,
+                       host.used_idx,
+                       host.called_used_idx))
+               return;
+
+       host.called_used_idx = host.used_idx;
+       call();
+}
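
The need_event() helper at the top of ring.c is the virtio event-index idiom: it returns true when the event index lies in [prev, next), i.e. the counter has swept past the point where the peer asked to be notified, and the unsigned-short subtractions make the comparison wrap-safe. A standalone check with arbitrary values:

    #include <assert.h>

    static int need_event(unsigned short event,
                          unsigned short next,
                          unsigned short prev)
    {
            return (unsigned short)(next - event - 1) <
                   (unsigned short)(next - prev);
    }

    int main(void)
    {
            assert(need_event(10, 12, 8));          /* 10 in [8, 12): signal   */
            assert(!need_event(20, 12, 8));         /* 20 outside it: suppress */
            assert(need_event(2, 4, 0xfffe));       /* wrap: 2 in [0xfffe, 4)  */
            return 0;
    }
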
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755 (executable)
index 0000000..52b0f71
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+       #Don't run guest and host on same CPU
+       #It actually works ok if using signalling
+       if
+               (echo "$@" | grep -e "--sleep" > /dev/null) || \
+                       test $HOST_AFFINITY '!=' $cpu
+       then
+               echo "GUEST AFFINITY $cpu"
+               "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+       fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644 (file)
index 0000000..47c9a1a
--- /dev/null
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+       void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+       unsigned short avail_idx;
+       unsigned short last_used_idx;
+       unsigned short num_free;
+       unsigned short kicked_avail_idx;
+       unsigned short free_head;
+       unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+       /* we do not need to track last avail index
+        * unless we have more than one in flight.
+        */
+       unsigned short used_idx;
+       unsigned short called_used_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+       int ret;
+       int i;
+       void *p;
+
+       ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+       if (ret) {
+               perror("Unable to allocate ring buffer.\n");
+               exit(3);
+       }
+       memset(p, 0, vring_size(ring_size, 0x1000));
+       vring_init(&ring, ring_size, p, 0x1000);
+
+       guest.avail_idx = 0;
+       guest.kicked_avail_idx = -1;
+       guest.last_used_idx = 0;
+       /* Put everything in free lists. */
+       guest.free_head = 0;
+       for (i = 0; i < ring_size - 1; i++)
+               ring.desc[i].next = i + 1;
+       host.used_idx = 0;
+       host.called_used_idx = -1;
+       guest.num_free = ring_size;
+       data = malloc(ring_size * sizeof *data);
+       if (!data) {
+               perror("Unable to allocate data buffer.\n");
+               exit(3);
+       }
+       memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+       unsigned head, avail;
+       struct vring_desc *desc;
+
+       if (!guest.num_free)
+               return -1;
+
+       head = guest.free_head;
+       guest.num_free--;
+
+       desc = ring.desc;
+       desc[head].flags = VRING_DESC_F_NEXT;
+       desc[head].addr = (unsigned long)(void *)buf;
+       desc[head].len = len;
+       /* We do it like this to simulate the way
+        * we'd have to flip it if we had multiple
+        * descriptors.
+        */
+       desc[head].flags &= ~VRING_DESC_F_NEXT;
+       guest.free_head = desc[head].next;
+
+       data[head].data = datap;
+
+#ifdef RING_POLL
+       /* Barrier A (for pairing) */
+       smp_release();
+       avail = guest.avail_idx++;
+       ring.avail->ring[avail & (ring_size - 1)] =
+               (head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+       avail = (ring_size - 1) & (guest.avail_idx++);
+       ring.avail->ring[avail] = head;
+       /* Barrier A (for pairing) */
+       smp_release();
+#endif
+       ring.avail->idx = guest.avail_idx;
+       return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head;
+       unsigned index;
+       void *datap;
+
+#ifdef RING_POLL
+       head = (ring_size - 1) & guest.last_used_idx;
+       index = ring.used->ring[head].id;
+       if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       index &= ring_size - 1;
+#else
+       if (ring.used->idx == guest.last_used_idx)
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       head = (ring_size - 1) & guest.last_used_idx;
+       index = ring.used->ring[head].id;
+#endif
+       *lenp = ring.used->ring[head].len;
+       datap = data[index].data;
+       *bufp = (void*)(unsigned long)ring.desc[index].addr;
+       data[index].data = NULL;
+       ring.desc[index].next = guest.free_head;
+       guest.free_head = index;
+       guest.num_free++;
+       guest.last_used_idx++;
+       return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       for (;;) {
+               unsigned index = ring.used->ring[head].id;
+
+               if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+                       busy_wait();
+               else
+                       break;
+       }
+#else
+       unsigned head = guest.last_used_idx;
+
+       while (ring.used->idx == head)
+               busy_wait();
+#endif
+}
+
+void disable_call()
+{
+       /* Doing nothing to disable calls might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_call()
+{
+       unsigned short last_used_idx;
+
+       vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+       /* Flush call index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+#ifdef RING_POLL
+       {
+               unsigned short head = last_used_idx & (ring_size - 1);
+               unsigned index = ring.used->ring[head].id;
+
+               return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+       }
+#else
+       return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier C (for pairing) */
+       smp_mb();
+       if (!vring_need_event(vring_avail_event(&ring),
+                             guest.avail_idx,
+                             guest.kicked_avail_idx))
+               return;
+
+       guest.kicked_avail_idx = guest.avail_idx;
+       kick();
+}
+
+/* host side */
+void disable_kick()
+{
+       /* Doing nothing to disable kicks might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_kick()
+{
+       unsigned head = host.used_idx;
+
+       vring_avail_event(&ring) = head;
+       /* Barrier C (for pairing) */
+       smp_mb();
+#ifdef RING_POLL
+       {
+               unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+               return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+       }
+#else
+       return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+       unsigned head = host.used_idx;
+#ifdef RING_POLL
+       for (;;) {
+               unsigned index = ring.avail->ring[head & (ring_size - 1)];
+               if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+                       busy_wait();
+               else
+                       break;
+       }
+#else
+       while (ring.avail->idx == head)
+               busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+       unsigned used_idx = host.used_idx;
+       struct vring_desc *desc;
+       unsigned head;
+
+#ifdef RING_POLL
+       head = ring.avail->ring[used_idx & (ring_size - 1)];
+       if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+               return false;
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       used_idx &= ring_size - 1;
+       desc = &ring.desc[head & (ring_size - 1)];
+#else
+       if (used_idx == ring.avail->idx)
+               return false;
+
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       used_idx &= ring_size - 1;
+       head = ring.avail->ring[used_idx];
+       desc = &ring.desc[head];
+#endif
+
+       *lenp = desc->len;
+       *bufp = (void *)(unsigned long)desc->addr;
+
+       /* now update used ring */
+       ring.used->ring[used_idx].id = head;
+       ring.used->ring[used_idx].len = desc->len - 1;
+       /* Barrier B (for pairing) */
+       smp_release();
+       host.used_idx++;
+       ring.used->idx = host.used_idx;
+       
+       return true;
+}
+
+void call_used(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       if (!vring_need_event(vring_used_event(&ring),
+                             host.used_idx,
+                             host.called_used_idx))
+               return;
+
+       host.called_used_idx = host.used_idx;
+       call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644 (file)
index 0000000..84fc2c5
--- /dev/null
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"
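
The RING_POLL variant enabled by the two lines above lets the consumer detect a valid entry without reading the producer's index at all: the producer stores (slot | (idx & ~(ring_size - 1))) ^ 0x8000 into the id field, so the consumer just compares the stored high bits against its own free-running counter, and the 0x8000 flip keeps zero-initialized or one-lap-stale entries from matching. Worked out for ring_size = 256 with made-up values:

    #include <assert.h>

    #define RING_SIZE 256u          /* power of two */
    #define MASK      (RING_SIZE - 1)

    static unsigned short encode(unsigned short slot, unsigned short idx)
    {
            /* what the producer publishes into the id field */
            return (slot | (idx & ~MASK)) ^ 0x8000;
    }

    static int is_valid(unsigned short stored, unsigned short idx)
    {
            /* consumer test: written on the same "lap" as our counter? */
            return !((stored ^ idx ^ 0x8000) & ~MASK);
    }

    int main(void)
    {
            /* producer on lap 1 (idx 0x0102) publishes descriptor 5 */
            unsigned short stored = encode(5, 0x0102);      /* 0x8105 */

            assert(is_valid(stored, 0x0102));       /* same lap: ready      */
            assert((stored & MASK) == 5);           /* slot recovered       */
            assert(!is_valid(stored, 0x0202));      /* next lap: stale      */
            assert(!is_valid(0, 0x0002));           /* zeroed slot: invalid */
            return 0;
    }
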
index 69bca185c471d1dec971f02403ae9fd60851f5bd..ea6064696fe43867d1d1baee35059b276d84817b 100644 (file)
@@ -143,7 +143,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
  * Check if there was a change in the timer state (should we raise or lower
  * the line level to the GIC).
  */
-static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
+static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
@@ -154,10 +154,12 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
         * until we call this function from kvm_timer_flush_hwstate.
         */
        if (!vgic_initialized(vcpu->kvm))
-           return;
+               return -ENODEV;
 
        if (kvm_timer_should_fire(vcpu) != timer->irq.level)
                kvm_timer_update_irq(vcpu, !timer->irq.level);
+
+       return 0;
 }
 
 /*
@@ -218,7 +220,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
        bool phys_active;
        int ret;
 
-       kvm_timer_update_state(vcpu);
+       if (kvm_timer_update_state(vcpu))
+               return;
 
        /*
        * If we enter the guest with the virtual input level to the VGIC
index 043032c6a5a4dc6944c5b2bc6277ad6398decd23..00429b392c61cbbd32203877405631f635fc61e3 100644 (file)
@@ -1875,8 +1875,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
-       int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+       int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+       int sz = nr_longs * sizeof(unsigned long);
        vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
        vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
        vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
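
The vgic fix above sizes the shared-IRQ bitmaps in whole longs rather than bits/8 bytes; bitmap operations read and write a long at a time, so the old formula under-allocates whenever the bit count is not a multiple of BITS_PER_LONG. The arithmetic, with the kernel helper spelled out (992 = 1024 IRQs minus the 32 private ones is just an example):

    #include <stdio.h>

    #define BITS_PER_LONG    (8 * sizeof(long))
    #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
            unsigned int nr_bits = 992;

            size_t old_sz = nr_bits / 8;                            /* 124 bytes */
            size_t new_sz = BITS_TO_LONGS(nr_bits) * sizeof(long);  /* 128 on 64-bit */

            /* kzalloc(old_sz) lets whole-long bitmap ops run 4 bytes
             * past the allocation on a 64-bit kernel */
            printf("old=%zu new=%zu\n", old_sz, new_sz);
            return 0;
    }
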
index 35315992245600a418874fd371e641ef336a0a22..db2dd3335c6a6de0c2f0dd9fe89e462d16f5ba81 100644 (file)
@@ -172,7 +172,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
         * do alloc nowait since if we are going to sleep anyway we
         * may as well sleep faulting in page
         */
-       work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
+       work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
        if (!work)
                return 0;